Repository 'multi_intersect'
hg clone https://eddie.galaxyproject.org/repos/aaronquinlan/multi_intersect

Changeset 0:dfcd8b6c1bda (2011-11-03)
Next changeset 1:bec36315bd12 (2011-11-19)
Commit message:
Uploaded
added:
BEDTools-Version-2.14.3/LICENSE
BEDTools-Version-2.14.3/Makefile
BEDTools-Version-2.14.3/README.rst
BEDTools-Version-2.14.3/RELEASE_HISTORY
BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed
BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed
BEDTools-Version-2.14.3/genomes/human.hg18.genome
BEDTools-Version-2.14.3/genomes/human.hg19.genome
BEDTools-Version-2.14.3/genomes/mouse.mm8.genome
BEDTools-Version-2.14.3/genomes/mouse.mm9.genome
BEDTools-Version-2.14.3/src/Makefile
BEDTools-Version-2.14.3/src/annotateBed/Makefile
BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp
BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h
BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp
BEDTools-Version-2.14.3/src/bamToBed/Makefile
BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp
BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile
BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp
BEDTools-Version-2.14.3/src/bedToBam/Makefile
BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp
BEDTools-Version-2.14.3/src/bedToIgv/Makefile
BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp
BEDTools-Version-2.14.3/src/closestBed/Makefile
BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp
BEDTools-Version-2.14.3/src/closestBed/closestBed.h
BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp
BEDTools-Version-2.14.3/src/complementBed/Makefile
BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp
BEDTools-Version-2.14.3/src/complementBed/complementBed.h
BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp
BEDTools-Version-2.14.3/src/coverageBed/Makefile
BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp
BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h
BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp
BEDTools-Version-2.14.3/src/cuffToTrans/Makefile
BEDTools-Version-2.14.3/src/fastaFromBed/Makefile
BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp
BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h
BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp
BEDTools-Version-2.14.3/src/fjoin/Makefile
BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp
BEDTools-Version-2.14.3/src/fjoin/fjoin.h
BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp
BEDTools-Version-2.14.3/src/flankBed/Makefile
BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp
BEDTools-Version-2.14.3/src/flankBed/flankBed.h
BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp
BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile
BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp
BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h
BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp
BEDTools-Version-2.14.3/src/intersectBed/Makefile
BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp
BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h
BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp
BEDTools-Version-2.14.3/src/linksBed/Makefile
BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp
BEDTools-Version-2.14.3/src/linksBed/linksBed.h
BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp
BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile
BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp
BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h
BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp
BEDTools-Version-2.14.3/src/mergeBed/Makefile
BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp
BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h
BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp
BEDTools-Version-2.14.3/src/multiBamCov/Makefile
BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp
BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h
BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp
BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile
BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h
BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp
BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h
BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp
BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h
BEDTools-Version-2.14.3/src/nucBed/Makefile
BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp
BEDTools-Version-2.14.3/src/nucBed/nucBed.h
BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp
BEDTools-Version-2.14.3/src/overlap/Makefile
BEDTools-Version-2.14.3/src/overlap/overlap.cpp
BEDTools-Version-2.14.3/src/pairToBed/Makefile
BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp
BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h
BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp
BEDTools-Version-2.14.3/src/pairToPair/Makefile
BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp
BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h
BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp
BEDTools-Version-2.14.3/src/shuffleBed/Makefile
BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp
BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h
BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp
BEDTools-Version-2.14.3/src/slopBed/Makefile
BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp
BEDTools-Version-2.14.3/src/slopBed/slopBed.h
BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp
BEDTools-Version-2.14.3/src/sortBed/Makefile
BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp
BEDTools-Version-2.14.3/src/sortBed/sortBed.h
BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp
BEDTools-Version-2.14.3/src/subtractBed/Makefile
BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp
BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h
BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp
BEDTools-Version-2.14.3/src/tagBam/Makefile
BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp
BEDTools-Version-2.14.3/src/tagBam/tagBam.h
BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp
BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile
BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h
BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp
BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h
BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp
BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp
BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h
BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile
BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE
BEDTools-Version-2.14.3/src/utils/BamTools/Makefile
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h
BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h
BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp
BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h
BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h
BEDTools-Version-2.14.3/src/utils/Fasta/Makefile
BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp
BEDTools-Version-2.14.3/src/utils/Fasta/split.h
BEDTools-Version-2.14.3/src/utils/bedFile/Makefile
BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp
BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h
BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig
BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile
BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp
BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h
BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile
BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp
BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h
BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile
BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp
BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h
BEDTools-Version-2.14.3/src/utils/fileType/Makefile
BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp
BEDTools-Version-2.14.3/src/utils/fileType/fileType.h
BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile
BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp
BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h
BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB
BEDTools-Version-2.14.3/src/utils/gzstream/Makefile
BEDTools-Version-2.14.3/src/utils/gzstream/README
BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C
BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h
BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o
BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o
BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o
BEDTools-Version-2.14.3/src/utils/gzstream/version
BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile
BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp
BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h
BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile
BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp
BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h
BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h
BEDTools-Version-2.14.3/src/utils/tabFile/Makefile
BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp
BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h
BEDTools-Version-2.14.3/src/utils/version/version.h
BEDTools-Version-2.14.3/src/windowBed/Makefile
BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp
BEDTools-Version-2.14.3/src/windowBed/windowBed.h
BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/LICENSE Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,340 @@\n+\t\t    GNU GENERAL PUBLIC LICENSE\n+\t\t       Version 2, June 1991\n+\n+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,\n+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+\t\t\t    Preamble\n+\n+  The licenses for most software are designed to take away your\n+freedom to share and change it.  By contrast, the GNU General Public\n+License is intended to guarantee your freedom to share and change free\n+software--to make sure the software is free for all its users.  This\n+General Public License applies to most of the Free Software\n+Foundation\'s software and to any other program whose authors commit to\n+using it.  (Some other Free Software Foundation software is covered by\n+the GNU Lesser General Public License instead.)  You can apply it to\n+your programs, too.\n+\n+  When we speak of free software, we are referring to freedom, not\n+price.  Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+this service if you wish), that you receive source code or can get it\n+if you want it, that you can change the software or use pieces of it\n+in new free programs; and that you know you can do these things.\n+\n+  To protect your rights, we need to make restrictions that forbid\n+anyone to deny you these rights or to ask you to surrender the rights.\n+These restrictions translate to certain responsibilities for you if you\n+distribute copies of the software, or if you modify it.\n+\n+  For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must give the recipients all the rights that\n+you have.  You must make sure that they, too, receive or can get the\n+source code.  
And you must show them these terms so they know their\n+rights.\n+\n+  We protect your rights with two steps: (1) copyright the software, and\n+(2) offer you this license which gives you legal permission to copy,\n+distribute and/or modify the software.\n+\n+  Also, for each author\'s protection and ours, we want to make certain\n+that everyone understands that there is no warranty for this free\n+software.  If the software is modified by someone else and passed on, we\n+want its recipients to know that what they have is not the original, so\n+that any problems introduced by others will not reflect on the original\n+authors\' reputations.\n+\n+  Finally, any free program is threatened constantly by software\n+patents.  We wish to avoid the danger that redistributors of a free\n+program will individually obtain patent licenses, in effect making the\n+program proprietary.  To prevent this, we have made it clear that any\n+patent must be licensed for everyone\'s free use or not licensed at all.\n+\n+  The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+\t\t    GNU GENERAL PUBLIC LICENSE\n+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n+\n+  0. This License applies to any program or other work which contains\n+a notice placed by the copyright holder saying it may be distributed\n+under the terms of this General Public License.  The "Program", below,\n+refers to any such program or work, and a "work based on the Program"\n+means either the Program or any derivative work under copyright law:\n+that is to say, a work containing the Program or a portion of it,\n+either verbatim or with modifications and/or translated into another\n+language.  (Hereinafter, translation is included without limitation in\n+the term "modification".)  Each licensee is addressed as "you".\n+\n+Activities other than copying, distribution and modification are not\n+covered by this License; they are outside its scope.  
The act of\n+running the Program is not restricted, and the output from the Program\n+is covered only if its contents constitute a work based on the\n+Program (independent of having been made by running the Program).\n+Whethe'..b'/OR OTHER PARTIES\n+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\n+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\n+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n+REPAIR OR CORRECTION.\n+\n+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n+POSSIBILITY OF SUCH DAMAGES.\n+\n+\t\t     END OF TERMS AND CONDITIONS\n+\n+\t    How to Apply These Terms to Your New Programs\n+\n+  If you develop a new program, and you want it to be of the greatest\n+possible use to the public, the best way to achieve this is to make it\n+free software which everyone can redistribute and change under these terms.\n+\n+  To do so, attach the following notices to the program.  
It is safest\n+to attach them to the start of each source file to most effectively\n+convey the exclusion of warranty; and each file should have at least\n+the "copyright" line and a pointer to where the full notice is found.\n+\n+    <one line to give the program\'s name and a brief idea of what it does.>\n+    Copyright (C) <year>  <name of author>\n+\n+    This program is free software; you can redistribute it and/or modify\n+    it under the terms of the GNU General Public License as published by\n+    the Free Software Foundation; either version 2 of the License, or\n+    (at your option) any later version.\n+\n+    This program is distributed in the hope that it will be useful,\n+    but WITHOUT ANY WARRANTY; without even the implied warranty of\n+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n+    GNU General Public License for more details.\n+\n+    You should have received a copy of the GNU General Public License along\n+    with this program; if not, write to the Free Software Foundation, Inc.,\n+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+If the program is interactive, make it output a short notice like this\n+when it starts in an interactive mode:\n+\n+    Gnomovision version 69, Copyright (C) year name of author\n+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n+    This is free software, and you are welcome to redistribute it\n+    under certain conditions; type `show c\' for details.\n+\n+The hypothetical commands `show w\' and `show c\' should show the appropriate\n+parts of the General Public License.  
Of course, the commands you use may\n+be called something other than `show w\' and `show c\'; they could even be\n+mouse-clicks or menu items--whatever suits your program.\n+\n+You should also get your employer (if you work as a programmer) or your\n+school, if any, to sign a "copyright disclaimer" for the program, if\n+necessary.  Here is a sample; alter the names:\n+\n+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n+  `Gnomovision\' (which makes passes at compilers) written by James Hacker.\n+\n+  <signature of Ty Coon>, 1 April 1989\n+  Ty Coon, President of Vice\n+\n+This General Public License does not permit incorporating your program into\n+proprietary programs.  If your program is a subroutine library, you may\n+consider it more useful to permit linking proprietary applications with the\n+library.  If this is what you want to do, use the GNU Lesser General\n+Public License instead of this License.\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/Makefile Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,89 @@
+# ==========================
+# BEDTools Makefile
+# (c) 2009 Aaron Quinlan
+# ==========================
+
+# define our object and binary directories
+export OBJ_DIR = obj
+export BIN_DIR = bin
+export SRC_DIR = src
+export CXX = g++
+export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC
+export LIBS = -lz
+export BT_ROOT  = src/utils/BamTools/
+
+
+SUBDIRS = $(SRC_DIR)/annotateBed \
+   $(SRC_DIR)/bamToBed \
+   $(SRC_DIR)/bedToBam \
+   $(SRC_DIR)/bedToIgv \
+   $(SRC_DIR)/bed12ToBed6 \
+   $(SRC_DIR)/closestBed \
+   $(SRC_DIR)/complementBed \
+   $(SRC_DIR)/coverageBed \
+   $(SRC_DIR)/fastaFromBed \
+   $(SRC_DIR)/flankBed \
+   $(SRC_DIR)/genomeCoverageBed \
+   $(SRC_DIR)/intersectBed \
+   $(SRC_DIR)/linksBed \
+   $(SRC_DIR)/maskFastaFromBed \
+   $(SRC_DIR)/mergeBed \
+   $(SRC_DIR)/multiBamCov \
+   $(SRC_DIR)/multiIntersectBed \
+   $(SRC_DIR)/nucBed \
+   $(SRC_DIR)/overlap \
+   $(SRC_DIR)/pairToBed \
+   $(SRC_DIR)/pairToPair \
+   $(SRC_DIR)/shuffleBed \
+   $(SRC_DIR)/slopBed \
+   $(SRC_DIR)/sortBed \
+   $(SRC_DIR)/subtractBed \
+   $(SRC_DIR)/tagBam \
+   $(SRC_DIR)/unionBedGraphs \
+   $(SRC_DIR)/windowBed
+
+UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \
+ $(SRC_DIR)/utils/bedFile \
+ $(SRC_DIR)/utils/bedGraphFile \
+ $(SRC_DIR)/utils/chromsweep \
+ $(SRC_DIR)/utils/gzstream \
+ $(SRC_DIR)/utils/fileType \
+ $(SRC_DIR)/utils/bedFilePE \
+ $(SRC_DIR)/utils/sequenceUtilities \
+ $(SRC_DIR)/utils/tabFile \
+ $(SRC_DIR)/utils/BamTools \
+ $(SRC_DIR)/utils/BamTools-Ancillary \
+ $(SRC_DIR)/utils/Fasta \
+ $(SRC_DIR)/utils/genomeFile
+
+all:
+ [ -d $(OBJ_DIR) ] || mkdir -p $(OBJ_DIR)
+ [ -d $(BIN_DIR) ] || mkdir -p $(BIN_DIR)
+
+ @echo "Building BEDTools:"
+ @echo "========================================================="
+
+ @for dir in $(UTIL_SUBDIRS); do \
+ echo "- Building in $$dir"; \
+ $(MAKE) --no-print-directory -C $$dir; \
+ echo ""; \
+ done
+
+ @for dir in $(SUBDIRS); do \
+ echo "- Building in $$dir"; \
+ $(MAKE) --no-print-directory -C $$dir; \
+ echo ""; \
+ done
+
+
+.PHONY: all
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+ @rm -Rf $(BT_ROOT)/lib
+ @rm -f $(BT_ROOT)/src/api/*.o
+ @rm -f $(BT_ROOT)/src/api/internal/*.o
+ @rm -Rf $(BT_ROOT)/include
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/README.rst Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,47 @@
+==============================
+          BEDTools         
+==============================
+
+Created by Aaron Quinlan Spring 2009.
+
+Copyright 2009,2010,2011 Aaron Quinlan. All rights reserved.
+
+Stable releases: http://code.google.com/p/bedtools
+
+Repository:      https://github.com/arq5x/bedtools
+
+Released under GNU public license version 2 (GPL v2).
+
+
+Summary
+-------
+BEDTools is a collection of utilities for comparing, summarizing, and 
+intersecting genomic features in BED, GTF/GFF, VCF and BAM formats. 
+
+
+Manual
+------
+See the extensive PDF manual available at: http://code.google.com/p/bedtools/downloads/detail?name=BEDTools-User-Manual.v4.pdf
+
+This manual covers many common usage examples.  There are also examples available at:
+http://code.google.com/p/bedtools/wiki/Usage
+http://code.google.com/p/bedtools/wiki/UsageAdvanced
+
+Installation
+------------
+Git
+...
+git clone git://github.com/arq5x/bedtools.git
+
+Download tarball - that big gray button on the upper right.
+...........................................................
+#. Unpack the downloaded source tarball.
+#. cd into the expanded folder.
+#. Type "make clean" and hit enter.
+#. Type "make all" and hit enter.
+#. If you encountered no errors, then all of the BEDTools binaries should now be in bin/.
+   If not, try to troubleshoot, then email me: aaronquinlan at gmail dot com
+#. Copy the files in bin/ to ~/bin or if you have the privileges, to /usr/local/bin.
+#. Use the tools.
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/RELEASE_HISTORY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/RELEASE_HISTORY Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,646 @@\n+Version 2.14.2 (2-Nov-2011)\n+\n+Bug Fixes\n+=========\n+1. Corrected the help for closestBed. It now correctly reads -io instead of -no.\n+2. Fixed regression in closestBed injected in version 2.13.4 whereby B features to the right of an A feature were missed.\n+\n+New tool.\n+============\n+1. Added the multiIntersectBed tool for reporting common intervals among multiple **sorted** BED/GFF/VCF files.\n+\n+\n+\n+Version 2.13.4 (26-Oct-2011)\n+Bug Fixes\n+=========\n+1. The -sorted option (chromsweep) in intersectBed now obeys -s and -S.  I had neglected to implement that. Thanks to Paul Ryvkin for pointing this out.\n+2. The -split option was mistakenly splitting of D CIGAR ops.\n+3. The Makefile was not including zlib properly for newer versions of GCC. Thanks to Istvan Albert for pointing this out and providing the solution.\n+\n+Improvements\n+============\n+1. Thanks to Jacob Biesinger for a new option (-D) in closestBed that will report _signed_ distances.  Moreover, the new option allows fine control over whether the distances are reported based on the reference genome or based on the strand of the A or B feature. Many thanks to Jacob.\n+2. Thanks to some nice analysis from Paul Ryvkin, I realized that the -sorted option was using way too much memory in certain cases where there is a chromosome change in a sorted BED file.  This has been corrected.\n+\n+Version 2.13.3 (30-Sept-2011)\n+Bug Fixes\n+============\n+1. intersectBed detected, but did not report overlaps when using BAM input and -bed.\n+\n+Other\n+=====\n+1. Warning that -sorted trusts, but does not enforce that data is actually sorted.\n+\n+\n+Version 2.13.2 (23-Sept-2011)\n+\n+New algorithm\n+=============\n+1. Preliminary release of the chrom_sweep algorithm.\n+\n+New options\n+===========\n+1. genomeCoverageBed no longer requires a genome file when working with BAM input.  It instead uses the BAM header.\n+2. 
tagBam now has a -score option for annotating alignments with the BED "scores" field in annotation files.  This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line.\n+\n+Bug fixes\n+=========\n+1. Correct a bug that prevented proper BAM support in intersectBed.\n+2. Improved detection of GFF features with negative coordinates.\n+\n+\n+\n+Version 2.13.1 (6-Sept-2011)\n+\n+New options\n+===========\n+1. tagBam now has -s and -S options for only annotating alignments with features on the same and opposite strand, respectively.\n+2. tagBam now has a -names option for annotating alignments with the "name" field in annotation files.  This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line.  Currently, this works well with BED files, but given the limited metadata support for GFF files, annotating with -names and GFF files may not work as well as wished, depending on the type of GFF file used.\n+\n+\n+\n+Version 2.13.0 (1-Sept-2011)\n+\n+New tools\n+=========\n+1. tagBam. This tool annotates a BAM file with custom tag fields based on overlaps with BED/GFF/VCF files.\n+For example:\n+$ tagBam -i aln.bam -files exons.bed introns.bed cpg.bed utrs.bed \\\n+                    -tags exonic intonic cpg utr \\\n+                    > aln.tagged.bam\n+For alignments that have overlaps, you should see new BAM tags like "YB:Z:exonic", "YB:Z:cpg;utr"\n+\n+2. multiBamCov. 
The new tool counts sequence coverage for multiple bams at specific loci defined in a BED/GFF/VCF file.\n+For example:\n+\n+$ multiBamCov -bams aln.1.bam aln.2.bam aln3.bam -bed exons.bed\n+chr1\t861306\t861409\tSAMD11\t1\t+\t181\t280\t236\n+chr1\t865533\t865718\tSAMD11\t2\t+\t249\t365\t374\n+chr1\t866393\t866496\tSAMD11\t3\t+\t162\t298\t322\n+\n+where the last 3 columns represent the number of alignments overlapping each interval from the three BAM file.\n+\n+The following options are available to control which types of alignments are are counted.\n+-q\tMinimum mapping quality allowed. Default is 0.\n+\n+-D\tInclude duplicate-marked reads.  Defa'..b'.\n+\n+\n+Version 2.2.1\n+1. Fixed a very obvious bug in subtractBed that caused improper behavior when a feature in A was overlapped by more than one feature in B.\n+Many thanks to folks in the Hannon lab at CSHL for pointing this out.\n+\n+\n+Version 2.2.0\n+=== Notable changes in this release ===\n+1.  coverageBed will optionally only count features in BED file A (e.g. sequencing reads) that overlap with \n+\tthe intervals/windows in BED file B on the same strand.  This has been requested several times recently \n+\tand facilitates CHiP-Seq and RNA-Seq experiments.\n+\n+2.  intersectBed can now require a minimum __reciprocal__ overlap between intervals in BED A and BED B.  For example,\n+\tpreviously, if one used -f 0.90, it required that a feature in B overlap 90% of the feature in A for the "hit"\n+\tto be reported.  If one adds the -r (reciprocal) option, the hit must also cover 90% of the feature in B.  This helps\n+\tto exclude overlaps between say small features in A and large features in B:\n+\n+\tA ==========\n+\tB  **********************************************************\n+\t\t\n+\t-f 0.50 (Reported), whereas -f 0.50 -r (Not reported)\n+\n+3.  The score field has been changed to be a string.  
While this deviates from the UCSC definition, it allows one to track\n+\tmuch more meaningful information about a feature/interval.  For example, score could now be:\n+\t\n+\t7.31E-05  (a p-value)\n+\t0.334577  (mean enrichment)\n+\t2:2.2:40:2 (several values encoded in a string)\n+\t\n+4.  closestBed now, by default, reports __all__ intervals in B that overlap equally with an interval in A.  Previously, it\n+\tmerely reported the first such feature that appeared in B.  Here\'s a cartoon explaining the difference.\n+\t\n+\t**Prior behavior**\n+\t\n+\tA\t ==============\n+\tB.1        \t\t\t\t++++++++++++++\n+\tB.2       \t\t\t\t++++++++++++++\n+\tB.3               \t\t\t\t+++++++++\n+\n+\t-----------------------------------------\n+\tResult = B.1 \t\t\t++++++++++++++\n+\t\n+\t\n+\t**Current behavior**\n+\t\n+\tA\t ==============\n+\tB.1        \t\t\t\t++++++++++++++\n+\tB.2       \t\t\t\t++++++++++++++\n+\tB.3               \t\t\t\t+++++++++\n+\n+\t-----------------------------------------\n+\tResult = B.1 \t\t\t++++++++++++++\n+\t\t\t B.2 \t\t\t++++++++++++++\n+\n+\tUsing the -t option, one can also choose to report either the first or the last entry in B in the event of a tie.\n+\n+5.  Several other minor changes to the algorithms have been made to increase speed a bit.\n+\n+\n+VERSION 2.1.2\n+1. Fixed yet another bug in the parsing of "track" or "browser" lines.  Sigh...\n+2. Change the "score" column (i.e. column 5) to b stored as a string.  While this deviates\n+   from the UCSC convention, it allows significantly more information to be packed into the column.\n+\n+\n+VERSION 2.1.1\n+1. Added limits.h to bedFile.h to fix compilation issues on some systems.\n+2. Fixed bug in testing for "track" or "browser" lines.\n+\n+\n+VERSION 2.1.0\n+1. Fixed a bug in peIntersectBed that prevented -a from being correctly handled when passed via stdin.\n+2. Added new functionality to coverageBed that calculates the density of coverage.\n+3. 
Fixed bug in geneomCoverageBed.\n+\n+\n+VERSION 2.0.1\n+1. Added the ability to retain UCSC browser track/browser headers in BED files.\n+\n+\n+VERSION 2.0\n+1.  Sped up the file parsing.  ~10-20% increase in speed.\n+2.  Created reportBed() as a common method in the bedFile class.  Cleans up the code quite nicely.\n+3.  Added the ability to compare BED files accounting for strandedness.\n+4.  Paired-end intersect.\n+5.  Fixed bug that prevented overlaps from being reported when the overlap fraction requested is 1.0\n+\n+\n+\n+VERSION 1.2, 04/27/2009. (1eb06115bdf3c49e75793f764a70c3501bb53f33)\n+1.  Added subtractBed.\n+\tA. Fixed bug that prevented "split" overlaps from being reported.\n+\tB. Prevented A from being reported if >=1 feature in B completely spans it.\n+2.  Added linksBed.\n+3.  Added the ability to define separate windows for upstream and downstream to windowBed.\n+\n+\n+VERSION 1.1, 04/23/2009. (b74eb1afddca9b70bfa90ba763d4f2981a56f432)\n+Initial release.\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,828 @@\n+chr21\t9928613\t10012791\tuc002yip.1\t0\t-\t9928775\t9995604\t0\t24\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,62332,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc002yiq.1\t0\t-\t9928775\t9995604\t0\t23\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc002yir.1\t0\t-\t9928775\t9995604\t0\t22\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc010gkv.1\t0\t-\t9928775\t9973168\t0\t19\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,78302,81026,84020,\n+chr21\t9928613\t10061300\tuc002yis.1\t0\t-\t9928613\t9928613\t0\t33\t298,71,93,80,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,120,129,213,66,130,165,197,105,102,117,120,702,\t0,2082,3564,7620,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,89277,91464,104695,106174,106728,108195,108605,114070,114367,119980,122855,131985,\n+chr21\t10042683\t10120796\tuc002yit.1\t0\t-\t10071441\t10120588\t0\t10\t105,102,117,120,702,115,172,163,101,223,\t0,297,5910,8785,17915,26668,28637,37348,76733,77890,\n+chr21\t10042683\t10120808\tuc002yiu.1\t0\t-\t10080193\t10120608\t0\t9\t105,102,117,702,115,172,163,101,215,\t0,297,5910,17915,26668,28637,37348,76733,77910,\n+chr21\t10079666\t10120808\tuc002yiv.1\t0\t-\t10081686\t10120608\t0\t4\t528,91,101,215,\t0,1930,39750,40927,\n+chr21\t10080031\t10081687\tuc002yiw.1\t0\t-\t10080031\t10080031\t0
\t2\t200,91,\t0,1565,\n+chr21\t10081660\t10120796\tuc002yix.2\t0\t-\t10081660\t10081660\t0\t3\t27,101,223,\t0,37756,38913,\n+chr21\t13332351\t13346202\tuc002yiy.2\t0\t+\t13332351\t13332351\t0\t5\t265,115,2492,65,215,\t0,4342,4619,10805,13636,\n+chr21\t13336975\t13346202\tuc002yiz.2\t0\t+\t13336975\t13336975\t0\t4\t169,108,65,215,\t0,2379,6181,9012,\n+chr21\t13361138\t13412440\tuc002yja.2\t0\t+\t13361189\t13412250\t0\t3\t102,118,411,\t0,2228,50891,\n+chr21\t13904368\t13935777\tuc002yjb.1\t0\t+\t13904420\t13935758\t0\t11\t573,115,174,107,138,71,71,45,167,124,241,\t0,4946,5220,8172,9981,12628,18205,19679,20764,29338,31168,\n+chr21\t13944438\t13944477\tuc002yjc.1\t0\t+\t13944438\t13944438\t0\t1\t39,\t0,\n+chr21\t13945076\t13945106\tuc002yjd.1\t0\t+\t13945076\t13945076\t0\t1\t30,\t0,\n+chr21\t13973491\t13975330\tuc002yje.1\t0\t-\t13973781\t13974201\t0\t1\t1839,\t0,\n+chr21\t14137333\t14142556\tuc002yjf.1\t0\t-\t14137333\t14137333\t0\t6\t291,114,270,129,191,275,\t0,880,1863,2871,3617,4948,\n+chr21\t14200023\t14200052\tuc002yjg.1\t0\t+\t14200023\t14200023\t0\t1\t29,\t0,\n+chr21\t14202070\t14202096\tuc002yjh.1\t0\t-\t14202070\t14202070\t0\t1\t26,\t0,\n+chr21\t14237966\t14274631\tuc002yji.1\t0\t-\t14237966\t14237966\t0\t6\t88,71,73,29,85,738,\t0,1264,7292,7457,10291,35927,\n+chr21\t14270940\t14274631\tuc002yjj.2\t0\t-\t14270940\t14270940\t0\t2\t1809,738,\t0,2953,\n+chr21\t14321612\t14438647\tuc002yjk.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,699,\t0,56529,116336,\n+chr21\t14321612\t14438730\tuc002yjl.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,1030,\t0,56529,116088,\n+chr21\t14403005\t14501125\tuc002yjm.1\t0\t-\t14403184\t14501115\t0\t10\t267,177,112,105,168,90,102,109,386,119,\t0,35809,43759,54605,56409,57548,72944,77147,80220,98001,\n+chr21\t14459414\t14483611\tuc010gkw.1\t0\t-\t14459415\t14483519\t0\t4\t168,102,109,386,\t0,16535,20738,23811,\n+chr21\t14510336\t14522564\tuc002yjo.2\t0\t+\t14510378\t14521485\t0\t5\t138,163,73,100,1493,\t0,3418,4952,8293,10735,\n+chr21\t1451
0336\t14522564\tuc002yjn.2\t0\t+\t14518639\t14521485\t0\t5\t138,173,73,100,1493,\t0,3408,4952,8293,10735,\n+chr21\t14510336\t14522564\tuc002yjp.2\t0\t+\t14518639\t14521485\t0\t4\t138,73,100,1493,\t0,4952,8293,10735,\n+chr21\t14567990\t14585577\tuc002yjq.1\t0\t+\t14568283\t14585568\t0\t5\t359,62,148,143,100,\t0,5797,13751,14605,17487,\n+chr21\t14567990\t14595563\t'..b',39359,42463,57573,61711,64621,65078,66549,67043,73143,73892,74113,75468,77429,78209,87067,88715,91947,94082,97848,101709,103500,104273,105888,106385,107407,111781,112856,114041,115960,116732,118374,119687,120571,121161,\n+chr21\t46569229\t46690110\tuc002zjj.1\t0\t+\t46578825\t46689668\t0\t47\t323,213,372,81,256,56,175,137,112,223,82,175,218,455,556,147,152,143,233,163,213,230,117,21,207,171,153,879,156,771,103,155,141,174,196,223,151,450,245,103,174,120,230,77,139,128,486,\t0,1489,9509,21240,21855,22569,24124,24796,26541,28216,29099,30565,32087,38593,41697,56807,60945,63855,64312,65783,66277,72377,73126,73347,74702,76663,77443,86301,87949,91181,93316,97082,100943,102734,103507,105122,105619,106641,111015,112090,113275,115194,115966,117608,118921,119805,120395,\n+chr21\t46578738\t46594162\tuc010gqk.1\t0\t+\t46578738\t46578738\t0\t6\t372,81,259,56,175,137,\t0,11731,12326,13060,14615,15287,\n+chr21\t46699327\t46703021\tuc002zjk.1\t0\t-\t46699327\t46699327\t0\t2\t932,597,\t0,3097,\n+chr21\t46703317\t46790647\tuc002zjl.1\t0\t+\t46703472\t46790347\t0\t20\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,426,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,\n+chr21\t46703317\t46791548\tuc002zjm.1\t0\t+\t46703472\t46791387\t0\t21\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n+chr21\t46703317\t46791548\tuc010gql.1\t0\t+\t46703472\t46791387\t0\t20\t246,72,120,120,2
52,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n+chr21\t46703317\t46794451\tuc002zjn.1\t0\t+\t46703472\t46794259\t0\t22\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,340,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,\n+chr21\t46703317\t46813028\tuc002zjo.1\t0\t+\t46703472\t46811963\t0\t38\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,115,202,110,81,124,122,112,110,131,169,171,62,58,75,175,124,1318,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,91566,92657,92847,95192,95598,95845,97005,97962,98608,99246,101745,102771,104881,106736,107583,108393,\n+chr21\t46748653\t46789614\tuc002zjp.1\t0\t+\t46748811\t46789614\t0\t14\t177,120,198,92,111,124,110,103,65,94,120,115,140,137,\t0,4944,7104,24779,27814,29211,29691,30272,32911,33133,34170,35564,37443,40824,\n+chr21\t46783075\t46791548\tuc002zjq.1\t0\t+\t46784120\t46791387\t0\t5\t1257,140,137,81,293,\t0,3021,6402,7146,8180,\n+chr21\t46797251\t46803437\tuc002zjr.2\t0\t+\t46798578\t46802830\t0\t7\t1382,122,112,110,131,169,874,\t0,1664,1911,3071,4028,4674,5312,\n+chr21\t46805625\t46813028\tuc002zjs.1\t0\t+\t46806017\t46811963\t0\t5\t521,75,175,124,1318,\t0,2573,4428,5275,6085,\n+chr21\t46808123\t46813028\tuc002zjt.1\t0\t+\t46808123\t46808123\t0\t4\t150,175,124,1318,\t0,1930,2777,3587,\n+chr21\t46842958\t46849463\tuc002zju.1\t0\t-\t46843703\t46846756\t0\t3\t886,139,110,\t0,3660,6395,\n+chr21\t46842958\t46849463\tuc002zjv.1\t0\t-\t46843791\t46846756\t0\t4\t886,94,139,110,\t0,1664,3660,6395,\n+chr21\t46879954\t46904483\tuc002zjw.2\t0\t+\t46881291\t46903264\t0\t7\t149,95,105,183,162,165,1399,\t0,1281,7920,8691,12843,13960,23130,\n+chr21\t46879954\t46909291\tuc002zjx.1\t0\t+
\t46881291\t46908667\t0\t12\t149,109,95,105,183,162,165,176,130,137,172,657,\t0,824,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n+chr21\t46879954\t46909291\tuc002zjy.1\t0\t+\t46881291\t46908667\t0\t11\t149,95,105,183,162,165,176,130,137,172,657,\t0,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n+chr21\t46879954\t46909291\tuc010gqm.1\t0\t+\t46881291\t46908667\t0\t9\t149,95,105,183,162,165,137,172,657,\t0,1281,7920,8691,12843,13960,26185,27768,28680,\n+chr21\t46887625\t46906276\tuc002zjz.1\t0\t+\t46892812\t46905317\t0\t6\t354,183,162,165,176,1104,\t0,1020,5172,6289,15459,17547,\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,57261 @@\n+chr21\t9719768\t9721892\tALR/Alpha\t1004\t+\n+chr21\t9721905\t9725582\tALR/Alpha\t1010\t+\n+chr21\t9725582\t9725977\tL1PA3\t3288\t+\n+chr21\t9726021\t9729309\tALR/Alpha\t1051\t+\n+chr21\t9729320\t9729809\tL1PA3\t3897\t-\n+chr21\t9729809\t9730866\tL1P1\t8367\t+\n+chr21\t9730866\t9734026\tALR/Alpha\t1036\t-\n+chr21\t9734037\t9757471\tALR/Alpha\t1182\t-\n+chr21\t9757520\t9758476\tALR/Alpha\t1092\t-\n+chr21\t9758521\t9764575\tL1PA3\t26286\t-\n+chr21\t9764577\t9778787\tALR/Alpha\t1141\t-\n+chr21\t9778798\t9788657\tALR/Alpha\t1188\t-\n+chr21\t9788657\t9794680\tL1PA3\t27485\t-\n+chr21\t9794680\t9795266\tALR/Alpha\t1011\t-\n+chr21\t9795278\t9795587\tALR/Alpha\t979\t-\n+chr21\t9795589\t9795713\t(GAATG)n\t308\t+\n+chr21\t9795736\t9795894\t(GAATG)n\t683\t+\n+chr21\t9795911\t9796007\t(GAATG)n\t345\t+\n+chr21\t9796028\t9796187\t(GAATG)n\t756\t+\n+chr21\t9796202\t9796615\t(GAATG)n\t891\t+\n+chr21\t9796637\t9796824\t(GAATG)n\t621\t+\n+chr21\t9796824\t9796866\tHSATII\t242\t-\n+chr21\t9796866\t9797049\t(GAATG)n\t621\t+\n+chr21\t9797067\t9797436\t(GAATG)n\t900\t+\n+chr21\t9797482\t9797839\t(GAATG)n\t1008\t+\n+chr21\t9797866\t9798044\t(GAATG)n\t858\t+\n+chr21\t9798051\t9798118\t(GAGTG)n\t259\t+\n+chr21\t9798118\t9798658\t(GAATG)n\t969\t+\n+chr21\t9798626\t9798765\t(AAATG)n\t201\t+\n+chr21\t9798770\t9798950\t(GAGTG)n\t571\t+\n+chr21\t9798908\t9799265\t(GAATG)n\t942\t+\n+chr21\t9799280\t9799460\t(GAATG)n\t813\t+\n+chr21\t9799500\t9800262\t(GAATG)n\t933\t+\n+chr21\t9800289\t9800469\t(GAATG)n\t666\t+\n+chr21\t9800481\t9800797\t(GAATG)n\t977\t+\n+chr21\t9800840\t9800878\t(GAGTG)n\t225\t+\n+chr21\t9800913\t9801092\t(GAGTG)n\t930\t+\n+chr21\t9801092\t9801169\t(GAATG)n\t298\t+\n+chr21\t9801182\t9801639\t(GAATG)n\t747\t+\n+chr21\t9801651\t9801769\t(GAATG)n\t330\t+\n+chr21\t9801781\t9802265\t(GAATG)n\t747\t+\n+chr21\t9802265\t9802310\t(GAGTG)n\t245\t+\n+chr21\t9802310\t9802490\t(GAATG)n\t1203\t+\n+chr21\t9802490\t9802503\t(GAGTG)n\t245\t+\n+chr21\t9802508\t9802679\t(GAAT
G)n\t660\t+\n+chr21\t9802699\t9803425\t(GAATG)n\t1008\t+\n+chr21\t9803427\t9803488\t(GAGTG)n\t332\t+\n+chr21\t9803490\t9803789\t(GAATG)n\t708\t+\n+chr21\t9803803\t9804202\t(GAATG)n\t897\t+\n+chr21\t9804215\t9804262\t(GAATG)n\t261\t+\n+chr21\t9804276\t9804450\t(GAATG)n\t771\t+\n+chr21\t9804469\t9804637\t(GAATG)n\t756\t+\n+chr21\t9804660\t9804840\t(GAATG)n\t729\t+\n+chr21\t9804905\t9805085\t(GAATG)n\t726\t+\n+chr21\t9805118\t9805404\t(GAATG)n\t930\t+\n+chr21\t9805416\t9805716\t(GAATG)n\t708\t+\n+chr21\t9805730\t9806084\t(GAATG)n\t1050\t+\n+chr21\t9806147\t9806522\t(GAATG)n\t765\t+\n+chr21\t9806555\t9806812\t(GAATG)n\t661\t+\n+chr21\t9806824\t9807184\t(GAATG)n\t1080\t+\n+chr21\t9807228\t9807661\t(GAATG)n\t888\t+\n+chr21\t9807669\t9807698\t(GAGTG)n\t231\t+\n+chr21\t9807698\t9808290\t(GAATG)n\t807\t+\n+chr21\t9808301\t9808897\t(GAATG)n\t984\t+\n+chr21\t9808920\t9809796\t(GAATG)n\t960\t+\n+chr21\t9809843\t9810023\t(GAATG)n\t972\t+\n+chr21\t9810043\t9810492\t(GAATG)n\t690\t+\n+chr21\t9810503\t9810553\t(GAATG)n\t208\t+\n+chr21\t9810554\t9810733\t(GAGTG)n\t828\t+\n+chr21\t9810696\t9811576\t(GAATG)n\t1005\t+\n+chr21\t9811606\t9811772\t(GAATG)n\t604\t+\n+chr21\t9811778\t9812022\t(GAGTG)n\t858\t+\n+chr21\t9812022\t9812464\t(GAATG)n\t1017\t+\n+chr21\t9812479\t9812900\t(GAATG)n\t729\t+\n+chr21\t9812901\t9812954\t(GAGTG)n\t235\t+\n+chr21\t9812958\t9813124\t(GAATG)n\t740\t+\n+chr21\t9813179\t9813356\t(GAGTG)n\t819\t+\n+chr21\t9813335\t9813790\t(GAATG)n\t837\t+\n+chr21\t9813801\t9813973\t(GAATG)n\t582\t+\n+chr21\t9814004\t9814407\t(GAATG)n\t780\t+\n+chr21\t9814408\t9814467\t(GAGTG)n\t300\t+\n+chr21\t9814467\t9814824\t(GAATG)n\t921\t+\n+chr21\t9814871\t9815045\t(GAATG)n\t654\t+\n+chr21\t9815045\t9815117\t(GAGTG)n\t423\t+\n+chr21\t9815118\t9815297\t(GAATG)n\t926\t+\n+chr21\t9815356\t9815455\t(GAATG)n\t325\t+\n+chr21\t9815463\t9815640\t(GAGTG)n\t981\t+\n+chr21\t9815642\t9815982\t(GAATG)n\t805\t+\n+chr21\t9816000\t9816174\t(GAATG)n\t660\t+\n+chr21\t9816197\t9816535\t(GAATG)n\t919\t+\n+c
hr21\t9816595\t9816936\t(GAATG)n\t867\t+\n+chr21\t9816995\t9817175\t(GAATG)n\t510\t+\n+chr21\t9817189\t9817257\t(GAGTG)n\t269\t+\n+chr21\t9817258\t9817854\t(GAATG)n\t1092\t+\n+chr21\t9817883\t9818578\t(GAATG)n\t966\t+\n+chr21\t9818589\t9818768\t(GAATG)n\t552\t+\n+chr21\t9818798\t9818860\t(GAATG)n\t213\t+\n+chr21\t9818872\t9819215\t(GAATG)n\t894\t+\n+chr21\t9819230\t9819370\t(GAATG)n\t312\t+\n+chr21\t9819426\t9819773\t(GAATG)n\t897\t+\n+chr21\t9819798\t9819976\t(GAATG)n\t878\t+\n+chr21\t9819990\t9820169\t(GAATG)n\t680\t+\n+chr21\t9820188\t9820366\t(GAATG)n\t738\t+\n+chr21\t9820322\t9820507\t(GAGTG)n\t641\t+\n+chr21\t9820'..b'6885745\t46886054\tAluSx\t1935\t+\n+chr21\t46886059\t46886096\t(TG)n\t270\t+\n+chr21\t46886242\t46886708\tL1ME4a\t405\t+\n+chr21\t46886812\t46886962\tL1ME4a\t361\t+\n+chr21\t46887080\t46887388\tAluSx\t1967\t+\n+chr21\t46888920\t46889201\tMER58B\t1178\t-\n+chr21\t46889293\t46889629\tAluJo\t1519\t+\n+chr21\t46889638\t46889821\tAluJo\t850\t+\n+chr21\t46889914\t46890044\tL1ME3B\t405\t+\n+chr21\t46890044\t46890353\tAluSx\t1946\t-\n+chr21\t46890353\t46890614\tL1ME3B\t405\t+\n+chr21\t46890829\t46891136\tL1MC4a\t413\t-\n+chr21\t46891141\t46891421\tMLT1A1\t693\t-\n+chr21\t46891477\t46891642\tL1MC4_3endX\t343\t-\n+chr21\t46891836\t46892003\tFRAM\t650\t-\n+chr21\t46892029\t46892086\tAT_rich\t22\t+\n+chr21\t46892396\t46892604\tL1MC4a\t314\t-\n+chr21\t46892583\t46892681\tL1MD\t293\t-\n+chr21\t46896438\t46896576\tL1PREC2\t277\t+\n+chr21\t46896932\t46897040\tL1ME4a\t288\t+\n+chr21\t46898261\t46898291\t(T)n\t195\t+\n+chr21\t46898475\t46898726\tL1ME4a\t442\t+\n+chr21\t46898714\t46898903\tL1MD2\t776\t+\n+chr21\t46898903\t46899208\tMER2\t1135\t+\n+chr21\t46899304\t46899922\tL1MD2\t2612\t+\n+chr21\t46899916\t46900310\tL1MD2\t1306\t+\n+chr21\t46900317\t46900724\tMSTB1\t2258\t+\n+chr21\t46900724\t46902105\tL1MD2\t2718\t+\n+chr21\t46902161\t46902336\tL1ME4a\t384\t+\n+chr21\t46902470\t46902579\tMER45B\t546\t-\n+chr21\t46903800\t46903973\tL1M5\t344\t+\n+chr21\t46904289\t
46904311\tAT_rich\t22\t+\n+chr21\t46906284\t46906449\tG-rich\t373\t+\n+chr21\t46909243\t46909287\tL2\t195\t+\n+chr21\t46909413\t46909464\tAT_rich\t23\t+\n+chr21\t46909464\t46909768\tAluJo\t2015\t+\n+chr21\t46909769\t46910593\tL1ME3A\t1283\t-\n+chr21\t46910643\t46910947\tAluSx\t2427\t-\n+chr21\t46911036\t46911163\tAluJo\t773\t+\n+chr21\t46911163\t46911432\tAluSx\t2091\t+\n+chr21\t46913108\t46913727\tL1PA3\t5539\t-\n+chr21\t46915754\t46916276\tMLT1E2\t1609\t-\n+chr21\t46916330\t46916418\tL1M5\t226\t-\n+chr21\t46916433\t46916857\tL1M5\t1626\t-\n+chr21\t46916857\t46917170\tAluJo\t1400\t+\n+chr21\t46917170\t46917283\tL1M5\t1626\t-\n+chr21\t46917286\t46917758\tL1MA4A\t2227\t+\n+chr21\t46917764\t46917862\tL1MA4A\t435\t+\n+chr21\t46917957\t46918166\tMIR\t320\t-\n+chr21\t46918500\t46918536\tAT_rich\t22\t+\n+chr21\t46918925\t46919030\tGA-rich\t252\t+\n+chr21\t46919402\t46919654\tL2\t390\t+\n+chr21\t46919654\t46919928\tAluSx\t1723\t-\n+chr21\t46919928\t46920171\tL2\t390\t+\n+chr21\t46920752\t46922048\tL1PA7\t7374\t-\n+chr21\t46922123\t46922411\tL1ME1\t883\t+\n+chr21\t46922411\t46922704\tAluJb\t1385\t-\n+chr21\t46922718\t46922935\tL1ME1\t699\t+\n+chr21\t46923377\t46923802\tMLT1K\t323\t-\n+chr21\t46924116\t46924575\tHAL1\t655\t+\n+chr21\t46924661\t46924875\tMER74A\t443\t+\n+chr21\t46925595\t46925624\tAT_rich\t22\t+\n+chr21\t46926764\t46927048\tMLT1I\t641\t-\n+chr21\t46927048\t46927090\t(A)n\t378\t+\n+chr21\t46927155\t46927194\tMLT1I\t248\t-\n+chr21\t46927433\t46927614\t(TTAGGG)n\t234\t+\n+chr21\t46928301\t46928745\tMER4A1\t2938\t-\n+chr21\t46929613\t46930969\tL1MC3\t5007\t-\n+chr21\t46930969\t46931413\tMSTB1\t1749\t-\n+chr21\t46931413\t46931818\tL1MC3\t5007\t-\n+chr21\t46931945\t46932055\tMER34B-int\t303\t-\n+chr21\t46932058\t46932281\tAluJb\t1504\t-\n+chr21\t46932282\t46932323\tAluJb\t228\t-\n+chr21\t46932323\t46932751\tMER34B-int\t2060\t-\n+chr21\t46932771\t46933151\tLTR10C\t1336\t-\n+chr21\t46933171\t46933204\t(CA)n\t297\t+\n+chr21\t46933260\t46934052\tMER34B-int\t3116\t-\n+c
hr21\t46934052\t46934352\tAluY\t2333\t-\n+chr21\t46934352\t46934522\tMER34B-int\t3116\t-\n+chr21\t46934536\t46935047\tMER34B-int\t1297\t-\n+chr21\t46935651\t46936098\tMER34B-int\t1588\t-\n+chr21\t46936101\t46936169\tMLT2B3\t330\t+\n+chr21\t46936194\t46936262\tMLT2B3\t406\t+\n+chr21\t46936287\t46936355\tMLT2B3\t375\t+\n+chr21\t46936380\t46936448\tMLT2B3\t330\t+\n+chr21\t46936473\t46936541\tMLT2B3\t346\t+\n+chr21\t46936566\t46936632\tMLT2B3\t335\t+\n+chr21\t46936657\t46936725\tMLT2B5\t417\t+\n+chr21\t46936750\t46936977\tMLT2B3\t2194\t+\n+chr21\t46936977\t46937339\tTHE1C\t2217\t+\n+chr21\t46937339\t46937689\tMLT2B5\t1691\t+\n+chr21\t46937696\t46938061\tMER34B-int\t678\t-\n+chr21\t46938036\t46938374\tMER34B-int\t910\t-\n+chr21\t46938415\t46938527\tMER34B-int\t268\t-\n+chr21\t46938776\t46938841\tMER72\t3923\t-\n+chr21\t46938841\t46939137\tAluSq\t2287\t-\n+chr21\t46939137\t46939777\tMER72\t3923\t-\n+chr21\t46939986\t46940174\tL1MC\t565\t-\n+chr21\t46940179\t46940735\tMER34C_\t2769\t-\n+chr21\t46940746\t46941357\tL1MC\t1506\t-\n+chr21\t46941373\t46941479\tLTR60\t529\t+\n+chr21\t46941590\t46941894\tAluYb8\t2829\t+\n+chr21\t46941894\t46942298\tLTR60\t958\t+\n+chr21\t46942142\t46944181\tTAR1\t16459\t+\n+chr21\t46944181\t46944323\t(TTAGGG)n\t1057\t+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/human.hg18.genome
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/genomes/human.hg18.genome Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+chr1 247249719
+chr1_random 1663265
+chr10 135374737
+chr10_random 113275
+chr11 134452384
+chr11_random 215294
+chr12 132349534
+chr13 114142980
+chr13_random 186858
+chr14 106368585
+chr15 100338915
+chr15_random 784346
+chr16 88827254
+chr16_random 105485
+chr17 78774742
+chr17_random 2617613
+chr18 76117153
+chr18_random 4262
+chr19 63811651
+chr19_random 301858
+chr2 242951149
+chr2_random 185571
+chr20 62435964
+chr21 46944323
+chr21_random 1679693
+chr22 49691432
+chr22_random 257318
+chr22_h2_hap1 63661
+chr3 199501827
+chr3_random 749256
+chr4 191273063
+chr4_random 842648
+chr5 180857866
+chr5_random 143687
+chr5_h2_hap1 1794870
+chr6 170899992
+chr6_random 1875562
+chr6_cox_hap1 4731698
+chr6_qbl_hap2 4565931
+chr7 158821424
+chr7_random 549659
+chr8 146274826
+chr8_random 943810
+chr9 140273252
+chr9_random 1146434
+chrM 16571
+chrX 154913754
+chrX_random 1719168
+chrY 57772954
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/human.hg19.genome
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/genomes/human.hg19.genome Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,94 @@
+chr1 249250621
+chr2 243199373
+chr3 198022430
+chr4 191154276
+chr5 180915260
+chr6 171115067
+chr7 159138663
+chrX 155270560
+chr8 146364022
+chr9 141213431
+chr10 135534747
+chr11 135006516
+chr12 133851895
+chr13 115169878
+chr14 107349540
+chr15 102531392
+chr16 90354753
+chr17 81195210
+chr18 78077248
+chr20 63025520
+chrY 59373566
+chr19 59128983
+chr22 51304566
+chr21 48129895
+chr6_ssto_hap7 4928567
+chr6_mcf_hap5 4833398
+chr6_cox_hap2 4795371
+chr6_mann_hap4 4683263
+chr6_apd_hap1 4622290
+chr6_qbl_hap6 4611984
+chr6_dbb_hap3 4610396
+chr17_ctg5_hap1 1680828
+chr4_ctg9_hap1 590426
+chr1_gl000192_random 547496
+chrUn_gl000225 211173
+chr4_gl000194_random 191469
+chr4_gl000193_random 189789
+chr9_gl000200_random 187035
+chrUn_gl000222 186861
+chrUn_gl000212 186858
+chr7_gl000195_random 182896
+chrUn_gl000223 180455
+chrUn_gl000224 179693
+chrUn_gl000219 179198
+chr17_gl000205_random 174588
+chrUn_gl000215 172545
+chrUn_gl000216 172294
+chrUn_gl000217 172149
+chr9_gl000199_random 169874
+chrUn_gl000211 166566
+chrUn_gl000213 164239
+chrUn_gl000220 161802
+chrUn_gl000218 161147
+chr19_gl000209_random 159169
+chrUn_gl000221 155397
+chrUn_gl000214 137718
+chrUn_gl000228 129120
+chrUn_gl000227 128374
+chr1_gl000191_random 106433
+chr19_gl000208_random 92689
+chr9_gl000198_random 90085
+chr17_gl000204_random 81310
+chrUn_gl000233 45941
+chrUn_gl000237 45867
+chrUn_gl000230 43691
+chrUn_gl000242 43523
+chrUn_gl000243 43341
+chrUn_gl000241 42152
+chrUn_gl000236 41934
+chrUn_gl000240 41933
+chr17_gl000206_random 41001
+chrUn_gl000232 40652
+chrUn_gl000234 40531
+chr11_gl000202_random 40103
+chrUn_gl000238 39939
+chrUn_gl000244 39929
+chrUn_gl000248 39786
+chr8_gl000196_random 38914
+chrUn_gl000249 38502
+chrUn_gl000246 38154
+chr17_gl000203_random 37498
+chr8_gl000197_random 37175
+chrUn_gl000245 36651
+chrUn_gl000247 36422
+chr9_gl000201_random 36148
+chrUn_gl000235 34474
+chrUn_gl000239 33824
+chr21_gl000210_random 27682
+chrUn_gl000231 27386
+chrUn_gl000229 19913
+chrM 16571
+chrUn_gl000226 15008
+chr18_gl000207_random 4262
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/mouse.mm8.genome
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/genomes/mouse.mm8.genome Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,35 @@
+chr1 197069962
+chr2 181976762
+chr3 159872112
+chr4 155029701
+chr5 152003063
+chr6 149525685
+chr7 145134094
+chr8 132085098
+chr9 124000669
+chrM 16299
+chrX 165556469
+chrY 16029404
+chr10 129959148
+chr11 121798632
+chr12 120463159
+chr13 120614378
+chr14 123978870
+chr15 103492577
+chr16 98252459
+chr17 95177420
+chr18 90736837
+chr19 61321190
+chr1_random 172274
+chr5_random 2921247
+chr7_random 243910
+chr8_random 206961
+chr9_random 17232
+chrX_random 39696
+chrY_random 14577732
+chr10_random 10781
+chr13_random 436191
+chr15_random 105932
+chr17_random 89091
+chrUn_random 1540053
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/mouse.mm9.genome
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/genomes/mouse.mm9.genome Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,36 @@
+chr1 197195432
+chr2 181748087
+chr3 159599783
+chr4 155630120
+chr5 152537259
+chr6 149517037
+chr7 152524553
+chr8 131738871
+chr9 124076172
+chr10 129993255
+chr11 121843856
+chr12 121257530
+chr13 120284312
+chr14 125194864
+chr15 103494974
+chr16 98319150
+chr17 95272651
+chr18 90772031
+chr19 61342430
+chrX 166650296
+chrY 15902555
+chrM 16299
+chr13_random 400311
+chr16_random 3994
+chr17_random 628739
+chr1_random 1231697
+chr3_random 41899
+chr4_random 160594
+chr5_random 357350
+chr7_random 362490
+chr8_random 849593
+chr9_random 449403
+chrUn_random 5900358
+chrX_random 1785075
+chrY_random 58682461
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,48 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= multiCovMain.cpp multiCovBam.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= multiCovBam
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/annotateBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,43 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= annotateMain.cpp annotateBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= annotateBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,209 @@\n+/*****************************************************************************\n+  annotateBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "annotateBed.h"\n+\n+// build\n+BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,\n+            const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) :\n+\n+    _mainFile(mainFile),\n+    _annoFileNames(annoFileNames),\n+    _annoTitles(annoTitles),\n+    _sameStrand(sameStrand),\n+    _diffStrand(diffStrand),\n+    _reportCounts(reportCounts),\n+    _reportBoth(reportBoth)\n+{\n+    _bed = new BedFile(_mainFile);\n+}\n+\n+\n+// destroy and delete the open file pointers\n+BedAnnotate::~BedAnnotate(void) {\n+    delete _bed;\n+    CloseAnnoFiles();\n+}\n+\n+\n+void BedAnnotate::OpenAnnoFiles() {\n+    for (size_t i=0; i < _annoFileNames.size(); ++i) {\n+        BedFile *file = new BedFile(_annoFileNames[i]);\n+        file->Open();\n+        _annoFiles.push_back(file);\n+    }\n+}\n+\n+\n+void BedAnnotate::CloseAnnoFiles() {\n+    for (size_t i=0; i < _annoFiles.size(); ++i) {\n+        BedFile *file = _annoFiles[i];\n+        delete file;\n+        _annoFiles[i] = NULL;\n+    }\n+}\n+\n+\n+void BedAnnotate::PrintHeader() {\n+    // print a hash to indicate header and then write a tab\n+    // for each field in the main file.\n+    printf("#");\n+    for (size_t i = 0; i < _bed->bedType; ++i)\n+        printf("\\t");\n+\n+    // now print the label for each file.\n+    if (_reportBoth == false) {\n+        for (size_t i = 0; i < _annoTitles.size(); ++i)\n+            printf("%s\\t", _annoTitles[i].c_str());\n+        
printf("\\n");\n+    }\n+    else {\n+        for (size_t i = 0; i < _annoTitles.size(); ++i)\n+            printf("%s_cnt\\t%s_pct", _annoTitles[i].c_str(), _annoTitles[i].c_str());\n+        printf("\\n");\n+    }\n+}\n+\n+\n+void BedAnnotate::InitializeMainFile() {\n+    // process each chromosome\n+    masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();\n+    masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();\n+    for (; chromItr != chromEnd; ++chromItr) {\n+        // for each chrom, process each bin\n+        binsToBedCovLists::iterator binItr = chromItr->second.begin();\n+        binsToBedCovLists::iterator binEnd = chromItr->second.end();\n+        for (; binItr != binEnd; ++binItr) {\n+            // initialize BEDCOVLIST in this chrom/bin\n+            vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();\n+            vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();\n+            for (; bedItr != bedEnd; ++bedItr) {\n+                // initialize the depthMaps, counts, etc. for each anno file.\n+                for (size_t i = 0; i < _annoFiles.size(); ++i) {\n+                    map<unsigned int, DEPTH> dummy;\n+                    bedItr->depthMapList.push_back(dummy);\n+                    bedItr->counts.push_back(0);\n+                    bedItr->minOverlapStarts.push_back(INT_MAX);\n+                }\n+            }\n+        }\n+    }\n+}\n+\n+\n+void BedAnnotate::AnnotateBed() {\n+\n+    // load the "main" bed file into a map so\n+    // that we can easily compare each annoFile to it for overlaps\n+    _bed->loadBedCovListFileIntoMap();\n+    // open the annotations files for processing;\n+    OpenAnnoFiles();\n+    // initialize counters, depths, etc. 
for the main file\n+    InitializeMainFile();\n+\n+    // annotate the main file with the coverage from the annotation files.\n+    for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {\n+        // grab the current annotation file.\n+        BedFile *anno = _annoFiles[annoIndex];\n+        int lineNum = 0;\n+        BED a, nullBed;\n+        BedLi'..b'   a = nullBed;\n+            }\n+        }\n+    }\n+\n+    // report the annotations of the main file from the anno file.\n+    ReportAnnotations();\n+    // close the annotations files;\n+    CloseAnnoFiles();\n+}\n+\n+\n+void BedAnnotate::ReportAnnotations() {\n+\n+    if (_annoTitles.size() > 0) {\n+        PrintHeader();\n+    }\n+\n+    // process each chromosome\n+    masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();\n+    masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();\n+    for (; chromItr != chromEnd; ++chromItr) {\n+        // for each chrom, process each bin\n+        binsToBedCovLists::const_iterator binItr = chromItr->second.begin();\n+        binsToBedCovLists::const_iterator binEnd = chromItr->second.end();\n+        for (; binItr != binEnd; ++binItr) {\n+            // for each chrom & bin, compute and report\n+            // the observed coverage for each feature\n+            vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();\n+            vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();\n+            for (; bedItr != bedEnd; ++bedItr) {\n+                // print the main BED entry.\n+                _bed->reportBedTab(*bedItr);\n+\n+                // now report the coverage from each annotation file.\n+                for (size_t i = 0; i < _annoFiles.size(); ++i) {\n+                    unsigned int totalLength = 0;\n+                    int zeroDepthCount = 0; // number of bases with zero depth\n+                    int depth          = 0; // tracks the depth at the current base\n+\n+      
              // the start is either the first base in the feature OR\n+                    // the leftmost position of an overlapping feature. e.g. (s = start):\n+                    // A    ----------\n+                    // B    s    ------------\n+                    int start          = min(bedItr->minOverlapStarts[i], bedItr->start);\n+\n+                    map<unsigned int, DEPTH>::const_iterator depthItr;\n+                    map<unsigned int, DEPTH>::const_iterator depthEnd;\n+\n+                    // compute the coverage observed at each base in the feature marching from start to end.\n+                    for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {\n+                        // map pointer grabbing the starts and ends observed at this position\n+                        depthItr = bedItr->depthMapList[i].find(pos);\n+                        depthEnd = bedItr->depthMapList[i].end();\n+\n+                        // increment coverage if starts observed at this position.\n+                        if (depthItr != depthEnd)\n+                            depth += depthItr->second.starts;\n+                        // update zero depth\n+                        if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))\n+                            zeroDepthCount++;\n+                        // decrement coverage if ends observed at this position.\n+                        if (depthItr != depthEnd)\n+                            depth = depth - depthItr->second.ends;\n+                    }\n+                    // Summarize the coverage for the current interval,\n+                    CHRPOS length     = bedItr->end - bedItr->start;\n+                    totalLength       += length;\n+                    int nonZeroBases   = (length - zeroDepthCount);\n+                    float fractCovered = (float) nonZeroBases / length;\n+                    if (_reportCounts == false && _reportBoth == false)\n+                        printf("%f\\t", 
fractCovered);\n+                    else if (_reportCounts == true && _reportBoth == false)\n+                        printf("%d\\t", bedItr->counts[i]);\n+                    else if (_reportCounts == false && _reportBoth == true)\n+                        printf("%d\\t%f\\t", bedItr->counts[i], fractCovered);\n+                }\n+                // print newline for next feature.\n+                printf("\\n");\n+            }\n+        }\n+    }\n+}\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,71 @@
+/*****************************************************************************
+  annotateBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef ANNOTATEBED_H
+#define ANNOTATEBED_H
+
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+// BedAnnotate annotates every feature of a "main" BED/GFF/VCF file with the
+// coverage it receives from each of a list of annotation files, and prints
+// one output line per main-file feature.
+class BedAnnotate {
+
+public:
+
+    // constructor
+    //   mainFile       - the file whose features are annotated.
+    //   annoFileNames  - the annotation files, one output column (or column
+    //                    pair) is reported per file.
+    //   annoTitles     - optional column titles; when non-empty a header
+    //                    line is printed before the annotations.
+    //   sameStrand /
+    //   diffStrand     - strandedness requirements for counted overlaps.
+    //   reportCounts /
+    //   reportBoth     - select the reported value (see the members below).
+    BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
+                const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth);
+
+    // destructor
+    ~BedAnnotate(void);
+
+    // annotate the master file with all of the annotation files.
+    // Initializes the main file, walks each annotation file in turn,
+    // reports the annotations and finally closes the annotation files.
+    void AnnotateBed();
+
+private:
+
+    // input files.
+    string _mainFile;
+    vector<string> _annoFileNames;
+    vector<string> _annoTitles;
+
+    // instance of a bed file class.
+    // _bed is the main file; _annoFiles holds one BedFile per annotation file.
+    BedFile *_bed;
+    vector<BedFile*> _annoFiles;
+
+    // do we care about strandedness when counting coverage?
+    bool _sameStrand;
+    bool _diffStrand;
+    
+    // what is reported per annotation file:
+    //   neither flag set   -> fraction of the feature that is covered
+    //   _reportCounts only -> number of overlapping annotation features
+    //   _reportBoth only   -> the count followed by the fraction
+    bool _reportCounts;
+    bool _reportBoth;
+
+    // private function for reporting coverage information
+    // (prints the header when titles were supplied, then every main-file
+    // feature followed by its per-annotation-file values).
+    void ReportAnnotations();
+
+    // NOTE(review): presumably opens one BedFile per entry of
+    // _annoFileNames into _annoFiles -- definition not visible here.
+    void OpenAnnoFiles();
+
+    // called once at the end of AnnotateBed().
+    void CloseAnnoFiles();
+
+    // called by ReportAnnotations() only when _annoTitles is non-empty.
+    void PrintHeader();
+
+    // called once at the start of AnnotateBed().
+    void InitializeMainFile();
+};
+#endif /* ANNOTATEBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,170 @@
+/*****************************************************************************
+  annotateMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "annotateBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define the program name
+#define PROGRAM_NAME "annotateBed"
+
+// define our parameter checking macro.
+// Evaluates to true when the current argument (argv[i], both taken from the
+// expansion site) is exactly the option string `param`:
+//   param     - the option literal, e.g. "-files"
+//   paramLen  - the length of `param`
+//   actualLen - strlen(argv[i])
+// The lengths must match, so neither prefixes nor longer strings are accepted.
+// Every argument and the whole expansion are parenthesized so the macro can
+// be combined safely with !, || and && at the call site.
+#define PARAMETER_CHECK(param, paramLen, actualLen) \
+    (((actualLen) == (paramLen)) && (strncmp(argv[i], (param), (paramLen)) == 0))
+
+// function declarations
+void ShowHelp(void);
+
+// Entry point for annotateBed.
+// Parses the command line, validates the option combination and, when
+// everything is in order, runs BedAnnotate::AnnotateBed().
+// Returns 0 on success; every error path ends in ShowHelp(), which exits
+// the process with status 1.
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input file
+    string mainFile;
+
+    // parm flags
+    bool sameStrand     = false;
+    bool diffStrand     = false;
+    bool haveBed        = false;
+    bool haveFiles      = false;
+    bool reportCounts   = false;
+    bool reportBoth     = false;
+
+    // list of annotation files / names
+    vector<string> inputFiles;
+    vector<string> inputTitles;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        // "--help" is six characters long; the length passed here must
+        // match exactly or the option can never be recognized.
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 6, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed  = true;
+                mainFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
+            // consume every following argument up to the next option.
+            while ((i+1) < argc && argv[i+1][0] != '-') {
+                inputFiles.push_back(argv[i+1]);
+                i++;
+            }
+            // "-files" with nothing behind it does not count as input.
+            haveFiles = !inputFiles.empty();
+        }
+        else if(PARAMETER_CHECK("-names", 6, parameterLength)) {
+            // consume every following argument up to the next option.
+            while ((i+1) < argc && argv[i+1][0] != '-') {
+                inputTitles.push_back(argv[i+1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-counts", 7, parameterLength)) {
+            reportCounts = true;
+        }
+        else if(PARAMETER_CHECK("-both", 5, parameterLength)) {
+            reportBoth = true;
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            sameStrand = true;
+        }
+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
+            diffStrand = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed || !haveFiles) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (sameStrand && diffStrand) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, sameStrand, diffStrand, reportCounts, reportBoth);
+        ba->AnnotateBed();
+        delete ba;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+// Print the annotateBed usage message to stderr and terminate the process
+// with exit status 1.  Never returns.
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Annotates the depth & breadth of coverage of features from multiple files" << endl;
+    cerr << "\t on the intervals in -i." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -files FILE1 FILE2 .. FILEn" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    // the names label the annotation files given with -files, not the -i file.
+    cerr << "\t-names\t"        << "A list of names (one / file) to describe each file in -files." << endl;
+    cerr                        << "\t\tThese names will be printed as a header line." << endl << endl;
+
+    cerr << "\t-counts\t"       << "Report the count of features in each file that overlap -i." << endl;
+    cerr                        << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl;
+
+    cerr << "\t-both\t"         << "Report the counts followed by the % coverage." << endl;
+    cerr                        << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl;
+
+    cerr << "\t-s\t"            << "Require same strandedness.  That is, only counts overlaps" << endl;
+    cerr                        << "\t\ton the _same_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+    cerr << "\t-S\t"            << "Require different strandedness.  That is, only count overlaps" << endl;
+    cerr                        << "\t\ton the _opposite_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bamToBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bamToBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,47 @@
+# Build rules for the bamToBed program.
+# Object files are written to OBJ_DIR and the linked binary to BIN_DIR.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+    -I$(UTILITIES_DIR)/BamTools-Ancillary
+
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : one .o per entry of SOURCES
+# EXT_OBJECTS   : objects built by other directories, located in OBJ_DIR
+# BUILT_OBJECTS : this program's own objects, located in OBJ_DIR
+SOURCES= bamToBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=BamAncillary.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= bamToBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link our own and the external objects against libbamtools into BIN_DIR.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# compile each source; $(*F) is the file-name part of the target's stem.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) 
+
+# external objects are produced by a sub-make in the utilities tree.
+# NOTE(review): INCLUDES refers to "BamTools-Ancillary" while this rule
+# enters "BamAncillary/" -- confirm which directory actually exists.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamAncillary/
+
+# removes every file in OBJ_DIR and BIN_DIR, not only this program's output.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,564 @@\n+/*****************************************************************************\n+  bamToBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "version.h"\n+#include "api/BamReader.h"\n+#include "api/BamAux.h"\n+#include "BamAncillary.h"\n+#include "bedFile.h"\n+using namespace BamTools;\n+\n+#include <vector>\n+#include <algorithm>    // for swap()\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+\n+// define our program name\n+#define PROGRAM_NAME "bamToBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag,\n+                     const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar);\n+void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);\n+\n+void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar);\n+void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0");\n+void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,\n+                const RefVector &refs, bool useEditDistance);\n+\n+void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,\n+                     vector<int> &blockEnds, int &alignmentEnd);\n+string BuildCigarString(const vector<CigarOp> &cigar);\n+\n+bool IsCorrectMappingForBEDPE (const 
BamAlignment &bam);\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bamFile = "stdin";\n+    string color   = "255,0,0";\n+    string tag     = "";\n+\n+    bool haveBam           = true;\n+    bool haveColor         = false;\n+    bool haveOtherTag      = false;\n+    bool writeBedPE        = false;\n+    bool writeBed12        = false;\n+    bool useEditDistance   = false;\n+    bool useAlignmentScore = false;\n+    bool useCigar          = false;\n+    bool obeySplits        = false;\n+\n+    // check to see if we should print out some help\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                bamFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n+                writeBedPE = true;\n+        }\n+        else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n+                writeBed12 = true;\n+        }\n+        else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n+                obeySplits = true;\n+        }\n+        else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n+                useEditDistance = true;\n+        }\n+        else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) {\n+                useCigar = true;\n+        }\n+        else if(PARAMETER_CHECK("-as", 3, parameterLength)) {\n+                useAlignmentScore = true;\n+ 
       }\n+        else if(PARAMETER_CHECK("-color", 6, parameterLength)) {\n+            if ((i+1) < argc) {\n+        '..b'etc.\n+    printf("%d\\t%d\\t%s\\t%d\\t", bam.Position, alignmentEnd, color.c_str(), (int) blockStarts.size());\n+\n+    // now write the lengths portion\n+    unsigned int b;\n+    for (b = 0; b < blockLengths.size() - 1; ++b) {\n+        printf("%d,", blockLengths[b]);\n+    }\n+    printf("%d\\t", blockLengths[b]);\n+\n+    // now write the starts portion\n+    for (b = 0; b < blockStarts.size() - 1; ++b) {\n+        printf("%d,", blockStarts[b]);\n+    }\n+    printf("%d\\n", blockStarts[b]);\n+}\n+\n+\n+void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) {\n+\n+    // initialize BEDPE variables\n+    string chrom1, chrom2, strand1, strand2;\n+    int start1, start2, end1, end2;\n+    uint32_t editDistance1, editDistance2;\n+    start1 = start2 = end1 = end2 = -1;\n+    chrom1 = chrom2 = strand1 = strand2 = ".";\n+    editDistance1 = editDistance2 = 0;\n+    uint16_t minMapQuality = 0;\n+\n+    // extract relevant info for end 1\n+    if (bam1.IsMapped()) {\n+        chrom1 = refs.at(bam1.RefID).RefName;\n+        start1 = bam1.Position;\n+        end1   = bam1.GetEndPosition(false);\n+        strand1 = "+";\n+        if (bam1.IsReverseStrand()) strand1 = "-";\n+\n+        // extract the edit distance from the NM tag\n+        // if possible. otherwise, complain.\n+        if (useEditDistance == true && bam1.GetTag("NM", editDistance1) == false) {\n+            cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  
Exiting\\n";\n+            exit(1);\n+        }\n+    }\n+\n+    // extract relevant info for end 2\n+    if (bam2.IsMapped()) {\n+        chrom2 = refs.at(bam2.RefID).RefName;\n+        start2 = bam2.Position;\n+        end2   = bam2.GetEndPosition(false);\n+        strand2 = "+";\n+        if (bam2.IsReverseStrand()) strand2 = "-";\n+\n+        // extract the edit distance from the NM tag\n+        // if possible. otherwise, complain.\n+        if (useEditDistance == true && bam2.GetTag("NM", editDistance2) == false) {\n+            cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  Exiting\\n";\n+            exit(1);\n+        }\n+    }\n+\n+    // swap the ends if necessary\n+    if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {\n+        swap(chrom1, chrom2);\n+        swap(start1, start2);\n+        swap(end1, end2);\n+        swap(strand1, strand2);\n+    }\n+\n+    // report BEDPE using min mapQuality\n+    if (useEditDistance == false) {\n+        // compute the minimum mapping quality b/w the two ends of the pair.\n+        if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n+            minMapQuality = min(bam1.MapQuality, bam2.MapQuality);\n+\n+        printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n+                chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n+                bam1.Name.c_str(), minMapQuality, strand1.c_str(), strand2.c_str());\n+    }\n+    // report BEDPE using total edit distance\n+    else {\n+        uint16_t totalEditDistance = 0;\n+        if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n+            totalEditDistance = editDistance1 + editDistance2;\n+        else if (bam1.IsMapped() == true)\n+            totalEditDistance = editDistance1;\n+        else if (bam2.IsMapped() == true)\n+            totalEditDistance = editDistance2;\n+\n+        printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n+                
chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n+                bam1.Name.c_str(), totalEditDistance, strand1.c_str(), strand2.c_str());\n+    }\n+}\n+\n+\n+// deprecated.\n+bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n+\n+    if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n+        return true;\n+    }\n+    else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n+        return true;\n+    }\n+    else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n+        return true;\n+    }\n+    else return false;\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,44 @@
+# Build rules for the bed12ToBed6 program.
+# Object files are written to OBJ_DIR and the linked binary to BIN_DIR.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : one .o per entry of SOURCES
+# EXT_OBJECTS   : objects built by the utilities directories, located in OBJ_DIR
+# BUILT_OBJECTS : this program's own objects, located in OBJ_DIR
+SOURCES= bed12ToBed6.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o  lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= bed12ToBed6
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+
+# link our own and the external objects into BIN_DIR.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile each source; $(*F) is the file-name part of the target's stem.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# external objects are produced by sub-makes in the utilities tree.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes every file in OBJ_DIR and BIN_DIR, not only this program's output.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,168 @@
+/*****************************************************************************
+  bed12ToBed6.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "bedFile.h"
+#include "version.h"
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+
+// define our program name
+#define PROGRAM_NAME "bed12ToBed6"
+
+// define our parameter checking macro.
+// Evaluates to true when the current argument (argv[i], both taken from the
+// expansion site) is exactly the option string `param`:
+//   param     - the option literal, e.g. "-i"
+//   paramLen  - the length of `param`
+//   actualLen - strlen(argv[i])
+// The lengths must match, so neither prefixes nor longer strings are accepted.
+// Every argument and the whole expansion are parenthesized so the macro can
+// be combined safely with !, || and && at the call site.
+#define PARAMETER_CHECK(param, paramLen, actualLen) \
+    (((actualLen) == (paramLen)) && (strncmp(argv[i], (param), (paramLen)) == 0))
+
+
+// function declarations
+void ShowHelp(void);
+void DetermineBedInput(BedFile *bed);
+void ProcessBed(istream &bedInput, BedFile *bed);
+
+
+// set by -n: report the (1-based) block number as the score.
+bool addBlockNums = false;
+
+// Entry point for bed12ToBed6.
+// Parses the command line (-i <bed12>, -n) and hands the input over to
+// DetermineBedInput().  When -i is absent the input name stays "stdin".
+// Any unrecognized option ends in ShowHelp(), which exits with status 1.
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile       = "stdin";
+    bool haveBed         = true;
+
+    // first pass: honour -h / --help before anything else.
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        // "--help" is six characters long; the length passed here must
+        // match exactly or the option can never be recognized.
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 6, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // second pass: parse the options.
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-n", 2, parameterLength)) {
+            // -n is a bare flag: it takes no value, so the argument that
+            // follows it must not be skipped.
+            addBlockNums = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have an input files
+    if (!haveBed ) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedFile *bed       = new BedFile(bedFile);
+        DetermineBedInput(bed);
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+
+// Write the bed12ToBed6 usage text to stderr, then terminate the process
+// with exit status 1.  This function does not return.
+void ShowHelp(void) {
+
+    // banner: program name/version, author and one-line summary
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl
+         << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl
+         << "Summary: Splits BED12 features into discrete BED6 features." << endl << endl;
+
+    // invocation synopsis
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed12>" << endl << endl;
+
+    // option list
+    cerr << "Options: " << endl
+         << "\t-n\t" << "Force the score to be the (1-based) block number from the BED12." << endl << endl;
+
+    // asking for help always stops the program
+    exit(1);
+}
+
+
+// Decide where the BED12 records come from and start processing them.
+// For the special name "stdin" the standard input stream is used; otherwise
+// the named file must be openable, else an error is printed and the program
+// exits with status 1.
+void DetermineBedInput(BedFile *bed) {
+
+    // standard input needs no checking: process it right away.
+    if (bed->bedFile == "stdin") {
+        ProcessBed(cin, bed);
+        return;
+    }
+
+    // a named file: verify that it can be opened before processing.
+    ifstream fileStream(bed->bedFile.c_str(), ios::in);
+    if (fileStream.fail()) {
+        cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+    ProcessBed(fileStream, bed);
+}
+
+
+// Split every BED12 record of `bed` into its blocks and print each block
+// as one BED6 line on stdout.
+//   bedInput - kept for interface compatibility; it is not read here, the
+//              records are pulled through bed->Open() / bed->GetNextBed().
+//   bed      - the BED file to read; it is opened and closed by this function.
+// With the global addBlockNums set, the score column is replaced by the
+// 1-based block number counted in the direction of the feature's strand:
+// ascending along the blocks for "+" and descending for any other strand.
+void ProcessBed(istream &bedInput, BedFile *bed) {
+
+    // process each BED12 entry and convert it to BED6 lines
+    BED bedEntry, nullBed;
+    int lineNum = 0;
+    BedLineStatus bedStatus;
+    // open the BED file for reading.
+    bed->Open();
+    while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+
+            bedVector bedBlocks;  // vec to store the discrete BED "blocks" from the BED12 entry
+            splitBedIntoBlocks(bedEntry, lineNum, bedBlocks);
+
+            const int numBlocks = (int) bedBlocks.size();
+            for (int i = 0; i < numBlocks; ++i) {
+                if (addBlockNums == false) {
+                    printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(),
+                                                        bedBlocks[i].score.c_str(), bedBlocks[i].strand.c_str());
+                }
+                else {
+                    // block i is number i+1 when counted from the left and
+                    // number (numBlocks - i) when counted from the right, so
+                    // both directions yield values in 1..numBlocks.
+                    int blockNum = (bedBlocks[i].strand == "+") ? (i + 1) : (numBlocks - i);
+                    printf ("%s\t%d\t%d\t%s\t%d\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(),
+                                                        blockNum, bedBlocks[i].strand.c_str());
+                }
+            }
+            // reset the entry so fields do not carry over to the next line.
+            bedEntry = nullBed;
+        }
+    }
+    // close up
+    bed->Close();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToBam/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bedToBam/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,53 @@
+# Build rules for the bedToBam program.
+# Object files are written to OBJ_DIR and the linked binary to BIN_DIR.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+    -I$(UTILITIES_DIR)/BamTools-Ancillary
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : one .o per entry of SOURCES
+# EXT_OBJECTS   : objects built by the utilities directories, located in OBJ_DIR
+# BUILT_OBJECTS : this program's own objects, located in OBJ_DIR
+SOURCES= bedToBam.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= bedToBam
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+
+# link our own and the external objects against libbamtools into BIN_DIR.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# compile each source; $(*F) is the file-name part of the target's stem.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# external objects are produced by sub-makes in the utilities tree.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes every file in OBJ_DIR and BIN_DIR, not only this program's output.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,357 @@\n+/*****************************************************************************\n+  bedToBam.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "bedFile.h"\n+#include "genomeFile.h"\n+#include "version.h"\n+\n+\n+#include "api/BamReader.h"\n+#include "api/BamAux.h"\n+#include "api/BamWriter.h"\n+using namespace BamTools;\n+\n+#include <vector>\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+\n+// define our program name\n+#define PROGRAM_NAME "bedToBam"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+\n+// function declarations\n+void ShowHelp(void);\n+void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, bool uncompressedBam);\n+void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int> &chromToId, bool isBED12, int mapQual, int lineNum);\n+void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header, map<string, int> &chromToInt);\n+int  reg2bin(int beg, int end);\n+\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedFile = "stdin";\n+    string genomeFile;\n+\n+    unsigned int mapQual = 255;\n+\n+    bool haveBed         = true;\n+    bool haveGenome      = false;\n+    bool haveMapQual     = false;\n+    bool isBED12         = false;\n+    bool uncompressedBam = false;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                bedFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveGenome = true;\n+                genomeFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-mapq", 5, parameterLength)) {\n+            haveMapQual = true;\n+            if ((i+1) < argc) {\n+                mapQual = atoi(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n+            isBED12 = true;\n+        }\n+        else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n+            uncompressedBam = true;\n+        }\n+        else {\n+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+            showHelp = true;\n+        }\n+    }\n+\n+    // make sure we have an input files\n+    if (!haveBed ) {\n+        cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+    if (!haveGenome ) {\n+        cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome) file. " << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+    if (mapQual < 0 || mapQual > 255) {\n+        cerr << endl << "*****" << endl << "*****ERROR: MAPQ must be in range [0,255]. 
" << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+\n+\n+    if (!showHelp) {\n+        BedFile *bed       = new BedFile(bedFile);\n+        GenomeFile *genome = new GenomeFile(genomeFile);\n+\n+        '..b'() == 6) {\n+\n+            // extract the relevant BED fields to convert BED12 to BAM\n+            // namely: blockCount, blockStarts, blockEnds\n+            unsigned int blockCount = atoi(bed.otherFields[3].c_str());\n+\n+            vector<int> blockSizes, blockStarts;\n+            Tokenize(bed.otherFields[4], blockSizes, ",");\n+            Tokenize(bed.otherFields[5], blockStarts, ",");\n+\n+            // make sure this is a well-formed BED12 entry.\n+            if (blockSizes.size() != blockCount) {\n+                cerr << "Error: Number of BED blocks does not match blockCount at line: " << lineNum << ".  Exiting!" << endl;\n+                exit (1);\n+            }\n+            else {\n+                // does the first block start after the bed.start?\n+                // if so, we need to do some "splicing"\n+                if (blockStarts[0] > 0) {\n+                    CigarOp cOp;\n+                    cOp.Length = blockStarts[0];\n+                    cOp.Type = \'N\';\n+                    bam.CigarData.push_back(cOp);\n+                }\n+                // handle the "middle" blocks\n+                for (unsigned int i = 0; i < blockCount - 1; ++i) {\n+                    CigarOp cOp;\n+                    cOp.Length = blockSizes[i];\n+                    cOp.Type = \'M\';\n+                    bam.CigarData.push_back(cOp);\n+\n+                    if (blockStarts[i+1] > (blockStarts[i] + blockSizes[i])) {\n+                        CigarOp cOp;\n+                        cOp.Length = (blockStarts[i+1] - (blockStarts[i] + blockSizes[i]));\n+                        cOp.Type = \'N\';\n+                        bam.CigarData.push_back(cOp);\n+                    }\n+                }\n+                // handle the last 
block.\n+                CigarOp cOp;\n+                cOp.Length = blockSizes[blockCount - 1];\n+                cOp.Type = \'M\';\n+                bam.CigarData.push_back(cOp);\n+            }\n+        }\n+        // it doesn\'t smell like BED12.  complain.\n+        else {\n+            cerr << "You\'ve indicated that the input file is in BED12 format, yet the relevant fields cannot be found.  Exiting." << endl << endl;\n+            exit(1);\n+        }\n+    }\n+}\n+\n+\n+void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header,\n+                   map<string, int, std::less<string> > &chromToId) {\n+\n+    // make a genome map of the genome file.\n+    GenomeFile genome(genomeFile);\n+\n+    header += "@HD\\tVN:1.0\\tSO:unsorted\\n";\n+    header += "@PG\\tID:BEDTools_bedToBam\\tVN:V";\n+    header += VERSION;\n+    header += "\\n";\n+\n+    int chromId = 0;\n+    vector<string> chromList = genome.getChromList();\n+    sort(chromList.begin(), chromList.end());\n+\n+    // create a BAM header (@SQ) entry for each chrom in the BEDTools genome file.\n+    vector<string>::const_iterator genomeItr  = chromList.begin();\n+    vector<string>::const_iterator genomeEnd  = chromList.end();\n+    for (; genomeItr != genomeEnd; ++genomeItr) {\n+        chromToId[*genomeItr] = chromId;\n+        chromId++;\n+\n+        // add to the header text\n+        int size = genome.getChromSize(*genomeItr);\n+        string chromLine = "@SQ\\tSN:" + *genomeItr + "\\tAS:" + genomeFile + "\\tLN:" + ToString(size) + "\\n";\n+        header += chromLine;\n+\n+        // create a chrom entry and add it to\n+        // the RefVector\n+        RefData chrom;\n+        chrom.RefName            = *genomeItr;\n+        chrom.RefLength          = size;\n+        refs.push_back(chrom);\n+    }\n+}\n+\n+\n+/* Taken directly from the SAMTools spec\n+calculate bin given an alignment in [beg,end) (zero-based, half-close, half-open) */\n+int reg2bin(int beg, int end) {\n+    
--end;\n+    if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);\n+    if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);\n+    if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);\n+    if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);\n+    if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);\n+    return 0;\n+}\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToIgv/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bedToIgv/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,51 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# header search paths (-I) for the shared utility libraries under $(UTILITIES_DIR)
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# sources compiled here (BUILT_OBJECTS) and utility objects built by sub-makes (EXT_OBJECTS); both land in $(OBJ_DIR)
+# ----------------------------------
+SOURCES= bedToIgv.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= bedToIgv
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,269 @@\n+/*****************************************************************************\n+  bedToIgv.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "bedFile.h"\n+#include "genomeFile.h"\n+#include "version.h"\n+\n+#include <vector>\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "bedToIgv"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n+                        bool collapse, bool useNames, string imageType, int slop);\n+void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n+                        bool collapse, bool useNames, string imageType, int slop);\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedFile   = "stdin";\n+    string imagePath = "./";\n+    string sortType  = "none";\n+    string session   = "none";\n+    int slop         = 0;\n+    string imageType = "png";\n+\n+    bool haveBed         = true;\n+    bool collapse        = false;\n+    bool useNames        = false;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        
}\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                bedFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-path", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                imagePath = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-sort", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                sortType = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-sess", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                session = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-clps", 5, parameterLength)) {\n+            collapse = true;\n+        }\n+        else if(PARAMETER_CHECK("-name", 5, parameterLength)) {\n+            useNames = true;\n+        }\n+        else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                slop = atoi(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-img", 4, parameterLength)) {\n+            if ((i+1) < argc) {\n+                imageType = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else {\n+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+            showHelp = true;\n+        }\n+    }\n+\n+    // make sure we have an input files\n+    if (!haveBed ) {\n+        cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. 
" << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+    if (sortType != "none") {\n+        if ((sortType != "base")    && (sortType != "position") && (sortType != "strand") &&\n+            (sortType != "quali'..b'ns: base, position, strand, quality, sample, and readGroup" << endl;\n+    cerr                    << "\\t\\tDefault is to apply no sorting at all." << endl << endl;\n+\n+    cerr << "\\t-clps\\t"     << "Collapse the aligned reads prior to taking a snapshot. " << endl;\n+    cerr                    << "\\t\\tDefault is to no collapse." << endl << endl;\n+\n+    cerr << "\\t-name\\t"     << "Use the \\"name\\" field (column 4) for each image\'s filename. " << endl;\n+    cerr                    << "\\t\\tDefault is to use the \\"chr:start-pos.ext\\"." << endl << endl;\n+\n+    cerr << "\\t-slop\\t"     << "Number of flanking base pairs on the left & right of the image." << endl;\n+    cerr                    << "\\t\\t- (INT) Default = 0." << endl << endl;\n+\n+    cerr << "\\t-img\\t"      << "The type of image to be created. " << endl;\n+    cerr                    << "\\t\\tOptions: png, eps, svg" << endl;\n+    cerr                    << "\\t\\tDefault is png." << endl << endl;\n+\n+    cerr << "Notes: " << endl;\n+    cerr << "\\t(1)  The resulting script is meant to be run from within the IGV GUI version 1.5 or later." << endl;\n+    cerr << "\\t(2)  Unless you use the -sess option, it is assumed that prior to running the script, " << endl;\n+    cerr << "\\t\\tyou have loaded the proper genome, tracks and data files." 
<< endl << endl;\n+\n+\n+    // end the program here\n+    exit(1);\n+}\n+\n+\n+void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n+                       bool collapse, bool useNames, string imageType, int slop) {\n+\n+    // dealing with a proper file\n+    if (bed->bedFile != "stdin") {\n+\n+        ifstream bedStream(bed->bedFile.c_str(), ios::in);\n+        if ( !bedStream ) {\n+            cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl;\n+            exit (1);\n+        }\n+        ProcessBed(bedStream, bed, path, sortType, session, collapse, useNames, imageType, slop);\n+    }\n+    // reading from stdin\n+    else {\n+        ProcessBed(cin, bed, path, sortType, session, collapse, useNames, imageType, slop);\n+    }\n+}\n+\n+\n+void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n+                bool collapse, bool useNames, string imageType, int slop) {\n+\n+    // set the image path\n+    cout << "snapshotDirectory " << path << endl;\n+\n+    // should we load a session\n+    if (session != "none")\n+        cout << "load " << session << endl;\n+\n+\n+    BED bedEntry, nullBed;\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+\n+    bed->Open();\n+    // process each BED entry and convert to an IGV request\n+    while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+\n+            string filename = bedEntry.chrom + "_" + ToString(bedEntry.start) + "_" + ToString(bedEntry.end);\n+            string locus    = bedEntry.chrom + ":" + ToString(bedEntry.start - slop) + "-" + ToString(bedEntry.end + slop);\n+\n+            if (useNames == true) {\n+                if (bedEntry.name.empty() == false)\n+                    filename = filename + "_" + bedEntry.name;\n+                else {\n+                    cerr << "Error: You requested that filenames be based 
upon the name field.  However, it appears to be empty. Exiting!" << endl;\n+                    exit (1);\n+                }\n+            }\n+            if (slop > 0) {\n+                filename = filename + "_" + "slop" + ToString(slop);\n+            }\n+            // goto\n+            cout << "goto " << locus << endl;\n+\n+            // sort\n+            if (sortType != "none")\n+                cout << "sort " << sortType << endl;\n+\n+            // collapse\n+            if (collapse == true)\n+                cout << "collapse" << endl;\n+\n+            // snapshot\n+            cout << "snapshot " << filename << "." << imageType << endl;\n+\n+            // reset\n+            bedEntry = nullBed;\n+        }\n+    }\n+    // close up\n+    bed->Close();\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/closestBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,41 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# header search paths (-I) for the shared utility libraries under $(UTILITIES_DIR)
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+# ----------------------------------
+# sources compiled here (BUILT_OBJECTS) and utility objects built by sub-makes (EXT_OBJECTS); both land in $(OBJ_DIR)
+# ----------------------------------
+SOURCES= closestMain.cpp closestBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= closestBed
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,234 @@\n+/*****************************************************************************\n+  closestBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "closestBed.h"\n+\n+const int MAXSLOP = 256000000;  // 2*MAXSLOP = 512 megabases.\n+                                // We don\'t want to keep looking if we\n+                                // can\'t find a nearby feature within 512 Mb.\n+const int SLOPGROWTH = 2048000;\n+\n+\n+/*\n+    Constructor\n+*/\n+BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n+                       string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode,\n+                       bool ignoreOverlaps) \n+    : _bedAFile(bedAFile)\n+    , _bedBFile(bedBFile)\n+    , _tieMode(tieMode)\n+    , _sameStrand(sameStrand)\n+    , _diffStrand(diffStrand)\n+    , _reportDistance(reportDistance)\n+    , _signDistance(signDistance)\n+    , _strandedDistMode(_strandedDistMode)\n+    , _ignoreOverlaps(ignoreOverlaps)\n+{\n+    _bedA           = new BedFile(_bedAFile);\n+    _bedB           = new BedFile(_bedBFile);\n+    FindClosestBed();\n+}\n+\n+\n+/*\n+    Destructor\n+*/\n+BedClosest::~BedClosest(void) {\n+}\n+\n+\n+void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {\n+\n+    int slop = 0;  // start out just looking for overlaps\n+                   // within the current bin (~128Kb)\n+\n+    // update the current feature\'s start and end\n+\n+    CHRPOS aFudgeStart = 0;\n+    CHRPOS aFudgeEnd;\n+    int numOverlaps = 0;\n+    vector<BED> closestB;\n+    CHRPOS minDistance = INT_MAX;\n+    int32_t curDistance = INT_MAX;\n+    vector<int32_t> 
distances;\n+\n+    // is there at least one feature in B on the same chrom\n+    // as the current A feature?\n+    if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {\n+\n+        while ((numOverlaps == 0) && (slop <= MAXSLOP)) {\n+\n+            // add some slop (starting at 0 bases) to a in hopes\n+            // of finding a hit in B\n+            if ((static_cast<int>(a.start) - slop) > 0)\n+                aFudgeStart = a.start - slop;\n+            else\n+                aFudgeStart = 0;\n+\n+            if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))\n+                aFudgeEnd = a.end + slop;\n+            else\n+                aFudgeEnd = 2 * MAXSLOP;\n+\n+            // THE HEAVY LIFTING\n+            // search for hits with the current slop added\n+            _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _sameStrand, _diffStrand);\n+\n+            vector<BED>::const_iterator h = hits.begin();\n+            vector<BED>::const_iterator hitsEnd = hits.end();\n+            for (; h != hitsEnd; ++h) {\n+\n+                // do the actual features overlap?\n+                int s = max(a.start, h->start);\n+                int e = min(a.end, h->end);\n+                int overlapBases = (e - s);             // the number of overlapping bases b/w a and b\n+\n+                // make sure we allow overlapping features.\n+                if ((overlapBases > 0) && (_ignoreOverlaps == true))\n+                    continue;\n+                else\n+                    numOverlaps++;\n+\n+                // there is overlap. 
make sure we allow overlapping features ()\n+                if (overlapBases > 0) {\n+                    closestB.push_back(*h);\n+                    distances.push_back(0);\n+                }\n+                // the hit is to the "left" of A\n+                else if (h->end <= a.start) {\n+                    curDistance = a.start - h->end;\n+                    if (_signDistance) {\n+                        if ((_strandedDistMode == "ref")\n+                                || (_strandedDistMode == "a" && a.strand != "'..b'se if (abs(curDistance) == minDistance) {\n+                        minDistance = abs(curDistance);\n+                        closestB.push_back(*h);\n+                        distances.push_back(curDistance);\n+                    }\n+                }\n+                // the hit is to the "right" of A\n+                else if (h->start >= a.end) {\n+                    curDistance = h->start - a.end;\n+                    if (_signDistance) {\n+                        if ((_strandedDistMode == "a" && a.strand == "-")\n+                                || (_strandedDistMode == "b" && h->strand != "-")) {\n+                            curDistance = -curDistance;\n+                        }\n+                    }\n+                    if (abs(curDistance) < minDistance) {\n+                        minDistance = abs(curDistance);\n+                        closestB.clear();\n+                        closestB.push_back(*h);\n+                        distances.clear();\n+                        distances.push_back(curDistance);\n+                    }\n+                    else if (abs(curDistance) == minDistance) {\n+                        minDistance = abs(curDistance);\n+                        closestB.push_back(*h);\n+                        distances.push_back(curDistance);\n+                    }\n+                }\n+            }\n+            // if no overlaps were found, we\'ll widen the range\n+            // by SLOPGROWTH in 
each direction and search again.\n+            slop += SLOPGROWTH;\n+        }\n+    }\n+    // there is no feature in B on the same chromosome as A\n+    else {\n+        _bedA->reportBedTab(a);\n+        if (_reportDistance == true) {\n+            _bedB->reportNullBedTab();\n+            cout << -1 << endl;\n+        }\n+        else\n+            _bedB->reportNullBedNewLine();\n+    }\n+\n+    // report the closest feature(s) in B to the current A feature.\n+    // obey the user\'s reporting request (_tieMode)\n+    if (numOverlaps > 0) {\n+        if (closestB.size() == 1 || _tieMode == "first") {\n+            _bedA->reportBedTab(a);\n+            if (_reportDistance == true) {\n+                _bedB->reportBedTab(closestB[0]);\n+                cout << distances[0] << endl;\n+            }\n+            else\n+                _bedB->reportBedNewLine(closestB[0]);\n+        }\n+        else {\n+            if (_tieMode == "all") {\n+                size_t i = 0;\n+                for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {\n+                    _bedA->reportBedTab(a);\n+                    if (_reportDistance == true) {\n+                        _bedB->reportBedTab(*b);\n+                        cout << distances[i++] <<endl;\n+                    }\n+                    else\n+                        _bedB->reportBedNewLine(*b);\n+                }\n+            }\n+            else if (_tieMode == "last") {\n+                _bedA->reportBedTab(a);\n+                if (_reportDistance == true) {\n+                    _bedB->reportBedTab(closestB[closestB.size()-1]);\n+                    cout << distances[distances.size() - 1]<<endl;\n+                }\n+                else\n+                    _bedB->reportBedNewLine(closestB[closestB.size()-1]);\n+            }\n+        }\n+    }\n+}\n+\n+\n+void BedClosest::FindClosestBed() {\n+\n+    // load the "B" bed file into a map so\n+    // that we can easily compare "A" to 
it for overlaps\n+    _bedB->loadBedFileIntoMap();\n+\n+    BED a, nullBed;\n+    int lineNum = 0;                    // current input line number\n+    vector<BED> hits;                   // vector of potential hits\n+    hits.reserve(100);\n+    BedLineStatus bedStatus;\n+\n+    _bedA->Open();\n+    // process each entry in A in search of the closest feature in B\n+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            FindWindowOverlaps(a, hits);\n+            hits.clear();\n+            a = nullBed;\n+        }\n+    }\n+    _bedA->Close();\n+}\n+// END ClosestBed\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/closestBed/closestBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,61 @@
+/*****************************************************************************
+  closestBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef CLOSESTBED_H
+#define CLOSESTBED_H
+
+#include "bedFile.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// BedClosest: for each feature in A, find and report the closest feature(s) in B
+//************************************************
+class BedClosest {
+
+public:
+
+    // constructor: stores the options, creates BedFile objects for A and B, then immediately calls FindClosestBed()
+    BedClosest(string &bedAFile, string &bedBFile, 
+               bool sameStrand, bool diffStrand, string &tieMode, 
+               bool reportDistance, bool signDistance, string &strandedDistMode,
+               bool ignoreOverlaps);
+
+    // destructor (its body in closestBed.cpp is empty, so _bedA and _bedB are not deleted)
+    ~BedClosest(void);
+
+    // load B into a map, then read A entry by entry and report the closest B feature(s) for each valid entry
+    void FindClosestBed();
+
+private:
+
+    // data (option letters refer to the flags parsed in closestMain.cpp)
+    string _bedAFile;           // -a: path of the A file
+    string _bedBFile;           // -b: path of the B file
+    string _tieMode;            // -t: "all" (default), "first" or "last"
+    bool   _sameStrand;         // -s: only consider B features on the same strand
+    bool   _diffStrand;         // -S: only consider B features on the opposite strand
+    bool   _reportDistance;     // -d or -D: print the distance as an extra column
+    bool   _signDistance;       // -D: upstream distances are printed as negative values
+    string _strandedDistMode;   // argument of -D; help text lists "ref", "a" and "b"
+    bool   _ignoreOverlaps;     // -io: skip B features that overlap A
+
+    BedFile *_bedA, *_bedB;     // allocated with new in the constructor
+
+    // methods
+    void reportNullB();         // NOTE(review): no definition visible in the closestBed.cpp excerpt -- confirm it is still needed
+    void FindWindowOverlaps(BED &, vector<BED> &);  // report the closest B hit(s) for one A feature; 2nd arg is the caller's reusable hits vector
+
+};
+#endif /* CLOSESTBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,202 @@\n+/*****************************************************************************\n+  closestMain.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "closestBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "closestBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedAFile;\n+    string bedBFile;\n+    string tieMode = "all";\n+    string strandedDistMode = "";\n+\n+    bool haveBedA       = false;\n+    bool haveBedB       = false;\n+    bool haveTieMode    = false;\n+    bool sameStrand     = false;\n+    bool diffStrand     = false;\n+    bool ignoreOverlaps = false;\n+    bool reportDistance = false;\n+    bool signDistance   = false;\n+    bool haveStrandedDistMode = false;\n+\n+\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if( (PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-a", 2, 
parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedB = true;\n+                bedBFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n+            sameStrand = true;\n+        }\n+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n+            diffStrand = true;\n+        }\n+        else if (PARAMETER_CHECK("-d", 2, parameterLength)) {\n+            reportDistance = true;\n+        }\n+        else if (PARAMETER_CHECK("-D", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                reportDistance = true;\n+                signDistance = true;\n+                haveStrandedDistMode = true;\n+                strandedDistMode = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if (PARAMETER_CHECK("-io", 3, parameterLength)) {\n+            ignoreOverlaps = true;\n+        }\n+        else if (PARAMETER_CHECK("-t", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveTieMode = true;\n+                tieMode = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else {\n+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+            showHelp = true;\n+        }\n+    }\n+\n+    // make sure we have both input files\n+    if (!haveBedA || !haveBedB) {\n+        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. 
" << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+\n+    if (haveTieMode && (tieMode != "all") && (tieMode != "first")\n+                    && (tieMode != "last")) {\n+        cerr << endl << "*****" << endl << "*****ERROR: Request \\"all\\" or \\"first\\" or \\"last\\" for Tie Mode (-t)" << endl << "*****" << endl;\n+        showHelp = true;\n+    }\n+    \n+    if (haveStrandedDi'..b'reOverlaps);\n+        delete bc;\n+        return 0;\n+    }\n+    else {\n+        ShowHelp();\n+    }\n+}\n+\n+void ShowHelp(void) {\n+\n+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+    cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+    cerr       << "\\t Erik Arner, Riken" << endl << endl;\n+\n+    cerr << "Summary: For each feature in A, finds the closest " << endl;\n+    cerr << "\\t feature (upstream or downstream) in B." << endl << endl;\n+\n+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+    cerr << "Options: " << endl;\n+    cerr << "\\t-s\\t"            << "Require same strandedness.  That is, find the closest feature in B" << endl;\n+    cerr                        << "\\t\\tthat overlaps A on the _same_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-S\\t"            << "Require opposite strandedness.  That is, find the closest feature in B" << endl;\n+    cerr                        << "\\t\\tthat overlaps A on the _opposite_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-d\\t"            << "In addition to the closest feature in B, " << endl;\n+    cerr                        << "\\t\\treport its distance to A as an extra column." 
<< endl;\n+    cerr                        << "\\t\\t- The reported distance for overlapping features will be 0." << endl << endl;\n+    \n+    cerr << "\\t-D\\t"            << "Like -d, report the closest feature in B, and its distance to A" << endl;\n+    cerr                        << "\\t\\tas an extra column. Unlike -d, use negative distances to report" << endl;\n+    cerr                        << "\\t\\tupstream features. You must specify which orientation defines \\"upstream\\"." << endl;\n+    cerr                        << "\\t\\tThe options are:" << endl;\n+    cerr                        << "\\t\\t- \\"ref\\"   Report distance with respect to the reference genome. " << endl;\n+    cerr                        << "\\t\\t            B features with a lower (start, stop) are upstream" << endl;\n+    cerr                        << "\\t\\t- \\"a\\"     Report distance with respect to A." << endl;\n+    cerr                        << "\\t\\t            When A is on the - strand, \\"upstream\\" means B has a higher (start,stop)." << endl;\n+    cerr                        << "\\t\\t- \\"b\\"     Report distance with respect to B." << endl;\n+    cerr                        << "\\t\\t            When B is on the - strand, \\"upstream\\" means A has a higher (start,stop)." << endl << endl;\n+\n+    cerr << "\\t-io\\t"           << "Ignore features in B that overlap A.  That is, we want close, but " << endl;\n+    cerr                        << "\\t\\tnot touching features only." << endl << endl;\n+\n+    cerr << "\\t-t\\t"            << "How ties for closest feature are handled.  This occurs when two" << endl;\n+    cerr                        << "\\t\\tfeatures in B have exactly the same \\"closeness\\" with A." << endl;\n+    cerr                        << "\\t\\tBy default, all such features in B are reported." 
<< endl;\n+    cerr                        << "\\t\\tHere are all the options:" << endl;\n+    cerr                        << "\\t\\t- \\"all\\"    Report all ties (default)." << endl;\n+    cerr                        << "\\t\\t- \\"first\\"  Report the first tie that occurred in the B file." << endl;\n+    cerr                        << "\\t\\t- \\"last\\"   Report the last tie that occurred in the B file." << endl << endl;\n+\n+    cerr << "Notes: " << endl;\n+    cerr << "\\tReports \\"none\\" for chrom and \\"-1\\" for all other fields when a feature" << endl;\n+    cerr << "\\tis not found in B on the same chromosome as the feature in A." << endl;\n+    cerr << "\\tE.g. none\\t-1\\t-1" << endl << endl;\n+\n+    // end the program here\n+    exit(1);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/complementBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+# Makefile for the complementBed program.
+# Compiles the two local sources into $(OBJ_DIR) and links them with the
+# shared utility objects into $(BIN_DIR)/complementBed.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS are the local .o names; BUILT_OBJECTS and EXT_OBJECTS are the same
+# names (local and utility respectively) prefixed with $(OBJ_DIR).
+SOURCES= complementMain.cpp complementBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= complementBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link step: $^ expands to every prerequisite (local and utility objects).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Compile step: $(*F) is the file-name part of the target's stem, so each
+# object is compiled from the .cpp with the same base name.
+# NOTE: every object lists ALL of $(SOURCES) as prerequisites, so changing
+# any one source makes every local object out of date.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Utility objects are produced by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,83 @@
+/*****************************************************************************
+  complementBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "complementBed.h"
+
+// Construct a complementer: store both file names and create the
+// BedFile / GenomeFile objects for the paths supplied by the caller.
+// Members are initialized in the order they are declared in the class.
+BedComplement::BedComplement(string &bedFile, string &genomeFile)
+    : _bedFile(bedFile),
+      _genomeFile(genomeFile),
+      _bed(new BedFile(bedFile)),
+      _genome(new GenomeFile(genomeFile))
+{
+}
+
+
+// Release the BedFile and GenomeFile objects that the constructor
+// allocated with new; without this they are never freed.
+BedComplement::~BedComplement(void) {
+    delete _bed;
+    delete _genome;
+}
+
+
+//
+// Merge overlapping BED entries into a single entry
+//
+void BedComplement::ComplementBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    // get a list of the chroms in the user's genome
+    vector<string> chromList =  _genome->getChromList();
+
+    // process each chrom in the genome
+    for (size_t c = 0; c < chromList.size(); ++c) {
+        string currChrom = chromList[c];
+
+        // create a "bit vector" for the chrom
+        CHRPOS currChromSize = _genome->getChromSize(currChrom);
+        vector<bool> chromMasks(currChromSize, 0);
+
+        // mask the chrom for every feature in the BED file
+        bedVector::const_iterator bItr = _bed->bedMapNoBin[currChrom].begin();
+        bedVector::const_iterator bEnd = _bed->bedMapNoBin[currChrom].end();
+        for (; bItr != bEnd; ++bItr) {
+            if (bItr->end > currChromSize) {
+                cout << "Warninge: end of BED entry exceeds chromosome length. Please correct." << endl;
+                _bed->reportBedNewLine(*bItr);
+                exit(1);
+            }
+
+            // mask all of the positions spanned by this BED entry.
+            for (CHRPOS b = bItr->start; b < bItr->end; b++)
+                chromMasks[b] = 1;
+        }
+
+        // report the unmasked, that is, complemented parts of the chrom
+        CHRPOS i = 0;
+        CHRPOS start;
+        while (i < chromMasks.size()) {
+            if (chromMasks[i] == 0) {
+                start = i;
+                while ((chromMasks[i] == 0) && (i < chromMasks.size()))
+                    i++;
+
+                if (start > 0)
+                    cout << currChrom << "\t" << start << "\t" << i << endl;
+                else
+                    cout << currChrom << "\t" << 0 << "\t" << i << endl;
+            }
+            i++;
+        }
+    }
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/complementBed/complementBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,47 @@
+/*****************************************************************************
+  complementBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include "genomeFile.h"
+
+#include <vector>
+#include <bitset>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <limits.h>
+#include <stdlib.h>
+
+using namespace std;
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedComplement {
+
+public:
+
+    // Create a complementer for the given BED file and genome file.
+    BedComplement(string &bedFile, string &genomeFile);
+
+    // Destroy the complementer.
+    ~BedComplement();
+
+    // Write the complement of the BED file, relative to the genome, to stdout.
+    void ComplementBed();
+
+private:
+
+    // names of the two input files, as passed to the constructor
+    string _bedFile;
+    string _genomeFile;
+
+    // file objects created by the constructor for the two inputs
+    BedFile    *_bed;
+    GenomeFile *_genome;
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,116 @@
+/*****************************************************************************
+  complementMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "complementBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "complementBed"
+
+
+// define our parameter checking macro
+// True when argv[i] is exactly `param`: the first min(actualLen, paramLen)
+// characters compare equal AND the two lengths are the same.
+// Relies on `argv` and `i` being in scope at the point of use.
+// The whole expansion and each argument are parenthesized so the macro can
+// be combined safely with other operators (e.g. !PARAMETER_CHECK(...)).
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile = "stdin";
+    string genomeFile;
+
+    bool haveBed = true;
+    bool haveGenome = false;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveGenome = true;
+                genomeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else {
+          cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed || !haveGenome) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file and -g Genome file. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if (!showHelp) {
+        BedComplement *bc = new BedComplement(bedFile, genomeFile);
+        bc->ComplementBed();
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Returns the base pair complement of a feature file." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl;
+
+    cerr << "Notes: " << endl;
+    cerr << "\t(1)  The genome file should tab delimited and structured as follows:" << endl;
+    cerr << "\t     <chromName><TAB><chromSize>" << endl << endl;
+    cerr << "\tFor example, Human (hg19):" << endl;
+    cerr << "\tchr1\t249250621" << endl;
+    cerr << "\tchr2\t243199373" << endl;
+    cerr << "\t..." << endl;
+    cerr << "\tchr18_gl000207_random\t4262" << endl << endl;
+
+    cerr << "Tips: " << endl;
+    cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
+    cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
+    cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl;
+    cerr << "\t\"select chrom, size from hg19.chromInfo\"  > hg19.genome" << endl << endl;
+
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/coverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,51 @@
+# Makefile for the coverageBed program.
+# Compiles the two local sources into $(OBJ_DIR) and links them with the
+# shared utility objects and the bamtools library into $(BIN_DIR)/coverageBed.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+    -I$(UTILITIES_DIR)/BamTools-Ancillary
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS are the local .o names; BUILT_OBJECTS and EXT_OBJECTS are the same
+# names (local and utility respectively) prefixed with $(OBJ_DIR).
+SOURCES= coverageMain.cpp coverageBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o BamAncillary.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= coverageBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link step: $^ expands to every prerequisite; the bamtools library is
+# searched for under $(UTILITIES_DIR)/BamTools/lib/.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# Compile step: $(*F) is the file-name part of the target's stem, so each
+# object is compiled from the .cpp with the same base name.
+# NOTE: every object lists ALL of $(SOURCES) as prerequisites, so changing
+# any one source makes every local object out of date.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Utility objects are produced by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,293 @@\n+/*****************************************************************************\n+  coverageBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "coverageBed.h"\n+\n+// build\n+BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n+                         bool writeHistogram, bool bamInput, bool obeySplits, \n+                         bool eachBase, bool countsOnly) {\n+\n+    _bedAFile       = bedAFile;\n+    _bedBFile       = bedBFile;\n+\n+    _bedA           = new BedFile(bedAFile);\n+    _bedB           = new BedFile(bedBFile);\n+\n+    _sameStrand     = sameStrand;\n+    _diffStrand     = diffStrand;\n+    _obeySplits     = obeySplits;\n+    _eachBase       = eachBase;\n+    _writeHistogram = writeHistogram;\n+    _bamInput       = bamInput;\n+    _countsOnly     = countsOnly;\n+\n+\n+    if (_bamInput == false)\n+        CollectCoverageBed();\n+    else\n+        CollectCoverageBam(_bedA->bedFile);\n+}\n+\n+// destroy\n+BedCoverage::~BedCoverage(void) {\n+    delete _bedA;\n+    delete _bedB;\n+}\n+\n+\n+void BedCoverage::CollectCoverageBed() {\n+\n+    // load the "B" bed file into a map so\n+    // that we can easily compare "A" to it for overlaps\n+    _bedB->loadBedCovFileIntoMap();\n+\n+    int lineNum = 0;                    // current input line number\n+    BED a, nullBed;\n+    BedLineStatus bedStatus;\n+\n+    _bedA->Open();\n+    // process each entry in A\n+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            // process the BED entry as a single block\n+            if (_obeySplits == false)\n+                
_bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n+            // split the BED into discrete blocksand process each independently.\n+            else {\n+                bedVector bedBlocks;\n+                splitBedIntoBlocks(a, lineNum, bedBlocks);\n+\n+                // use countSplitHits to avoid over-counting each split chunk\n+                // as distinct read coverage.\n+                _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);\n+            }\n+            a = nullBed;\n+        }\n+    }\n+    _bedA->Close();\n+\n+    // report the coverage (summary or histogram) for BED B.\n+    if (_countsOnly == true)\n+        ReportCounts();\n+    else \n+        ReportCoverage();\n+}\n+\n+\n+void BedCoverage::CollectCoverageBam(string bamFile) {\n+\n+    // load the "B" bed file into a map so\n+    // that we can easily compare "A" to it for overlaps\n+    _bedB->loadBedCovFileIntoMap();\n+\n+    // open the BAM file\n+    BamReader reader;\n+    reader.Open(bamFile);\n+\n+    // get header & reference information\n+    string header = reader.GetHeaderText();\n+    RefVector refs = reader.GetReferenceData();\n+\n+    // convert each aligned BAM entry to BED\n+    // and compute coverage on B\n+    BamAlignment bam;\n+    while (reader.GetNextAlignment(bam)) {\n+        if (bam.IsMapped()) {\n+            // treat the BAM alignment as a single "block"\n+            if (_obeySplits == false) {\n+                // construct a new BED entry from the current BAM alignment.\n+                BED a;\n+                a.chrom  = refs.at(bam.RefID).RefName;\n+                a.start  = bam.Position;\n+                a.end    = bam.GetEndPosition(false, false);\n+                a.strand = "+";\n+                if (bam.IsReverseStrand()) a.strand = "-";\n+\n+                _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n+            }\n+            // split the BAM alignment into discrete blocks and\n+            
// look for overlaps only within each block.\n+            else {\n+                // vec to store the discrete BED "blocks"'..b'         // update our histograms, assuming we are not reporting "per-base" coverage.\n+                        if (_eachBase == false) {\n+                            depthHist[depth]++;\n+                            allDepthHist[depth]++;\n+                        }\n+                        else if ((_eachBase == true) && (bedItr->zeroLength == false))\n+                        {\n+                            _bedB->reportBedTab(*bedItr);\n+                            printf("%d\\t%d\\n", pos-bedItr->start, depth);\n+                        }\n+                    }\n+                    // decrement coverage if ends observed at this position.\n+                    if (depthItr != bedItr->depthMap.end())\n+                        depth = depth - depthItr->second.ends;\n+                }\n+\n+                // handle the special case where the user wants "per-base" depth\n+                // but the current feature is length = 0.\n+                if ((_eachBase == true) && (bedItr->zeroLength == true)) {\n+                    _bedB->reportBedTab(*bedItr);\n+                    printf("1\\t%d\\n",depth);\n+                }\n+                // Summarize the coverage for the current interval,\n+                // assuming the user has not requested "per-base" coverage.\n+                else if (_eachBase == false) \n+                {\n+                    CHRPOS length     = bedItr->end - bedItr->start;\n+                    if (bedItr->zeroLength == true) {\n+                        length = 0;\n+                    }\n+                    totalLength       += length;\n+                    int nonZeroBases   = (length - zeroDepthCount);\n+                    \n+                    float fractCovered = 0.0;\n+                    if (bedItr->zeroLength == false) {\n+                        fractCovered = (float) nonZeroBases / 
length;\n+                    }\n+                    \n+                    // print a summary of the coverage\n+                    if (_writeHistogram == false) {\n+                        _bedB->reportBedTab(*bedItr);\n+                        printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, nonZeroBases, length, fractCovered);\n+                    }\n+                    // HISTOGRAM\n+                    // report the number of bases with coverage == x\n+                    else {\n+                        // produce a histogram when not a zero length feature.\n+                        if (bedItr->zeroLength == false) {\n+                            map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();\n+                            map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();\n+                            for (; histItr != histEnd; ++histItr)\n+                            {\n+                                float fractAtThisDepth = (float) histItr->second / length;\n+                                _bedB->reportBedTab(*bedItr);\n+                                printf("%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, length, fractAtThisDepth);\n+                            }\n+                        }\n+                        // special case when it is a zero length feauture.\n+                        else {\n+                            _bedB->reportBedTab(*bedItr);\n+                            printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, 0, 0, 1.0000000);\n+                        }\n+                    }\n+                }\n+            }\n+        }\n+    }\n+    // report a histogram of coverage among _all_\n+    // features in B.\n+    if (_writeHistogram == true) {\n+        map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();\n+        map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();\n+        for (; histItr != histEnd; 
++histItr) {\n+            float fractAtThisDepth = (float) histItr->second / totalLength;\n+            printf("all\\t%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);\n+        }\n+    }\n+}\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,83 @@
+/*****************************************************************************
+  coverageBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef COVERAGEBED_H
+#define COVERAGEBED_H
+
+#include "bedFile.h"
+
+#include "api/BamReader.h"
+#include "api/BamAux.h"
+#include "BamAncillary.h"
+using namespace BamTools;
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedCoverage {
+
+public:
+
+    // constructor
+    BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, bool writeHistogram,
+                bool bamInput, bool obeySplits, bool eachBase, bool countsOnly);
+
+    // destructor
+    ~BedCoverage(void);
+
+private:
+
+    // input files.
+    string _bedAFile;
+    string _bedBFile;
+
+    // instance of a bed file class.
+    BedFile *_bedA, *_bedB;
+
+    // do we care about same or opposite strandedness when counting coverage?
+    bool _sameStrand;
+    bool _diffStrand;
+
+    // should we write a histogram for each feature in B?
+    bool _writeHistogram;
+
+    // are we dealing with BAM input for "A"?
+    bool _bamInput;
+
+    // should we split BED/BAM into discrete blocks?
+    bool _obeySplits;
+
+    // should discrete coverage be reported for each base in each feature?
+    bool _eachBase;
+    
+    // should we just count overlaps and not try to describe the breadth?
+    bool _countsOnly;
+
+    // private function for reporting coverage information
+    void ReportCoverage();
+    
+    // private function for reporting overlap counts
+    void ReportCounts();
+
+    void CollectCoverageBed();
+
+    void CollectCoverageBam(string bamFile);
+};
+#endif /* COVERAGEBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,182 @@
+/*****************************************************************************
+  coverageMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "coverageBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "coverageBed"
+
+// define our parameter checking macro
+// True when argv[i] is exactly `param`: the first min(actualLen, paramLen)
+// characters compare equal AND the two lengths are the same.
+// Relies on `argv` and `i` being in scope at the point of use.
+// The whole expansion and each argument are parenthesized so the macro can
+// be combined safely with other operators (e.g. !PARAMETER_CHECK(...)).
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedAFile;
+    string bedBFile;
+
+    // parm flags
+    bool sameStrand    = false;
+    bool diffStrand    = false;
+    bool writeHistogram = false;
+    bool eachBase       = false;
+    bool obeySplits     = false;
+    bool bamInput       = false;
+    bool haveBedA       = false;
+    bool haveBedB       = false;
+    bool countsOnly     = false;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                bamInput = true;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedB = true;
+                bedBFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            sameStrand = true;
+        }
+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
+            diffStrand = true;
+        }
+        else if (PARAMETER_CHECK("-hist", 5, parameterLength)) {
+            writeHistogram = true;
+        }
+        else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
+            eachBase = true;
+        }
+        else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
+            obeySplits = true;
+        }
+        else if (PARAMETER_CHECK("-counts", 7, parameterLength)) {
+            countsOnly = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBedA || !haveBedB) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    
+    if (sameStrand && diffStrand) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, sameStrand, diffStrand,
+                                          writeHistogram, bamInput, obeySplits, eachBase, countsOnly);
+        delete bg;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl;
+    cerr << "\t on the intervals in B." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-abam\t"         << "The A input file is in BAM format." << endl << endl;
+
+    cerr << "\t-s\t"            << "Require same strandedness.  That is, only counts hits in A that" << endl;
+    cerr                        << "\t\toverlap B on the _same_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+    cerr << "\t-S\t"            << "Require different strandedness.  That is, only report hits in A that" << endl;
+    cerr                        << "\t\toverlap B on the _opposite_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+    cerr << "\t-hist\t"         << "Report a histogram of coverage for each feature in B" << endl;
+    cerr                        << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
+    cerr                        << "\t\tOutput (tab delimited) after each feature in B:" << endl;
+    cerr                        << "\t\t  1) depth\n\t\t  2) # bases at depth\n\t\t  3) size of B\n\t\t  4) % of B at depth" << endl << endl;
+
+    cerr << "\t-d\t"            << "Report the depth at each position in each B feature." << endl;
+    cerr                        << "\t\tPositions reported are one based.  Each position" << endl;
+    cerr                        << "\t\tand depth follow the complete B feature." << endl << endl;
+    
+    cerr << "\t-counts\t"       << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl;
+
+    cerr << "\t-split\t"        << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
+    cerr                        << "\t\twhen computing coverage." << endl;
+    cerr                        << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;
+    cerr                        << "\t\tto infer the blocks for computing coverage." << endl;
+    cerr                        << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl;
+    cerr                        << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl;
+
+    cerr << "Default Output:  " << endl;
+    cerr << "\t" << " After each entry in B, reports: " << endl;
+    cerr << "\t   1) The number of features in A that overlapped the B interval." << endl;
+    cerr << "\t   2) The number of bases in B that had non-zero coverage." << endl;
+    cerr << "\t   3) The length of the entry in B." << endl;
+    cerr << "\t   4) The fraction of bases in B that had non-zero coverage." << endl << endl;
+
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/cuffToTrans/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/cuffToTrans/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,44 @@
+# Makefile for the cuffToTrans tool.
+# Locations of the utility sources, the object directory and the binary directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# (one -I flag per utility directory whose headers are needed)
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# NOTE(review): Fasta.cpp and split.cpp are listed as local sources, yet no
+# Fasta include directory is on the -I list above -- confirm these files
+# really live in this directory.
+SOURCES= cuffToTransMain.cpp cuffToTrans.cpp Fasta.cpp split.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# objects expected from the utility sub-makes (see the $(EXT_OBJECTS) rule)
+_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= cuffToTrans
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link this tool's objects with the utility objects; the binary goes to $(BIN_DIR)
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile one local source per object; $(*F) is the file part of the target's
+# stem.  Every object lists ALL sources as prerequisites, so touching any one
+# source recompiles every object.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# build the utility objects by running make inside each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes every file in $(OBJ_DIR) and $(BIN_DIR), not only this tool's outputs
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,52 @@
+# Makefile for the fastaFromBed tool.
+# Locations of the utility sources, the object directory and the binary directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# NOTE(review): the last -I entry also ends with a continuation backslash;
+# the blank line that follows it is what terminates the value, so keep it.
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/sequenceUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/Fasta/ \
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= fastaFromBedMain.cpp fastaFromBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# objects expected from the utility sub-makes (see the $(EXT_OBJECTS) rule)
+_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= fastaFromBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link this tool's objects with the utility objects; the binary goes to $(BIN_DIR)
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile one local source per object; $(*F) is the file part of the target's
+# stem.  Every object lists ALL sources as prerequisites, so touching any one
+# source recompiles every object.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# build the utility objects by running make inside each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/
+
+# removes every file in $(OBJ_DIR) and $(BIN_DIR), not only this tool's outputs
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,141 @@
+/*****************************************************************************
+  fastaFromBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "fastaFromBed.h"
+
+
+Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
+    const string &fastaOutFile, bool useFasta, bool useStrand) {
+
+    _useName      = useName;
+    _dbFile       = dbFile;
+    _bedFile      = bedFile;
+    _fastaOutFile = fastaOutFile;
+    _useFasta     = useFasta;
+    _useStrand    = useStrand;
+
+    _bed = new BedFile(_bedFile);
+
+    // Figure out what the output file should be.
+    if (fastaOutFile == "stdout") {
+        _faOut = &cout;
+    }
+    else {
+        // Make sure we can open the file.
+        ofstream fa(fastaOutFile.c_str(), ios::out);
+        if ( !fa ) {
+            cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl;
+            exit (1);
+        }
+        else {
+            fa.close();
+            _faOut = new ofstream(fastaOutFile.c_str(), ios::out);
+        }
+    }
+
+    // Extract the requested intervals from the FASTA input file.
+    ExtractDNA();
+}
+
+
+// Releases what the constructor acquired: the BedFile reader and, unless the
+// output is cout, the heap-allocated output file stream (destroying it also
+// closes the file).
+// NOTE: the class holds raw owning pointers and declares no copy operations,
+// so instances must not be copied.
+Bed2Fa::~Bed2Fa(void) {
+    delete _bed;
+    if (_faOut != &cout)
+        delete _faOut;
+}
+
+
+//******************************************************************************
+// ReportDNA
+//******************************************************************************
+void Bed2Fa::ReportDNA(const BED &bed, string &dna) {
+
+    // revcomp if necessary.  Thanks to Thomas Doktor.
+    if ((_useStrand == true) && (bed.strand == "-"))
+        reverseComplement(dna);
+
+    if (!(_useName)) {
+        if (_useFasta == true) {
+            if (_useStrand == true)
+                *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end   << "(" << bed.strand << ")" << endl << dna << endl;
+            else
+                *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl;
+        }
+        else {
+            if (_useStrand == true)
+                *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl;
+            else
+                *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl;
+        }
+    }
+    else {
+        if (_useFasta == true)
+            *_faOut << ">" << bed.name << endl << dna << endl;
+        else
+            *_faOut << bed.name << "\t" << dna << endl;
+    }
+}
+
+
+
+//******************************************************************************
+// ExtractDNA
+//******************************************************************************
+void Bed2Fa::ExtractDNA() {
+
+    /* Make sure that we can oen all of the files successfully*/
+
+    // open the fasta database for reading
+    ifstream faDb(_dbFile.c_str(), ios::in);
+    if ( !faDb ) {
+        cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+    // open and memory-map genome file
+    FastaReference *fr = new FastaReference;
+    bool memmap = true;
+    fr->open(_dbFile, memmap);
+
+    BED bed, nullBed;
+    int lineNum = 0;
+    BedLineStatus bedStatus;
+    string sequence;
+
+    _bed->Open();
+    while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            // make sure we are extracting >= 1 bp
+            if (bed.zeroLength == false) {
+                size_t seqLength = fr->sequenceLength(bed.chrom);
+                // make sure this feature will not exceed the end of the chromosome.
+                if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) 
+                {
+                    int length = bed.end - bed.start;
+                    sequence = fr->getSubSequence(bed.chrom, bed.start, length);
+                    ReportDNA(bed, sequence);
+                }
+                else
+                {
+                    cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of "
+                        << bed.chrom << " size (" << seqLength << " bp).  Skipping." << endl;
+                }
+            }
+            // handle zeroLength 
+            else {
+                cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl;
+            }
+            bed = nullBed;
+        }
+    }
+    _bed->Close();
+}
+
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,56 @@
+/*****************************************************************************
+  fastaFromBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef FASTAFROMBED_H
+#define FASTAFROMBED_H
+
+#include "bedFile.h"
+#include "sequenceUtils.h"
+#include "Fasta.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// Bed2Fa: writes the FASTA-file sequence of every feature in a feature file
+//************************************************
+class Bed2Fa {
+
+public:
+
+    // constructor: stores the options, opens the output, then runs ExtractDNA()
+    Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile,
+        bool useFasta, bool useStrand);
+
+    // destructor
+    ~Bed2Fa(void);
+
+    void ExtractDNA();                            // reads the features and reports each sequence
+    void ReportDNA(const BED &bed, string &dna);  // writes one record; may reverse-complement dna in place
+
+
+private:
+
+    bool _useName;          // label records with the name field rather than coordinates
+    string _dbFile;         // path of the FASTA file sequences are taken from
+    string _bedFile;        // path of the feature file
+    string _fastaOutFile;   // output path, or "stdout"
+    bool _useFasta;         // true: FASTA output; false: tab-delimited output
+    bool _useStrand;        // reverse-complement "-" strand features and show the strand
+
+    // reader for the feature file; allocated with new in the constructor
+    BedFile  *_bed;
+    ostream *_faOut;        // &cout, or an ofstream allocated with new in the constructor
+};
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,146 @@
+/*****************************************************************************
+  fastaFromBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "fastaFromBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "fastaFromBed"
+
+
+// define our parameter checking macro: true when argv[i] has length paramLen and its first paramLen characters equal param (needs argv and i in scope); fully parenthesized so it composes safely with !, || and &&
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string fastaDbFile;
+    string bedFile;
+
+    // output files
+    string fastaOutFile;
+
+    // checks for existence of parameters
+    bool haveFastaDb = false;
+    bool haveBed = false;
+    bool haveFastaOut = false;
+    bool useNameOnly = false;
+    bool useFasta = true;
+    bool useStrand = false;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-fi", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFastaDb = true;
+                fastaDbFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-fo", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFastaOut = true;
+                fastaOutFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed = true;
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-name", 5, parameterLength)) {
+            useNameOnly = true;
+        }
+        else if(PARAMETER_CHECK("-tab", 4, parameterLength)) {
+            useFasta = false;
+        }
+        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
+            useStrand = true;
+        }
+        else {
+            cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    if (!haveFastaDb || !haveFastaOut || !haveBed) {
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand);
+        delete b2f;
+
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-fi\tInput FASTA file" << endl;
+    cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl;
+    cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl;
+    cerr << "\t-name\tUse the name field for the FASTA header" << endl;
+
+    cerr << "\t-tab\tWrite output in TAB delimited format." << endl;
+    cerr << "\t\t- Default is FASTA format." << endl << endl;
+
+    cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl;
+    cerr << "\t\tthe sequence will be reverse complemented." << endl;
+    cerr << "\t\t- By default, strand information is ignored." << endl << endl;
+
+
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fjoin/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,42 @@
+# Makefile for the fjoin tool.
+# Locations of the utility sources, the object directory and the binary directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# (one -I flag per utility directory whose headers are needed)
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= fjoinMain.cpp fjoin.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# objects expected from the utility sub-makes (see the $(EXT_OBJECTS) rule)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= fjoin
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link this tool's objects with the utility objects; the binary goes to $(BIN_DIR)
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile one local source per object; $(*F) is the file part of the target's
+# stem.  Every object lists ALL sources as prerequisites, so touching any one
+# source recompiles every object.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# build the utility objects by running make inside each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes every file in $(OBJ_DIR) and $(BIN_DIR), not only this tool's outputs
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,350 @@\n+/*****************************************************************************\n+  intersectBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "fjoin.h"\n+#include <queue>\n+#include <set>\n+\n+bool leftOf(const BED &a, const BED &b);\n+\n+\n+bool BedIntersect::processHits(BED &a, vector<BED> &hits) {\n+    // how many overlaps are there b/w the bed and the set of hits?\n+    int s, e, overlapBases;\n+    int  numOverlaps = 0;\n+    bool hitsFound   = false;\n+    int aLength      = (a.end - a.start);   // the length of a in b.p.\n+\n+    // loop through the hits and report those that meet the user\'s criteria\n+    vector<BED>::const_iterator h       = hits.begin();\n+    vector<BED>::const_iterator hitsEnd = hits.end();\n+    for (; h != hitsEnd; ++h) {\n+        s            = max(a.start, h->start);\n+        e            = min(a.end, h->end);\n+        overlapBases = (e - s);             // the number of overlapping bases b/w a and b\n+\n+        // is there enough overlap relative to the user\'s request? 
(default ~ 1bp)\n+        if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+            // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n+            if (_reciprocal == false) {\n+                hitsFound = true;\n+                numOverlaps++;\n+                if (_printable == true)\n+                    ReportOverlapDetail(overlapBases, a, *h, s, e);\n+            }\n+            // we require there to be sufficient __reciprocal__ overlap\n+            else {\n+                int bLength    = (h->end - h->start);\n+                float bOverlap = ( (float) overlapBases / (float) bLength );\n+                if (bOverlap >= _overlapFraction) {\n+                    hitsFound = true;\n+                    numOverlaps++;\n+                    if (_printable == true)\n+                        ReportOverlapDetail(overlapBases, a, *h, s, e);\n+                }\n+            }\n+        }\n+    }\n+    // report the summary of the overlaps if requested.\n+    ReportOverlapSummary(a, numOverlaps);\n+    // were hits found for this BED feature?\n+    return hitsFound;\n+}\n+\n+/*\n+    Constructor\n+*/\n+BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n+                           bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n+                           float overlapFraction, bool noHit, bool writeCount, bool forceStrand,\n+                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {\n+\n+    _bedAFile            = bedAFile;\n+    _bedBFile            = bedBFile;\n+    _anyHit              = anyHit;\n+    _noHit               = noHit;\n+    _writeA              = writeA;\n+    _writeB              = writeB;\n+    _writeOverlap        = writeOverlap;\n+    _writeAllOverlap     = writeAllOverlap;\n+    _writeCount          = writeCount;\n+    _overlapFraction     = overlapFraction;\n+    _forceStrand         = forceStrand;\n+   
 _reciprocal          = reciprocal;\n+    _obeySplits          = obeySplits;\n+    _bamInput            = bamInput;\n+    _bamOutput           = bamOutput;\n+\n+    if (_anyHit || _noHit || _writeCount)\n+        _printable = false;\n+    else\n+        _printable = true;\n+\n+    // create new BED file objects for A and B\n+    _bedA = new BedFile(bedAFile);\n+    _bedB = new BedFile(bedBFile);\n+\n+    IntersectBed();\n+}\n+\n+\n+/*\n+    Destructor\n+*/\n+BedIntersect::~BedIntersect(void) {\n+}\n+\n+\n+bool leftOf(const BED &a, const BED &b) {\n+    return (a.end <= b.start);\n+}\n+\n+\n+void BedIntersect::ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b,\n+                                       const CHRPOS &s, const CHRPOS'..b'\n+        it = _windowA.find(chrom);\n+        if (it != _windowA.end()) {\n+            return & _windowA[chrom];\n+        }\n+        else {\n+            _windowA.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n+            return & _windowA[chrom];\n+        }\n+    }\n+    else {\n+        it = _windowB.find(chrom);\n+        if (it != _windowB.end()) {\n+            return & _windowB[chrom];\n+        }\n+        else {\n+            _windowB.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n+            return & _windowB[chrom];\n+        }\n+    }\n+}\n+\n+\n+void BedIntersect::ChromSwitch(const string &chrom) {\n+\n+    vector<BED*>::iterator windowAIter = _windowA[chrom].begin();\n+    vector<BED*>::iterator windowAEnd  = _windowA[chrom].end();\n+    for (; windowAIter != windowAEnd; ++windowAIter)\n+        (*windowAIter)->finished = true;\n+\n+    vector<BED*>::iterator windowBIter = _windowB[chrom].begin();\n+    vector<BED*>::iterator windowBEnd  = _windowB[chrom].end();\n+    for (; windowBIter != windowBEnd; ++windowBIter)\n+        (*windowBIter)->finished = true;\n+\n+    FlushOutputBuffer();\n+}\n+\n+\n+void BedIntersect::IntersectBed() {\n+\n+    int aLineNum = 
0;\n+    int bLineNum = 0;\n+\n+    // current feature from each file\n+    BED *a, *b, *prevA, *prevB;\n+\n+    // status of the current lines\n+    BedLineStatus aStatus, bStatus;\n+\n+    // open the files; get the first line from each\n+    _bedA->Open();\n+    _bedB->Open();\n+\n+    prevA = NULL;\n+    prevB = NULL;\n+    a = new BED();\n+    b = new BED();\n+    aStatus = _bedA->GetNextBed(*a, aLineNum);\n+    bStatus = _bedB->GetNextBed(*b, bLineNum);\n+\n+    cout << a->chrom << " " << a->start << " " << a->chrom << " " << b->start << endl;\n+    while (aStatus != BED_INVALID || bStatus != BED_INVALID) {\n+        \n+        if ((a->start <= b->start) && (a->chrom == b->chrom)) {\n+            prevA = a;\n+            _lastPick = 0;\n+            Scan(a, GetWindow(a->chrom, true),  aStatus,\n+                *b, GetWindow(a->chrom, false), bStatus);\n+\n+            a = new BED();\n+            aStatus = _bedA->GetNextBed(*a, aLineNum);\n+        }\n+        else if ((a->start > b->start) && (a->chrom == b->chrom)) {\n+            prevB = b;\n+            _lastPick = 1;\n+            Scan(b, GetWindow(b->chrom, false), bStatus,\n+                *a, GetWindow(b->chrom, true),  aStatus);\n+\n+            b = new BED();\n+            bStatus = _bedB->GetNextBed(*b, bLineNum);\n+        }\n+        else if (a->chrom != b->chrom) {\n+            // A was most recently read\n+            if (_lastPick == 0) {\n+                prevB = b;\n+                while (b->chrom == prevA->chrom){\n+                    _windowB[prevA->chrom].push_back(b);\n+                    b = new BED();\n+                    bStatus = _bedB->GetNextBed(*b, bLineNum);\n+                }\n+                Scan(prevA, GetWindow(prevA->chrom, true),  aStatus,\n+                    *prevB, GetWindow(prevA->chrom, false),  bStatus);\n+            }\n+            // B was most recently read\n+            else {\n+                prevA = a;\n+                while (a->chrom == 
prevB->chrom) {\n+                    _windowA[prevB->chrom].push_back(a);\n+                    a = new BED();\n+                    aStatus = _bedA->GetNextBed(*a, aLineNum);\n+                }\n+                Scan(prevB, GetWindow(prevB->chrom, false), bStatus,\n+                    *prevA, GetWindow(prevB->chrom, true),  aStatus);\n+            }\n+            FlushOutputBuffer(true);\n+        }\n+        if (prevA!=NULL&&prevB!=NULL)\n+            //cout << prevA->chrom << " " << a->chrom << " " << a->start << " "\n+            //     << prevB->chrom << " " << b->chrom << " " << b->start << "\\n";\n+        if (aStatus == BED_INVALID) a->start = INT_MAX;\n+        if (bStatus == BED_INVALID) b->start = INT_MAX;\n+    }\n+\n+    // clear out the final bit of staged output\n+    FlushOutputBuffer(true);\n+\n+    // close the files\n+    _bedA->Close();\n+    _bedB->Close();\n+}\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoin.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fjoin/fjoin.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,114 @@
+/*****************************************************************************
+  fjoin.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef INTERSECTBED_H
+#define INTERSECTBED_H
+
+#include "bedFile.h"
+// #include "BamReader.h"
+// #include "BamWriter.h"
+// #include "BamAncillary.h"
+// #include "BamAux.h"
+// using namespace BamTools;
+
+
+#include <vector>
+#include <queue>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+class BedIntersect {
+
+public:
+
+    // constructor: stores the options, creates the A and B BedFile objects and runs IntersectBed()
+    BedIntersect(string bedAFile, string bedBFile, bool anyHit,
+                               bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
+                               float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
+                               bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput);
+
+    // destructor (the definition in fjoin.cpp has an empty body)
+    ~BedIntersect(void);
+
+private:
+
+    //------------------------------------------------
+    // private attributes (all set from the constructor arguments unless noted)
+    //------------------------------------------------
+    string _bedAFile;
+    string _bedBFile;
+
+    bool  _writeA;            // should the original A feature be reported?
+    bool  _writeB;            // should the original B feature be reported?
+    bool  _writeOverlap;
+    bool  _writeAllOverlap;
+
+    bool  _forceStrand;
+    bool  _reciprocal;        // processHits also requires overlap/length(B) >= _overlapFraction
+    float _overlapFraction;   // processHits requires overlap/length(A) >= this value
+
+    bool  _anyHit;
+    bool  _noHit;
+    bool  _writeCount;        // do we want a count of the number of overlaps in B?
+    bool  _obeySplits;
+    bool  _bamInput;
+    bool  _bamOutput;
+
+    bool _printable;          // false when _anyHit, _noHit or _writeCount is set; gates per-hit detail output
+
+    queue<BED*> _outputBuffer;
+    bool  _lastPick;          // IntersectBed() sets 0 after taking a feature from A, 1 after taking one from B
+
+    map<string, vector<BED*> > _windowA;   // per-chromosome vectors of A features, keyed by chrom
+    map<string, vector<BED*> > _windowB;   // per-chromosome vectors of B features, keyed by chrom
+
+    // BedFile objects for the A and B inputs; allocated with new in the constructor
+    BedFile *_bedA, *_bedB;
+
+    //------------------------------------------------
+    // private methods
+    //------------------------------------------------
+    void IntersectBed(istream &bedInput);   // NOTE(review): no definition visible in the fjoin.cpp excerpt -- confirm it is used
+
+    void Scan(BED *x, vector<BED *> *windowX, BedLineStatus xStatus,
+        const BED &y, vector<BED *> *windowY, BedLineStatus yStatus);
+
+    void AddHits(BED *x, const BED &y);
+
+    void FlushOutputBuffer(bool final = false);
+
+    vector<BED*>* GetWindow(const string &chrom, bool isA);   // pointer to chrom's vector in _windowA/_windowB, inserting an empty one if absent
+
+    void ChromSwitch(const string &chrom);   // marks every feature in both windows of chrom finished, then flushes the output buffer
+
+    void IntersectBed();   // reads A and B in step and calls Scan on whichever current feature starts first
+
+    void IntersectBam(string bamFile);
+
+    bool processHits(BED &a, vector<BED> &hits);   // reports qualifying hits and the summary for a; returns true if any hit qualified
+
+    bool FindOverlaps(const BED &a, vector<BED> &hits);
+
+    bool FindOneOrMoreOverlap(const BED &a);
+
+    void ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b,
+                             const CHRPOS &s, const CHRPOS &e);
+    void ReportOverlapSummary(const BED &a, const int &numOverlapsFound);
+
+    void ReportHits(set<BED> &A, set<BED> &B);
+
+};
+
+#endif /* INTERSECTBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,271 @@\n+/*****************************************************************************\n+  intersectMain.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "fjoin.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "fjoin"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedAFile;\n+    string bedBFile;\n+\n+    // input arguments\n+    float overlapFraction = 1E-9;\n+\n+    bool haveBedA           = false;\n+    bool haveBedB           = false;\n+    bool noHit              = false;\n+    bool anyHit             = false;\n+    bool writeA             = false;\n+    bool writeB             = false;\n+    bool writeCount         = false;\n+    bool writeOverlap       = false;\n+    bool writeAllOverlap    = false;\n+    bool haveFraction       = false;\n+    bool reciprocalFraction = false;\n+    bool forceStrand        = false;\n+    bool obeySplits         = false;\n+    bool inputIsBam         = false;\n+    bool outputIsBam        = true;\n+\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    
}\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                outputIsBam = false;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                inputIsBam = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedB = true;\n+                bedBFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+            outputIsBam = false;\n+        }\n+        else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+            anyHit = true;\n+        }\n+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFraction = true;\n+                overlapFraction = atof(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n+            writeA = true;\n+        }\n+        else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n+            writeB = true;\n+        }\n+        else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n+            writeOverlap = true;\n+        }\n+        else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n+            writeAllOverlap = true;\n+            writeOverlap = true;\n+        }\n+        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+            writeCount = true;\n+        }\n+      
  else if(PARAMETER_CHECK("-r", 2, parameterLength)) {\n+            reciprocalFraction = true;\n+        }\n+        else if (PARAMETER'..b'IsBam, outputIsBam);\n+        delete bi;\n+        return 0;\n+    }\n+    else {\n+        ShowHelp();\n+    }\n+}\n+\n+void ShowHelp(void) {\n+\n+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+\n+    cerr << "Summary: Report overlaps between two feature files." << endl << endl;\n+\n+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+    cerr << "Options: " << endl;\n+\n+    cerr << "\\t-abam\\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl << endl;\n+\n+    cerr << "\\t-bed\\t"          << "When using BAM input (-abam), write output as BED. The default" << endl;\n+    cerr                        << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+    cerr << "\\t-wa\\t"           << "Write the original entry in A for each overlap." << endl << endl;\n+\n+    cerr << "\\t-wb\\t"           << "Write the original entry in B for each overlap." << endl;\n+    cerr                        << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-wo\\t"           << "Write the original A and B entries plus the number of base" << endl;\n+    cerr                        << "\\t\\tpairs of overlap between the two features." << endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n+    cerr                        << "\\t\\t  Only A features with overlap are reported." << endl << endl;\n+\n+    cerr << "\\t-wao\\t"          << "Write the original A and B entries plus the number of base" << endl;\n+    cerr                        << "\\t\\tpairs of overlap between the two features." 
<< endl;\n+    cerr                        << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n+    cerr                        << "\\t\\t  However, A features w/o overlap are also reported" << endl;\n+    cerr                        << "\\t\\t  with a NULL B feature and overlap = 0." << endl << endl;\n+\n+    cerr << "\\t-u\\t"            << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+    cerr                        << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-c\\t"            << "For each entry in A, report the number of overlaps with B." << endl;\n+    cerr                        << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-v\\t"            << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+    cerr                        << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n+\n+    cerr << "\\t-f\\t"            << "Minimum overlap required as a fraction of A." << endl;\n+    cerr                        << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n+    cerr                        << "\\t\\t- FLOAT (e.g. 0.50)" << endl << endl;\n+\n+    cerr << "\\t-r\\t"            << "Require that the fraction overlap be reciprocal for A and B." << endl;\n+    cerr                        << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n+    cerr                        << "\\t\\t  that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n+\n+    cerr << "\\t-s\\t"            << "Force strandedness.  That is, only report hits in B that" << endl;\n+    cerr                        << "\\t\\toverlap A on the same strand." 
<< endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-split\\t"        << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n+\n+\n+    // end the program here\n+    exit(1);\n+\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/flankBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+# Build rules for the flankBed program.
+# All directories below are relative to this Makefile's directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : this tool's own objects, one per entry in SOURCES.
+# _EXT_OBJECTS  : objects produced by the shared utility directories.
+# EXT_OBJECTS / BUILT_OBJECTS : the same names prefixed with $(OBJ_DIR).
+SOURCES= flankBedMain.cpp flankBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= flankBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this tool's objects and the utility objects into $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of SOURCES as prerequisites, so touching any source
+# rebuilds every object.  $(*F) is the target's file name without directory
+# or .o suffix, which selects the matching .cpp in this directory.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# The utility objects are built by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in the shared obj and bin directories,
+# not only the files produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,163 @@
+/*****************************************************************************
+  flankBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "flankBed.h"
+
+
+BedFlank::BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftFlank, float rightFlank, bool fractional) {
+
+    _bedFile      = bedFile;
+    _genomeFile   = genomeFile;
+    _forceStrand  = forceStrand;
+    _leftFlank    = leftFlank;
+    _rightFlank   = rightFlank;
+    _fractional   = fractional; 
+
+    _bed    = new BedFile(bedFile);
+    _genome = new GenomeFile(genomeFile);
+
+    // get going, slop it up.
+    FlankBed();
+}
+
+
+/*
+    Release the BedFile and GenomeFile objects that the constructor
+    allocated with new; without this they leak when a BedFlank is destroyed.
+*/
+BedFlank::~BedFlank(void) {
+    delete _bed;
+    delete _genome;
+}
+
+
+void BedFlank::FlankBed() {
+
+    int lineNum = 0;
+    BED bedEntry, nullBed;     // used to store the current BED line from the BED file.
+    BedLineStatus bedStatus;
+
+    _bed->Open();
+    bedStatus = _bed->GetNextBed(bedEntry, lineNum);
+    while (bedStatus != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+
+            int leftFlank  = _leftFlank;
+            int rightFlank = _rightFlank;            
+            if (_fractional == true) {
+                leftFlank  = (int) (_leftFlank  * bedEntry.size());
+                rightFlank = (int) (_rightFlank * bedEntry.size());
+            }
+            
+            if ((_forceStrand == false) || (bedEntry.strand == "+"))
+            {
+                AddFlank(bedEntry,  leftFlank, rightFlank);
+            }
+            else if ((_forceStrand == true) && (bedEntry.strand == "-" ))
+            {
+                AddStrandedFlank(bedEntry,  leftFlank, rightFlank);                    
+            }
+            bedEntry = nullBed;
+        }
+        bedStatus = _bed->GetNextBed(bedEntry, lineNum);
+    }
+    _bed->Close();
+}
+
+
+void BedFlank::AddFlank(BED &bed, int leftFlank, int rightFlank) {
+
+    int chromSize = _genome->getChromSize(bed.chrom);
+    if (chromSize == -1) {
+        cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." << endl;
+        exit(1);
+    }
+
+    // init. our left and right flanks to the original BED entry.
+    // we'll create the flanks from these coordinates.
+    BED left  = bed;
+    BED right = bed;
+    
+    // make the left flank (if necessary)
+    if (leftFlank > 0) {
+        if ( (static_cast<int>(left.start) - leftFlank) > 0) 
+        {
+            left.end    = left.start;
+            left.start -= leftFlank;
+        }
+        else 
+        {
+            left.end    = left.start;
+            left.start  = 0;
+        }
+        // report the left flank
+        _bed->reportBedNewLine(left);
+    }
+    
+    // make the left flank (if necessary)
+    if (rightFlank > 0) {
+        if ( (static_cast<int>(right.end) + (rightFlank+1)) <= static_cast<int>(chromSize)) 
+        {
+            right.start    = right.end;
+            right.end     += (rightFlank);
+        }
+        else {
+            right.start    = right.end;
+            right.end     += chromSize;
+        }
+        // report the right flank
+        _bed->reportBedNewLine(right);
+    }    
+}
+
+
+void BedFlank::AddStrandedFlank(BED &bed, int leftFlank, int rightFlank) {
+
+    int chromSize = _genome->getChromSize(bed.chrom);
+    if (chromSize == -1) {
+        cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." << endl;
+        exit(1);
+    }
+
+    // init. our left and right flanks to the original BED entry.
+    // we'll create the flanks from these coordinates.
+    BED left  = bed;
+    BED right = bed;
+    
+    // make the left flank (if necessary)
+    if (rightFlank > 0) {
+        if ( (static_cast<int>(left.start) - rightFlank) > 0) 
+        {
+            left.end    = left.start;
+            left.start -= rightFlank;
+        }
+        else 
+        {
+            left.end    = left.start;
+            left.start  = 0;
+        }
+        // report the left flank
+        _bed->reportBedNewLine(left);
+    }
+    
+    // make the left flank (if necessary)
+    if (leftFlank > 0) {
+        if ( (static_cast<int>(right.end) + leftFlank) <= static_cast<int>(chromSize)) 
+        {
+            right.start    = right.end;
+            right.end     += leftFlank;
+        }
+        else {
+            right.start    = right.end;
+            right.end      = chromSize;
+        }
+        // report the right flank
+        _bed->reportBedNewLine(right);
+    }   
+}
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/flankBed/flankBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,63 @@
+/*****************************************************************************
+  flankBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+
+#include "bedFile.h"
+#include "genomeFile.h"
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <cstdlib>
+#include <ctime>
+using namespace std;
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Creates flanking intervals for the features of a BED file.
+// All of the work is triggered from the constructor; the class exposes
+// no other public operations.
+class BedFlank {
+
+public:
+
+    // constructor
+    // leftSlop / rightSlop are stored as _leftFlank / _rightFlank; with
+    // fractional set they are applied as a fraction of each feature's size.
+    BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional);
+
+    // destructor
+    ~BedFlank(void);
+
+
+
+private:
+
+    // paths of the two input files
+    string _bedFile;
+    string _genomeFile;
+
+    bool   _forceStrand;   // swap left/right for "-" strand features
+    float  _leftFlank;     // requested left flank (bp, or fraction of feature size)
+    float  _rightFlank;    // requested right flank (bp, or fraction of feature size)
+    bool   _fractional;    // interpret the flanks as fractions of the feature size
+
+    // file objects allocated in the constructor
+    BedFile *_bed;
+    GenomeFile *_genome;
+
+    // methods
+
+    // reads every BED record and dispatches to one of the two methods below
+    void FlankBed();
+
+    // method to grab requested flank w.r.t. a single BED entry
+    void AddFlank(BED &bed, int leftSlop, int rightSlop);
+    
+    // method to grab requested flank w.r.t. a single BED entry, 
+    // while choosing flanks based on strand
+    void AddStrandedFlank(BED &bed, int leftSlop, int rightSlop);
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,190 @@
+/*****************************************************************************
+  flankBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "flankBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "flankBed"
+
+
+// define our parameter checking macro
+// Evaluates to true when argv[i] has length paramLen and its first paramLen
+// characters equal param (an exact match when paramLen is param's length).
+// Relies on `argv` and `i` being in scope where it is used.  Arguments and
+// the whole expansion are parenthesized so it composes safely with ! and ||.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile = "stdin";
+    string genomeFile;
+
+    bool haveBed    = true;
+    bool haveGenome = false;
+    bool haveLeft   = false;
+    bool haveRight  = false;
+    bool haveBoth   = false;
+
+    bool forceStrand = false;
+    float leftSlop   = 0.0;
+    float rightSlop  = 0.0;
+    bool  fractional = false;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveGenome = true;
+                genomeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-l", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveLeft = true;
+                leftSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveRight = true;
+                rightSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBoth = true;
+                leftSlop = atof(argv[i + 1]);
+                rightSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else if(PARAMETER_CHECK("-pct", 4, parameterLength)) {
+            fractional = true;
+        }
+        else {
+          cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed || !haveGenome) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if (!haveLeft && !haveRight && !haveBoth) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if (forceStrand && (!(haveLeft) || !(haveRight))) {
+      cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedFlank *bc = new BedFlank(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional);
+        delete bc;
+
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Creates flanking interval(s) for each BED/GFF/VCF feature." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-b\t"                << "Create flanking intervak using -b base pairs in each direction." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+
+    cerr << "\t-l\t"                << "The number of base pairs that a flank should start from orig. start coordinate." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+        
+    cerr << "\t-r\t"                << "The number of base pairs that a flank should end from orig. end coordinate." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+        
+    cerr << "\t-s\t"                << "Define -l and -r based on strand." << endl;
+    cerr                            << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl;
+    cerr                            << "\t\tit will start the flank 500 bp downstream.  Default = false." << endl << endl;
+
+    cerr << "\t-pct\t"              << "Define -l and -r as a fraction of the feature's length." << endl;
+    cerr                            << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl;
+    cerr                            << "\t\twill add 500 bp \"upstream\".  Default = false." << endl << endl;
+
+    cerr << "Notes: " << endl;
+    cerr << "\t(1)  Starts will be set to 0 if options would force it below 0." << endl;
+    cerr << "\t(2)  Ends will be set to the chromosome length if requested flank would" << endl;
+    cerr <<        "\tforce it above the max chrom length." << endl;
+
+    cerr << "\t(3)  The genome file should tab delimited and structured as follows:" << endl;
+    cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl;
+    cerr << "\tFor example, Human (hg19):" << endl;
+    cerr << "\tchr1\t249250621" << endl;
+    cerr << "\tchr2\t243199373" << endl;
+    cerr << "\t..." << endl;
+    cerr << "\tchr18_gl000207_random\t4262" << endl << endl;
+
+
+    cerr << "Tips: " << endl;
+    cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
+    cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
+    cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl;
+    cerr << "\t\"select chrom, size from hg19.chromInfo\"  > hg19.genome" << endl << endl;
+
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,52 @@
+# Build rules for the genomeCoverageBed program.
+# All directories below are relative to this Makefile's directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+    -I$(UTILITIES_DIR)/BamTools-Ancillary
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : this tool's own objects, one per entry in SOURCES.
+# _EXT_OBJECTS  : objects produced by the shared utility directories.
+# EXT_OBJECTS / BUILT_OBJECTS : the same names prefixed with $(OBJ_DIR).
+SOURCES= genomeCoverageMain.cpp genomeCoverageBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= genomeCoverageBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this tool's objects and the utility objects against the bamtools
+# library found under $(UTILITIES_DIR)/BamTools/lib/.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# Every object lists all of SOURCES as prerequisites, so touching any source
+# rebuilds every object.  $(*F) is the target's file name without directory
+# or .o suffix, which selects the matching .cpp in this directory.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# The utility objects are built by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in the shared obj and bin directories,
+# not only the files produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,396 @@\n+/*****************************************************************************\n+genomeCoverage.cpp\n+\n+(c) 2009 - Aaron Quinlan\n+Hall Laboratory\n+Department of Biochemistry and Molecular Genetics\n+University of Virginia\n+aaronquinlan@gmail.com\n+\n+Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "genomeCoverageBed.h"\n+\n+\n+BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,\n+                                     bool eachBase, bool startSites, \n+                                     bool bedGraph, bool bedGraphAll,\n+                                     int max, float scale,\n+                                     bool bamInput, bool obeySplits,\n+                                     bool filterByStrand, string requestedStrand,\n+                                     bool only_5p_end, bool only_3p_end,\n+                                     bool eachBaseZeroBased,\n+                                     bool add_gb_track_line, string gb_track_line_opts) {\n+\n+    _bedFile = bedFile;\n+    _genomeFile = genomeFile;\n+    _eachBase = eachBase;\n+    _eachBaseZeroBased = eachBaseZeroBased;\n+    _startSites = startSites;\n+    _bedGraph = bedGraph;\n+    _bedGraphAll = bedGraphAll;\n+    _max = max;\n+    _scale = scale;\n+    _bamInput = bamInput;\n+    _obeySplits = obeySplits;\n+    _filterByStrand = filterByStrand;\n+    _requestedStrand = requestedStrand;\n+    _only_3p_end = only_3p_end;\n+    _only_5p_end = only_5p_end;\n+    _add_gb_track_line = add_gb_track_line;\n+    _gb_track_line_opts = gb_track_line_opts;\n+    _currChromName = "";\n+    _currChromSize = 0 ;\n+\n+    \n+    if (_bamInput == false) {\n+        _genome = new GenomeFile(genomeFile);\n+    }\n+    \n+    PrintTrackDefinitionLine();\n+\n+    if (_bamInput == false) {\n+        _bed = new BedFile(bedFile);\n+    
    CoverageBed();\n+    }\n+    else {\n+        CoverageBam(_bedFile);\n+    }\n+}\n+\n+void BedGenomeCoverage::PrintTrackDefinitionLine()\n+{\n+    //Print Track Definition line (if requested)\n+    if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) {\n+        string line = "track type=bedGraph";\n+        if (!_gb_track_line_opts.empty()) {\n+            line += " " ;\n+            line += _gb_track_line_opts ;\n+        }\n+        cout << line << endl;\n+    }\n+\n+}\n+\n+\n+BedGenomeCoverage::~BedGenomeCoverage(void) {\n+    delete _bed;\n+    delete _genome;\n+}\n+\n+\n+void BedGenomeCoverage::ResetChromCoverage() {\n+    _currChromName = "";\n+    _currChromSize = 0 ;\n+    std::vector<DEPTH>().swap(_currChromCoverage);\n+}\n+\n+\n+void BedGenomeCoverage::StartNewChrom(const string& newChrom) {\n+    // If we\'ve moved beyond the first encountered chromosomes,\n+    // process the results of the previous chromosome.\n+    if (_currChromName.length() > 0) {\n+        ReportChromCoverage(_currChromCoverage, _currChromSize,\n+                _currChromName, _currChromDepthHist);\n+    }\n+\n+    // empty the previous chromosome and reserve new\n+    std::vector<DEPTH>().swap(_currChromCoverage);\n+\n+    if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {\n+        cerr << "Input error: Chromosome " << _currChromName\n+             << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;\n+\n+    }\n+    _visitedChromosomes.insert(newChrom);\n+\n+    _currChromName = newChrom;\n+\n+    // get the current chrom size and allocate space\n+    _currChromSize = _genome->getChromSize(_currChromName);\n+\n+    if (_currChromSize >= 0)\n+        _currChromCoverage.resize(_currChromSize);\n+    else {\n+        cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." 
<< endl;\n+        exit(1);\n+    }\n+}\n+\n+\n+void BedGenomeCoverage::AddCoverage(int start, int end) {\n+    // process the first line for this chromosome.\n+    // make sure the coordinates fit within'..b'pth - chromCov[pos].ends;\n+        }\n+        // report the histogram for each chromosome\n+        histMap::const_iterator depthIt = chromDepthHist[chrom].begin();\n+        histMap::const_iterator depthEnd = chromDepthHist[chrom].end();\n+        for (; depthIt != depthEnd; ++depthIt) {\n+            int depth = depthIt->first;\n+            unsigned int numBasesAtDepth = depthIt->second;\n+            cout << chrom << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n+                << chromSize << "\\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl;\n+        }\n+    }\n+}\n+\n+\n+\n+void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) {\n+\n+    // get the list of chromosome names in the genome\n+    vector<string> chromList = _genome->getChromList();\n+\n+    unsigned int genomeSize = 0;\n+    vector<string>::const_iterator chromItr = chromList.begin();\n+    vector<string>::const_iterator chromEnd = chromList.end();\n+    for (; chromItr != chromEnd; ++chromItr) {\n+        string chrom = *chromItr;\n+        genomeSize += _genome->getChromSize(chrom);\n+        // if there were no reads for a give chromosome, then\n+        // add the length of the chrom to the 0 bin.\n+        if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) {\n+            chromDepthHist[chrom][0] += _genome->getChromSize(chrom);\n+        }\n+    }\n+\n+    histMap genomeHist; // depth histogram for the entire genome\n+\n+    // loop through each chromosome and add the depth and number of bases at each depth\n+    // to the aggregate histogram for the entire genome\n+    for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) {\n+        string chrom = chromIt->first;\n+        
for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) {\n+            int depth = depthIt->first;\n+            unsigned int numBasesAtDepth = depthIt->second;\n+            genomeHist[depth] += numBasesAtDepth;\n+        }\n+    }\n+\n+    // loop through the depths for the entire genome\n+    // and report the number and fraction of bases in\n+    // the entire genome that are at said depth.\n+    for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) {\n+        int depth = genomeDepthIt->first;\n+        unsigned int numBasesAtDepth = genomeDepthIt->second;\n+\n+        cout << "genome" << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n+            << genomeSize << "\\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl;\n+    }\n+}\n+\n+\n+void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) {\n+\n+    int depth = 0; // initialize the depth\n+    int lastStart = -1;\n+    int lastDepth = -1;\n+\n+    for (int pos = 0; pos < chromSize; pos++) {\n+        depth += chromCov[pos].starts;\n+\n+        if (depth != lastDepth) {\n+            // Coverage depth has changed, print the last interval coverage (if any)\n+            // Print if:\n+            // (1) depth>0 (the default running mode),\n+            // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll)\n+            if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n+                cout << chrom << "\\t" << lastStart << "\\t" << pos << "\\t" << lastDepth * _scale << endl;\n+            }\n+            //Set current position as the new interval start + depth\n+            lastDepth = depth;\n+            lastStart = pos;\n+        }\n+        // Default: the depth has not changed, so we will not print anything.\n+        // Proceed until the depth changes.\n+ 
       // Update depth\n+        depth = depth - chromCov[pos].ends;\n+    }\n+    //Print information about the last position\n+    if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n+        cout << chrom << "\\t" << lastStart << "\\t" << chromSize << "\\t" << lastDepth * _scale << endl;\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,104 @@
+/*****************************************************************************
+genomeCoverageBed.h
+
+(c) 2009 - Aaron Quinlan
+Hall Laboratory
+Department of Biochemistry and Molecular Genetics
+University of Virginia
+aaronquinlan@gmail.com
+
+Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include "genomeFile.h"
+
+#include "BamAncillary.h"
+#include "api/BamReader.h"
+#include "api/BamAux.h"
+using namespace BamTools;
+
+#include <vector>
+#include <set>
+#include <iostream>
+#include <fstream>
+using namespace std;
+
+
+//***********************************************
+// Typedefs (DEPTH is declared in an included project header, not in this file)
+//***********************************************
+typedef map<int, DEPTH, less<int> > depthMap;                 // int key (presumably a position -- TODO confirm) -> DEPTH
+typedef map<string, depthMap, less<string> > chromDepthMap;   // string key (presumably chromosome name) -> depthMap
+
+typedef map<int, unsigned int, less<int> > histMap;           // depth -> number of bases observed at that depth
+typedef map<string, histMap, less<string> > chromHistMap;     // chromosome name -> histMap for that chromosome
+
+//************************************************
+// BedGenomeCoverage: run options plus per-chromosome coverage state; only ctor/dtor are public
+//************************************************
+class BedGenomeCoverage {
+
+public:
+
+    // constructor: parameter names mirror the option variables declared in main() of genomeCoverageMain.cpp
+    BedGenomeCoverage(string bedFile, string genomeFile, 
+                      bool eachBase, bool startSites,
+                      bool bedGraph, bool bedGraphAll, 
+                      int max, float scale, 
+                      bool bamInput, bool obeySplits,
+                      bool filterByStrand, string requestedStrand,
+                      bool only_5p_end, bool only_3p_end,
+                      bool eachBaseZeroBased,
+                      bool add_gb_track_line, string gb_track_line_opts);
+
+    // destructor
+    ~BedGenomeCoverage(void);
+
+private:
+
+    // data (parms): presumably copies of the constructor arguments; flags noted below are from genomeCoverageMain.cpp
+    string _bedFile;                // input path (value given to -i or -ibam)
+    string _genomeFile;             // genome file path (-g)
+    bool _bamInput;                 // -ibam
+    bool _eachBase;                 // -d or -dz
+    bool _eachBaseZeroBased;        // -dz
+    bool _startSites;
+    bool _bedGraph;                 // -bg
+    bool _bedGraphAll;              // -bga; when set, bedGraph reporting also prints zero-depth intervals
+    int _max;                       // -max; main() defaults it to INT_MAX
+    float _scale;                   // -scale; bedGraph reporting prints depth * _scale
+    bool _obeySplits;               // -split
+    bool _filterByStrand;           // -strand was given
+    bool _only_5p_end;              // -5
+    bool _only_3p_end;              // -3
+    bool _add_gb_track_line;        // -trackline
+    string _gb_track_line_opts;     // -trackopts
+    string _requestedStrand;        // "+" or "-" with -strand; main() initialises it to "X"
+
+    BedFile *_bed;                  // NOTE(review): raw pointers; allocation and cleanup are not visible in this header
+    GenomeFile *_genome;
+
+    // data for internal processing
+    chromDepthMap _chromCov;
+    string _currChromName ;
+    vector<DEPTH> _currChromCoverage;
+    chromHistMap _currChromDepthHist;
+    int _currChromSize ;
+    set<string> _visitedChromosomes;
+
+
+    // methods (defined in genomeCoverageBed.cpp)
+    void CoverageBed();
+    void CoverageBam(string bamFile);
+    void LoadBamHeaderIntoGenomeFile(const string &bamFile);
+    void ReportChromCoverage(const vector<DEPTH> &, const int &chromSize, const string &chrom, chromHistMap&);
+    void ReportGenomeCoverage(chromHistMap &chromDepthHist);    // sums the per-chromosome histograms and prints the "genome" rows to stdout
+    void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom);    // prints runs of equal depth: chrom, start, end, depth * _scale
+    void ResetChromCoverage();
+    void StartNewChrom (const string& chrom);
+    void AddCoverage (int start, int end);
+    void AddBlockedCoverage(const vector<BED> &bedBlocks);
+    void PrintFinalCoverage();
+    void PrintTrackDefinitionLine();
+};
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,312 @@\n+/*****************************************************************************\n+genomeCoverageMain.cpp\n+\n+(c) 2009 - Aaron Quinlan\n+Hall Laboratory\n+Department of Biochemistry and Molecular Genetics\n+University of Virginia\n+aaronquinlan@gmail.com\n+\n+Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "genomeCoverageBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "genomeCoverageBed"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedFile;\n+    string genomeFile;\n+    int max = INT_MAX;\n+    float scale = 1.0;\n+\n+    bool haveBed = false;\n+    bool bamInput = false;\n+    bool haveGenome = false;\n+    bool startSites = false;\n+    bool bedGraph = false;\n+    bool bedGraphAll = false;\n+    bool eachBase = false;\n+    bool eachBaseZeroBased = false;\n+    bool obeySplits = false;\n+    bool haveScale = false;\n+    bool filterByStrand = false;\n+    bool only_5p_end = false;\n+    bool only_3p_end = false;\n+    bool add_gb_track_line = false;\n+    string gb_track_opts;\n+    string requestedStrand = "X";\n+\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these 
parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBed = true;\n+                bedFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-ibam", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBed = true;\n+                bamInput = true;\n+                bedFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveGenome = true;\n+                genomeFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-d", 2, parameterLength)) {\n+            eachBase = true;\n+        }\n+        else if(PARAMETER_CHECK("-dz", 3, parameterLength)) {\n+            eachBase = true;\n+            eachBaseZeroBased = true;\n+        }\n+        else if(PARAMETER_CHECK("-bg", 3, parameterLength)) {\n+            bedGraph = true;\n+        }\n+        else if(PARAMETER_CHECK("-bga", 4, parameterLength)) {\n+            bedGraphAll = true;\n+        }\n+        else if(PARAMETER_CHECK("-max", 4, parameterLength)) {\n+            if ((i+1) < argc) {\n+                max = atoi(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-scale", 6, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveScale = true;\n+                scale = atof(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n+            obeySplits = true;\n+        }\n+        else if(PARAMETER_CHECK("-strand", 7, parameterLength)) {\n+            if ((i+1) < argc) {\n+                filterByStrand = true;\n+          
      requestedStrand = argv[i+1][0];\n+                if (!(requestedStrand == "-" || requestedStrand == "+")) {\n+               '..b'\\tquickly extract all regions of a genome with 0 " << endl;\n+    cerr << "\\t\\t\\tcoverage by applying: \\"grep -w 0$\\" to the output." << endl << endl;\n+\n+    cerr << "\\t-split\\t\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl;\n+    cerr << "\\t\\t\\twhen computing coverage." << endl;\n+    cerr << "\\t\\t\\tFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations " << endl;\n+    cerr << "\\t\\t\\tto infer the blocks for computing coverage." << endl;\n+    cerr << "\\t\\t\\tFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds" << endl;\n+    cerr << "\\t\\t\\tfields (i.e., columns 10,11,12)." << endl << endl;\n+\n+    cerr << "\\t-strand\\t\\t" << "Calculate coverage of intervals from a specific strand." << endl;\n+    cerr << "\\t\\t\\tWith BED files, requires at least 6 columns (strand is column 6). " << endl;\n+    cerr << "\\t\\t\\t- (STRING): can be + or -" << endl << endl;\n+\n+    cerr << "\\t-5\\t\\t" << "Calculate coverage of 5\\" positions (instead of entire interval)." << endl << endl;\n+\n+    cerr << "\\t-3\\t\\t" << "Calculate coverage of 3\\" positions (instead of entire interval)." << endl << endl;\n+\n+    cerr << "\\t-max\\t\\t" << "Combine all positions with a depth >= max into" << endl;\n+    cerr << "\\t\\t\\ta single bin in the histogram. Irrelevant" << endl;\n+    cerr << "\\t\\t\\tfor -d and -bedGraph" << endl;\n+    cerr << "\\t\\t\\t- (INTEGER)" << endl << endl;\n+\n+    cerr << "\\t-scale\\t\\t" << "Scale the coverage by a constant factor." << endl;\n+    cerr << "\\t\\t\\tEach coverage value is multiplied by this factor before being reported." << endl;\n+    cerr << "\\t\\t\\tUseful for normalizing coverage by, e.g., reads per million (RPM)." << endl;\n+    cerr << "\\t\\t\\t- Default is 1.0; i.e., unscaled." 
<< endl;\n+    cerr << "\\t\\t\\t- (FLOAT)" << endl << endl;\n+\n+    cerr << "\\t-trackline\\t" << "Adds a UCSC/Genome-Browser track line definition in the first line of the output." << endl;\n+    cerr <<"\\t\\t\\t- See here for more details about track line definition:" << endl;\n+    cerr <<"\\t\\t\\t      http://genome.ucsc.edu/goldenPath/help/bedgraph.html" << endl;\n+    cerr <<"\\t\\t\\t- NOTE: When adding a trackline definition, the output BedGraph can be easily" << endl;\n+    cerr <<"\\t\\t\\t      uploaded to the Genome Browser as a custom track," << endl;\n+    cerr <<"\\t\\t\\t      BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl;\n+\n+    cerr << "\\t-trackopts\\t"<<"Writes additional track line definition parameters in the first line." << endl;\n+    cerr <<"\\t\\t\\t- Example:" << endl;\n+    cerr <<"\\t\\t\\t   -trackopts \'name=\\"My Track\\" visibility=2 color=255,30,30\'" << endl;\n+    cerr <<"\\t\\t\\t   Note the use of single-quotes if you have spaces in your parameters." << endl;\n+    cerr <<"\\t\\t\\t- (TEXT)" << endl << endl;\n+\n+    cerr << "Notes: " << endl;\n+    cerr << "\\t(1) The genome file should tab delimited and structured as follows:" << endl;\n+    cerr << "\\t <chromName><TAB><chromSize>" << endl << endl;\n+    cerr << "\\tFor example, Human (hg19):" << endl;\n+    cerr << "\\tchr1\\t249250621" << endl;\n+    cerr << "\\tchr2\\t243199373" << endl;\n+    cerr << "\\t..." << endl;\n+    cerr << "\\tchr18_gl000207_random\\t4262" << endl << endl;\n+\n+    cerr << "\\t(2) The input BED (-i) file must be grouped by chromosome." << endl;\n+    cerr << "\\t A simple \\"sort -k 1,1 <BED> > <BED>.sorted\\" will suffice."<< endl << endl;\n+\n+    cerr << "\\t(3) The input BAM (-ibam) file must be sorted by position." 
<< endl;\n+    cerr << "\\t A \\"samtools sort <BAM>\\" should suffice."<< endl << endl;\n+\n+    cerr << "Tips: " << endl;\n+    cerr << "\\tOne can use the UCSC Genome Browser\'s MySQL database to extract" << endl;\n+    cerr << "\\tchromosome sizes. For example, H. sapiens:" << endl << endl;\n+    cerr << "\\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\\\" << endl;\n+    cerr << "\\t\\"select chrom, size from hg19.chromInfo\\" > hg19.genome" << endl << endl;\n+\n+\n+    // end the program here\n+    exit(1);\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/intersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,53 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# header search paths (no continuation after the last entry, so the next line is never absorbed)
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+           -I$(UTILITIES_DIR)/BamTools-Ancillary \
+           -I$(UTILITIES_DIR)/chromsweep
+
+# ----------------------------------
+# sources compiled here, plus utility objects (EXT_OBJECTS) expected in $(OBJ_DIR)
+# ----------------------------------
+SOURCES= intersectMain.cpp intersectBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o chromsweep.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= intersectBed
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)  # links into BIN_DIR against the bamtools library under UTILITIES_DIR
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)  # every object lists all sources as prerequisites; the recipe compiles the target's own stem
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):  # no prerequisites: the recipe runs make in each utility directory
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/chromsweep/
+
+clean:  # removes everything in OBJ_DIR and BIN_DIR, not only this tool's files
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,367 @@\n+/*****************************************************************************\n+  intersectBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "intersectBed.h"\n+\n+/************************************\n+Helper functions\n+************************************/\n+bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {\n+\n+    // how many overlaps are there b/w the bed and the set of hits?\n+    CHRPOS s, e;\n+    int overlapBases;\n+    int  numOverlaps = 0;\n+    bool hitsFound   = false;\n+    int aLength      = (a.end - a.start);   // the length of a in b.p.\n+\n+    // loop through the hits and report those that meet the user\'s criteria\n+    vector<BED>::const_iterator h       = hits.begin();\n+    vector<BED>::const_iterator hitsEnd = hits.end();\n+    for (; h != hitsEnd; ++h) {\n+        s            = max(a.start, h->start);\n+        e            = min(a.end, h->end);\n+        overlapBases = (e - s);             // the number of overlapping bases b/w a and b\n+\n+        // is there enough overlap relative to the user\'s request? 
(default ~ 1bp)\n+        if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+            // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n+            if (_reciprocal == false) {\n+                hitsFound = true;\n+                numOverlaps++;\n+                if (_printable == true)\n+                    ReportOverlapDetail(overlapBases, a, *h, s, e);\n+            }\n+            // we require there to be sufficient __reciprocal__ overlap\n+            else {\n+                int bLength    = (h->end - h->start);\n+                float bOverlap = ( (float) overlapBases / (float) bLength );\n+                if (bOverlap >= _overlapFraction) {\n+                    hitsFound = true;\n+                    numOverlaps++;\n+                    if (_printable == true)\n+                        ReportOverlapDetail(overlapBases, a, *h, s, e);\n+                }\n+            }\n+        }\n+    }\n+    // report the summary of the overlaps if requested.\n+    ReportOverlapSummary(a, numOverlaps);\n+    // were hits found for this BED feature?\n+    return hitsFound;\n+}\n+\n+\n+/*\n+    Constructor\n+*/\n+BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n+                           bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n+                           float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,\n+                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,\n+                           bool sortedInput) {\n+\n+    _bedAFile            = bedAFile;\n+    _bedBFile            = bedBFile;\n+    _anyHit              = anyHit;\n+    _noHit               = noHit;\n+    _writeA              = writeA;\n+    _writeB              = writeB;\n+    _writeOverlap        = writeOverlap;\n+    _writeAllOverlap     = writeAllOverlap;\n+    _writeCount          = writeCount;\n+ 
   _overlapFraction     = overlapFraction;\n+    _sameStrand          = sameStrand;\n+    _diffStrand          = diffStrand;\n+    _reciprocal          = reciprocal;\n+    _obeySplits          = obeySplits;\n+    _bamInput            = bamInput;\n+    _bamOutput           = bamOutput;\n+    _isUncompressedBam   = isUncompressedBam;\n+    _sortedInput         = sortedInput;\n+\n+    // should we print each overlap, or does the user want summary information?\n+    _printable = true;\n+    if (_anyHit || _noHit || _writeCount)\n+        _printable = false;\n+        \n+    if (_bamInput == false)\n+        IntersectBed();\n+    else\n+        IntersectBam(bedAFile);\n+}\n+\n+\n+/*\n+    Destructor'..b'der.Open(bamFile);\n+\n+    // get header & reference information\n+    string bamHeader  = reader.GetHeaderText();\n+    RefVector refs    = reader.GetReferenceData();\n+\n+    // open a BAM output to stdout if we are writing BAM\n+    if (_bamOutput == true) {\n+        // set compression mode\n+        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;\n+        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;\n+        writer.SetCompressionMode(compressionMode);\n+        // open our BAM writer\n+        writer.Open("stdout", bamHeader, refs);\n+    }\n+\n+    vector<BED> hits;\n+    // reserve some space\n+    hits.reserve(100);\n+\n+    \n+    BamAlignment bam;    \n+    // get each set of alignments for each pair.\n+    while (reader.GetNextAlignment(bam)) {\n+\n+        if (bam.IsMapped()) {\n+            BED a;\n+            a.chrom = refs.at(bam.RefID).RefName;\n+            a.start = bam.Position;\n+            a.end   = bam.GetEndPosition(false, false);\n+\n+            // build the name field from the BAM alignment.\n+            a.name = bam.Name;\n+            if (bam.IsFirstMate()) a.name += "/1";\n+            if (bam.IsSecondMate()) a.name += "/2";\n+\n+            a.score  = ToString(bam.MapQuality);\n+\n+           
 a.strand = "+";\n+            if (bam.IsReverseStrand()) a.strand = "-";\n+\n+            if (_bamOutput == true) {\n+                bool overlapsFound = false;\n+                // treat the BAM alignment as a single "block"\n+                if (_obeySplits == false) {\n+                    overlapsFound = FindOneOrMoreOverlap(a);\n+                }\n+                // split the BAM alignment into discrete blocks and\n+                // look for overlaps only within each block.\n+                else {\n+                    bool overlapFoundForBlock;\n+                    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a\n+                    // we don\'t want to split on "D" ops, hence the "false"\n+                    getBamBlocks(bam, refs, bedBlocks, false);\n+\n+                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();\n+                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();\n+                    for (; bedItr != bedEnd; ++bedItr) {\n+                        overlapFoundForBlock = FindOneOrMoreOverlap(*bedItr);\n+                        if (overlapFoundForBlock == true)\n+                            overlapsFound = true;\n+                    }\n+                }\n+                if (overlapsFound == true) {\n+                    if (_noHit == false)\n+                        writer.SaveAlignment(bam);\n+                }\n+                else {\n+                    if (_noHit == true) {\n+                        writer.SaveAlignment(bam);\n+                    }\n+                }\n+            }\n+            else {\n+                // treat the BAM alignment as a single BED "block"\n+                if (_obeySplits == false) {\n+                    FindOverlaps(a, hits);\n+                    hits.clear();\n+                }\n+                // split the BAM alignment into discrete BED blocks and\n+                // look for overlaps only within each block.\n+                else 
{\n+                    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a\n+                    getBamBlocks(bam, refs, bedBlocks, false);\n+\n+                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();\n+                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();\n+                    for (; bedItr != bedEnd; ++bedItr) {\n+                        FindOverlaps(*bedItr, hits);\n+                        hits.clear();\n+                    }\n+                }\n+            }\n+        }\n+        // BAM IsMapped() is false\n+        else if (_noHit == true) {\n+            writer.SaveAlignment(bam);\n+        }\n+    }\n+\n+    // close the relevant BAM files.\n+    reader.Close();\n+    if (_bamOutput == true) {\n+        writer.Close();\n+    }\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,98 @@
+/*****************************************************************************
+  intersectBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef INTERSECTBED_H
+#define INTERSECTBED_H
+
+#include "bedFile.h"
+#include "chromsweep.h"
+#include "api/BamReader.h"
+#include "api/BamWriter.h"
+#include "api/BamAux.h"
+#include "BamAncillary.h"
+using namespace BamTools;
+
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+class BedIntersect {  // options and driver for intersectBed; only ctor/dtor are public
+
+public:
+
+    // constructor: copies each option into its member, derives _printable, then runs IntersectBed() or IntersectBam(bedAFile)
+    BedIntersect(string bedAFile, string bedBFile, bool anyHit,
+                               bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
+                               float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
+                               bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
+                               bool sortedInput);
+
+    // destructor
+    ~BedIntersect(void);
+
+private:
+
+    //------------------------------------------------
+    // private attributes (flags noted below are the ones parsed or described in intersectMain.cpp)
+    //------------------------------------------------
+    string _bedAFile;
+    string _bedBFile;
+
+    bool  _writeA;            // should the original A feature be reported?
+    bool  _writeB;            // should the original B feature be reported?
+    bool  _writeOverlap;      // -wo (main() also sets it for -wao)
+    bool  _writeAllOverlap;   // -wao
+
+    bool  _sameStrand;        // -s
+    bool  _diffStrand;        // -S
+    bool  _reciprocal;        // -r: processHits then also requires overlap / length(B) >= _overlapFraction
+    float _overlapFraction;   // -f: minimum overlap / length(A); main() defaults it to 1E-9
+
+    bool  _anyHit;            // -u
+    bool  _noHit;             // -v
+    bool  _writeCount;        // do we want a count of the number of overlaps in B?
+    bool  _obeySplits;        // -split
+    bool  _bamInput;          // -abam; selects IntersectBam over IntersectBed in the constructor
+    bool  _bamOutput;         // main() starts this as true; -a and -bed clear it
+    bool  _isUncompressedBam; // selects BamWriter::Uncompressed in IntersectBam
+    bool  _sortedInput;       // -sorted
+    bool  _printable;         // set in the constructor: false when _anyHit, _noHit or _writeCount is set
+    
+    // instance of a bed file class.
+    BedFile *_bedA, *_bedB;
+
+    //------------------------------------------------
+    // private methods
+    //------------------------------------------------
+    void IntersectBed(istream &bedInput);
+
+    void IntersectBed();
+
+    void IntersectBam(string bamFile);    // reads alignments from bamFile; writes BAM to stdout when _bamOutput is set
+
+    bool processHits(const BED &a, const vector<BED> &hits);    // applies _overlapFraction/_reciprocal, reports kept hits when _printable; true if any hit passed
+
+    bool FindOverlaps(const BED &a, vector<BED> &hits);
+
+    bool FindOneOrMoreOverlap(const BED &a);
+
+    void ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e);
+    
+    void ReportOverlapSummary(const BED &a, const int &numOverlapsFound);
+
+};
+
+#endif /* INTERSECTBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+  intersectMain.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "intersectBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "intersectBed"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedAFile;\n+    string bedBFile;\n+\n+    // input arguments\n+    float overlapFraction = 1E-9;\n+\n+    bool haveBedA           = false;\n+    bool haveBedB           = false;\n+    bool noHit              = false;\n+    bool anyHit             = false;\n+    bool writeA             = false;\n+    bool writeB             = false;\n+    bool writeCount         = false;\n+    bool writeOverlap       = false;\n+    bool writeAllOverlap    = false;\n+    bool haveFraction       = false;\n+    bool reciprocalFraction = false;\n+    bool sameStrand         = false;\n+    bool diffStrand         = false;\n+    bool obeySplits         = false;\n+    bool inputIsBam         = false;\n+    bool outputIsBam        = true;\n+    bool uncompressedBam    = false;\n+    bool sortedInput        = false;\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, 
parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                outputIsBam = false;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                inputIsBam = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedB = true;\n+                bedBFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+            outputIsBam = false;\n+        }\n+        else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+            anyHit = true;\n+        }\n+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFraction = true;\n+                overlapFraction = atof(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n+            writeA = true;\n+        }\n+        else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n+            writeB = true;\n+        }\n+        else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n+            writeOverlap = true;\n+        }\n+        else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n+            writeAllOverlap = true;\n+            writeOverlap = 
true;\n+        }\n+        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+            writeCount = true;\n+        }\n+        else '..b'BAM output. Default is to write compressed BAM." << endl << endl;\n+\n+    cerr << "\\t-bed\\t"          << "When using BAM input (-abam), write output as BED. The default" << endl;\n+    cerr                        << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+    cerr << "\\t-wa\\t"           << "Write the original entry in A for each overlap." << endl << endl;\n+\n+    cerr << "\\t-wb\\t"           << "Write the original entry in B for each overlap." << endl;\n+    cerr                        << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-wo\\t"           << "Write the original A and B entries plus the number of base" << endl;\n+    cerr                        << "\\t\\tpairs of overlap between the two features." << endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n+    cerr                        << "\\t\\t  Only A features with overlap are reported." << endl << endl;\n+\n+    cerr << "\\t-wao\\t"          << "Write the original A and B entries plus the number of base" << endl;\n+    cerr                        << "\\t\\tpairs of overlap between the two features." << endl;\n+    cerr                        << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n+    cerr                        << "\\t\\t  However, A features w/o overlap are also reported" << endl;\n+    cerr                        << "\\t\\t  with a NULL B feature and overlap = 0." << endl << endl;\n+\n+    cerr << "\\t-u\\t"            << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+    cerr                        << "\\t\\t- In other words, just report the fact >=1 hit was found." 
<< endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-c\\t"            << "For each entry in A, report the number of overlaps with B." << endl;\n+    cerr                        << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+    cerr << "\\t-v\\t"            << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+    cerr                        << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n+\n+    cerr << "\\t-f\\t"            << "Minimum overlap required as a fraction of A." << endl;\n+    cerr                        << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n+    cerr                        << "\\t\\t- FLOAT (e.g. 0.50)" << endl << endl;\n+\n+    cerr << "\\t-r\\t"            << "Require that the fraction overlap be reciprocal for A and B." << endl;\n+    cerr                        << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n+    cerr                        << "\\t\\t  that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n+\n+    cerr << "\\t-s\\t"            << "Require same strandedness.  That is, only report hits in B that" << endl;\n+    cerr                        << "\\t\\toverlap A on the _same_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-S\\t"            << "Require different strandedness.  That is, only report hits in B that" << endl;\n+    cerr                        << "\\t\\toverlap A on the _opposite_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." 
<< endl << endl;\n+\n+    cerr << "\\t-split\\t"        << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n+\n+    cerr << "\\t-sorted\\t"        << "Use the \\"chromsweep\\" algorithm for sorted (-k1,1 -k2,2n) input" << endl;\n+    cerr                        << "\\t\\tNOTE: this will trust, but not enforce that data is sorted. Caveat emptor." << endl << endl;\n+\n+    // end the program here\n+    exit(1);\n+\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/linksBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,43 @@
+# Build rules for the linksBed program.
+# Paths are relative to this directory: shared utility sources, the object
+# directory and the directory that receives the linked executable.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS is SOURCES with .cpp replaced by .o; the *_OBJECTS lists below
+# prefix each object name with $(OBJ_DIR)/.
+SOURCES= linksMain.cpp linksBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= linksBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this program's objects and the utility objects into $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of SOURCES as prerequisites, so touching any source
+# recompiles each object. $(*F) is the target's file name without directory
+# and suffix, which selects the matching .cpp.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Run make in each utility directory; presumably those sub-makes place the
+# objects named in _EXT_OBJECTS into $(OBJ_DIR) -- confirm against their Makefiles.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes every file in $(OBJ_DIR) and $(BIN_DIR), not only the
+# outputs of this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,122 @@
+/*****************************************************************************
+  linksBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "linksBed.h"
+
+//
+// Constructor: store the input path and the browser URL components, wrap the
+// input in a BedFile reader, then write the HTML report immediately.
+//
+BedLinks::BedLinks(string &bedFile, string &base, string &org, string &db)
+    : _bedFile(bedFile),
+      _base(base),
+      _org(org),
+      _db(db),
+      _bed(new BedFile(bedFile)) {
+
+    // all of the output is produced as a side effect of construction.
+    CreateLinks();
+}
+
+//
+// Destructor: release the BedFile reader allocated by the constructor.
+//
+BedLinks::~BedLinks(void) {
+    delete _bed;
+    _bed = NULL;
+}
+
+
+void BedLinks::WriteURL(BED &bed, string &base) {
+
+    string position = bed.chrom;
+    std::stringstream posStream;
+    posStream << ":" << bed.start << "-" << bed.end;
+    position.append(posStream.str());
+
+    cout << "<tr>" << endl;
+        cout << "\t<td>" << endl;
+            cout << "\t\t<a href=" << base << position << ">";
+            cout << bed.chrom << ":" << bed.start << "-" << bed.end;
+            cout << "</a>" << endl;
+        cout << "\t</td>" << endl;
+
+        if (_bed->bedType == 4) {
+            cout << "\t<td>" << endl;
+            cout << bed.name << endl;
+            cout << "\t</td>" << endl;
+        }
+        else if (_bed->bedType == 5) {
+            cout << "\t<td>" << endl;
+            cout << bed.name << endl;
+            cout << "\t</td>" << endl;
+
+            cout << "\t<td>" << endl;
+            cout << bed.score << endl;
+            cout << "\t</td>" << endl;
+        }
+        else if ((_bed->bedType == 6) || (_bed->bedType == 9) || (_bed->bedType == 12)) {
+            cout << "\t<td>" << endl;
+            cout << bed.name << endl;
+            cout << "\t</td>" << endl;
+
+            cout << "\t<td>" << endl;
+            cout << bed.score << endl;
+            cout << "\t</td>" << endl;
+
+            cout << "\t<td>" << endl;
+            cout << bed.strand << endl;
+            cout << "\t</td>" << endl;
+        }
+        cout << "</tr>" << endl;
+}
+
+
+void BedLinks::CreateLinks() {
+
+
+    // construct the html base.
+    string org = _org;
+    string db = _db;
+    string base = _base;
+    base.append("/cgi-bin/hgTracks?org=");
+    base.append(org);
+    base.append("&db=");
+    base.append(db);
+    base.append("&position=");
+
+    // create the HTML header
+    cout << "<html>" << endl <<"\t<body>" << endl;
+    cout << "<title>" << _bedFile << "</title>" << endl;
+
+    // start the table of entries
+    cout << "<br>Firefox users: Press and hold the \"apple\" or \"alt\" key and click link to open in new tab." << endl;
+    cout << "<p style=\"font-family:courier\">" << endl;
+    cout << "<table border=\"0\" align=\"justify\"" << endl;
+    cout << "<h3>BED Entries from: stdin </h3>" << endl;
+
+    int lineNum = 0;
+    BED bedEntry, nullBed;
+    BedLineStatus bedStatus;
+
+    _bed->Open();
+    while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            WriteURL(bedEntry, base);
+            bedEntry = nullBed;
+        }
+    }
+    _bed->Close();
+
+    cout << "</table>" << endl;
+    cout << "</p>" << endl;
+    cout << "\t</body>" << endl <<"</html>" << endl;
+}
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/linksBed/linksBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,44 @@
+/*****************************************************************************
+  linksBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedLinks {
+
+public:
+
+    // constructor
+    BedLinks(string &bedFile, string &base, string &org, string &db);
+
+    // destructor
+    ~BedLinks(void);
+
+private:
+    string _bedFile;
+    string _base;
+    string _org;
+    string _db;
+
+    // instance of a bed file class.
+    BedFile *_bed;
+
+    void WriteURL(BED &bed, string &base);
+    void CreateLinks();             // the default.  sorts by chrom (asc.) then by start (asc.)
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,129 @@
+/*****************************************************************************
+  linksBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "linksBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "linksBed"
+
+
+// define our parameter checking macro.
+// True when argv[i] is exactly `param`: paramLen must be the length of
+// `param` and actualLen the length of argv[i]. The expansion refers to
+// `argv` and `i`, so it is only usable inside the argument loops of main().
+// The whole expansion is parenthesized so it composes safely with !, || and &&.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile = "stdin";
+    bool haveBed   = true;
+
+    /* Defaults for everyone else */
+    string org = "human";
+    string db = "hg18";
+    string base = "http://genome.ucsc.edu";
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-base", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                base = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-org", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                org = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-db", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                db = argv[i + 1];
+                i++;
+            }
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedLinks *bl = new BedLinks(bedFile, base, org, db);
+        delete bl;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Creates HTML links to an UCSC Genome Browser from a feature file." << endl << endl;
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> > out.html" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-base\t" << "The browser basename.  Default: http://genome.ucsc.edu " << endl;
+    cerr << "\t-org\t"  << "The organism. Default: human" << endl;
+    cerr << "\t-db\t"   << "The build.  Default: hg18" << endl << endl;
+
+    cerr << "Example: " << endl;
+    cerr << "\t" << "By default, the links created will point to human (hg18) UCSC browser." << endl;
+    cerr <<         "\tIf you have a local mirror, you can override this behavior by supplying" << endl;
+    cerr <<         "\tthe -base, -org, and -db options."  << endl << endl;
+    cerr << "\t" << "For example, if the URL of your local mirror for mouse MM9 is called: " << endl;
+    cerr <<         "\thttp://mymirror.myuniversity.edu, then you would use the following:" << endl;
+    cerr <<         "\t" << "-base http://mymirror.myuniversity.edu" << endl;
+    cerr <<         "\t" << "-org mouse" << endl;
+    cerr <<         "\t" << "-db mm9" << endl;
+
+
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,43 @@
+# Build rules for the maskFastaFromBed program.
+# Paths are relative to this directory: shared utility sources, the object
+# directory and the directory that receives the linked executable.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS is SOURCES with .cpp replaced by .o; the *_OBJECTS lists below
+# prefix each object name with $(OBJ_DIR)/.
+SOURCES= maskFastaFromBedMain.cpp maskFastaFromBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= maskFastaFromBed
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this program's objects and the utility objects into $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of SOURCES as prerequisites, so touching any source
+# recompiles each object. $(*F) is the target's file name without directory
+# and suffix, which selects the matching .cpp.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Run make in each utility directory; presumably those sub-makes place the
+# objects named in _EXT_OBJECTS into $(OBJ_DIR) -- confirm against their Makefiles.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes every file in $(OBJ_DIR) and $(BIN_DIR), not only the
+# outputs of this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,155 @@
+/*****************************************************************************
+  maskFastaFromBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "maskFastaFromBed.h"
+
+
+MaskFastaFromBed::MaskFastaFromBed(const string &fastaInFile,  const string &bedFile, 
+                                   const string &fastaOutFile, bool softMask, char maskChar) {
+    _softMask     = softMask;
+    _fastaInFile  = fastaInFile;
+    _bedFile      = bedFile;
+    _fastaOutFile = fastaOutFile;
+    _maskChar     = maskChar;
+    _bed          = new BedFile(_bedFile);
+
+    _bed->loadBedFileIntoMapNoBin();
+    // start masking.
+    MaskFasta();
+}
+
+
+// Destructor: release the BedFile allocated by the constructor.
+MaskFastaFromBed::~MaskFastaFromBed(void) {
+    delete _bed;
+    _bed = NULL;
+}
+
+
+//******************************************************************************
+// Mask the Fasta file based on the coordinates in the BED file.
+//******************************************************************************
+void MaskFastaFromBed::MaskFasta() {
+
+    /* Make sure that we can open all of the files successfully*/
+
+    // open the fasta database for reading
+    ifstream fa(_fastaInFile.c_str(), ios::in);
+    if ( !fa ) {
+        cerr << "Error: The requested fasta file (" << _fastaInFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+    // open the fasta database for reading
+    ofstream faOut(_fastaOutFile.c_str(), ios::out);
+    if ( !faOut ) {
+        cerr << "Error: The requested fasta output file (" << _fastaOutFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+
+    /* Read the fastaDb chromosome by chromosome*/
+    string fastaInLine;
+    string currChrom;
+    string currDNA = "";
+    currDNA.reserve(500000000);
+    int fastaWidth = -1;
+    bool widthSet  = false;
+    int start, end, length;
+    string replacement;
+
+    while (getline(fa,fastaInLine)) {
+
+        if (fastaInLine.find(">",0) != 0 ) {
+            if (widthSet == false) {
+                fastaWidth = fastaInLine.size();
+                widthSet = true;
+            }
+            currDNA += fastaInLine;
+        }
+        else {
+            if (currDNA.size() > 0) {
+
+                vector<BED> bedList = _bed->bedMapNoBin[currChrom];
+
+                /*
+                    loop through each BED entry for this chrom and
+                    mask the requested sequence in the FASTA file.
+                */
+                for (unsigned int i = 0; i < bedList.size(); i++) {
+                    start = bedList[i].start;
+                    end = bedList[i].end;
+                    length = end - start;
+
+                    /*
+                       (1) if soft masking, extract the sequence, lowercase it,
+                           then put it back
+                       (2) otherwise replace with Ns
+                    */
+                    if (_softMask) {
+                        replacement = currDNA.substr(start, length);
+                        toLowerCase(replacement);
+                        currDNA.replace(start, length, replacement);
+                    }
+                    else {
+                        string hardmask(length, _maskChar);
+                        currDNA.replace(start, length, hardmask);
+                    }
+                }
+                // write the masked chrom to the output file
+                PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth);
+            }
+
+            // reset for the next chromosome.
+            currChrom = fastaInLine.substr(1, fastaInLine.find_first_of(" ")-1);
+            currDNA = "";
+        }
+    }
+
+    // process the last chromosome.
+    // exact same logic as in the main loop.
+    if (currDNA.size() > 0) {
+
+        vector<BED> bedList = _bed->bedMapNoBin[currChrom];
+
+        for (unsigned int i = 0; i < bedList.size(); i++) {
+            start = bedList[i].start;
+            end = bedList[i].end;
+            length = end - start;
+
+            if (_softMask) {
+                replacement = currDNA.substr(start, length);
+                toLowerCase(replacement);
+                currDNA.replace(start, length, replacement);
+            }
+            else {
+                string hardmask(length, _maskChar);
+                currDNA.replace(start, length, hardmask);
+            }
+        }
+        PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth);
+    }
+
+    // closed for business.
+    fa.close();
+    faOut.close();
+}
+
+
+void MaskFastaFromBed::PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width) {
+
+    int seqLength = sequence.size();
+
+    out << ">" << chrom << endl;
+    for(int i = 0; i < seqLength; i += width)  {
+        if (i + width < seqLength) out << sequence.substr(i, width) << endl;
+        else out << sequence.substr(i, seqLength-i) << endl;
+    }
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,56 @@
+/*****************************************************************************
+  maskFastaFromBed.h
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef MASKFASTAFROMBED_H
+#define MASKFASTAFROMBED_H
+
+#include "bedFile.h"
+#include "sequenceUtils.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <cctype>   /* for tolower */
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//
+// MaskFastaFromBed rewrites a FASTA file with the intervals of a BED file
+// masked; constructing an instance performs the whole job.
+//************************************************
+class MaskFastaFromBed {
+
+public:
+
+    // constructor
+    //   fastaInFile  - FASTA file to read
+    //   bedFile      - intervals to mask
+    //   fastaOutFile - FASTA file to write
+    //   softMask     - true: lower-case the masked bases instead of replacing them
+    //   maskChar     - replacement character used when softMask is false
+    MaskFastaFromBed(const string &fastaInFile,  const string &bedFile, 
+                     const string &fastaOutFile, bool softMask, char maskChar);
+
+    // destructor
+    ~MaskFastaFromBed(void);
+
+
+private:
+
+    bool _softMask;       // lower-case the bases rather than overwrite them
+
+    string _fastaInFile;
+    string _bedFile;
+    string _fastaOutFile;
+    char   _maskChar;     // typically "N", but users can choose something else, e.g., "X"
+
+    // instance of a bed file class.
+    BedFile *_bed;
+
+    // reads the input FASTA, masks each record and writes the output FASTA.
+    void MaskFasta();
+
+    // writes ">chrom" followed by the sequence wrapped at `width` characters per line.
+    void PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width);
+
+};
+
+#endif /* MASKFASTAFROMBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,146 @@
+/*****************************************************************************
+  maskFastaFromBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "maskFastaFromBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "maskFastaFromBed"
+
+
+// define our parameter checking macro.
+// True when argv[i] is exactly `param`: paramLen must be the length of
+// `param` and actualLen the length of argv[i]. The expansion refers to
+// `argv` and `i`, so it is only usable inside the argument loops of main().
+// The whole expansion is parenthesized so it composes safely with !, || and &&.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string fastaInFile;
+    string bedFile;
+
+    // output files
+    string fastaOutFile;
+
+    // defaults for parameters
+    bool haveFastaIn  = false;
+    bool haveBed      = false;
+    bool haveFastaOut = false;
+    bool softMask     = false;
+    char maskChar     = 'N';
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-fi", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFastaIn = true;
+                fastaInFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-fo", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFastaOut = true;
+                fastaOutFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed = true;
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-soft", 5, parameterLength)) {
+            softMask = true;
+        }
+        else if(PARAMETER_CHECK("-mc", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                string mask = argv[i + 1];
+                if (mask.size() > 1) {
+                    cerr << "*****ERROR: The mask character (-mc) should be a single character.*****" << endl << endl;
+                    showHelp = true;
+                }
+                else {
+                    maskChar = mask[0];
+                }
+                i++;
+            }
+        }
+        else {
+            cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    if (!haveFastaIn || !haveFastaOut || !haveBed) {
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        MaskFastaFromBed *maskFasta = new MaskFastaFromBed(fastaInFile, bedFile, fastaOutFile, softMask, maskChar);
+        delete maskFasta;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Mask a fasta file based on feature coordinates." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -out <fasta> -bed <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options:" << endl;
+    cerr << "\t-fi\tInput FASTA file" << endl;
+    cerr << "\t-bed\tBED/GFF/VCF file of ranges to mask in -fi" << endl;
+    cerr << "\t-fo\tOutput FASTA file" << endl;
+    cerr << "\t-soft\tEnforce \"soft\" masking.  That is, instead of masking with Ns," << endl;
+    cerr << "\t\tmask with lower-case bases." << endl;
+    cerr << "\t-mc\tReplace masking character.  That is, instead of masking with Ns, use another character." << endl;
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/mergeBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,44 @@
+# Build rules for the mergeBed program.
+# Paths are relative to this directory: shared utility sources, the object
+# directory and the directory that receives the linked executable.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS is SOURCES with .cpp replaced by .o; the *_OBJECTS lists below
+# prefix each object name with $(OBJ_DIR)/.
+SOURCES= mergeMain.cpp mergeBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= mergeBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this program's objects and the utility objects into $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of SOURCES as prerequisites, so touching any source
+# recompiles each object. $(*F) is the target's file name without directory
+# and suffix, which selects the matching .cpp.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Run make once in each utility directory (the fileType sub-make used to be
+# listed twice).
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes every file in $(OBJ_DIR) and $(BIN_DIR), not only the
+# outputs of this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,364 @@\n+/*****************************************************************************\n+  mergeBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "mergeBed.h"\n+\n+\n+\n+void BedMerge::ReportMergedNames(const vector<string> &names) {\n+    if (names.size() > 0) {\n+        printf("\\t");\n+        vector<string>::const_iterator nameItr = names.begin();\n+        vector<string>::const_iterator nameEnd = names.end();\n+        for (; nameItr != nameEnd; ++nameItr) {\n+            if (nameItr < (nameEnd - 1))\n+                cout << *nameItr << ";";\n+            else\n+                cout << *nameItr;\n+        }\n+    }\n+    else {\n+        cerr << endl \n+             << "*****" << endl \n+             << "*****ERROR: No names found to report for the -names option. Exiting." 
<< endl \n+             << "*****" << endl;\n+        exit(1);\n+    }\n+}\n+\n+\n+void BedMerge::ReportMergedScores(const vector<string> &scores) {\n+    if (scores.size() > 0) {\n+        printf("\\t");\n+\n+        // convert the scores to floats\n+        vector<float> data;\n+        for (size_t i = 0 ; i < scores.size() ; i++) {\n+            data.push_back(atof(scores[i].c_str()));\n+        }    \n+\n+        if (_scoreOp == "sum") {\n+            printf("%.3f", accumulate(data.begin(), data.end(), 0.0));\n+        }\n+        else if (_scoreOp == "min") {\n+            printf("%.3f", *min_element( data.begin(), data.end() ));\n+        }\n+        else if (_scoreOp == "max") {\n+            printf("%.3f", *max_element( data.begin(), data.end() ));\n+        }\n+        else if (_scoreOp == "mean") {\n+            double total = accumulate(data.begin(), data.end(), 0.0);\n+            double mean = total / data.size();\n+            printf("%.3f", mean);\n+        }\n+        else if (_scoreOp == "median") {\n+            double median = 0.0;\n+            sort(data.begin(), data.end());\n+            int totalLines = data.size();\n+            if ((totalLines % 2) > 0) {\n+                long mid;\n+                mid = totalLines / 2;\n+                median = data[mid];\n+            }\n+            else {\n+                long midLow, midHigh;\n+                midLow = (totalLines / 2) - 1;\n+                midHigh = (totalLines / 2);\n+                median = (data[midLow] + data[midHigh]) / 2.0;\n+            }\n+            printf("%.3f", median);\n+        }\n+        else if ((_scoreOp == "mode") || (_scoreOp == "antimode")) {\n+             // compute the frequency of each unique value\n+             map<string, int> freqs;\n+             vector<string>::const_iterator dIt  = scores.begin();\n+             vector<string>::const_iterator dEnd = scores.end();\n+             for (; dIt != dEnd; ++dIt) {\n+                 freqs[*dIt]++;\n+     
        }\n+\n+             // grab the mode and the anti mode\n+             string mode, antiMode;\n+             int    count = 0;\n+             int minCount = INT_MAX;\n+             for(map<string,int>::const_iterator iter = freqs.begin(); iter != freqs.end(); ++iter) {\n+                 if (iter->second > count) {\n+                     mode = iter->first;\n+                     count = iter->second;\n+                 }\n+                 if (iter->second < minCount) {\n+                     antiMode = iter->first;\n+                     minCount = iter->second;\n+                 }\n+             }\n+             // report\n+             if (_scoreOp == "mode") {\n+                 printf("%s", mode.c_str());\n+             }\n+             else if (_scoreOp == "antimode") {\n+                 printf("%s", antiMode.c_str());\n+             }\n+         }\n+         else if (_scoreOp == "collapse") {    \n+            vector<string>::const_iterator scoreItr = scores.begin();\n+'..b'lock, no overlap\n+            if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {\n+                if (start >= 0) {\n+                    Report(chrom, start, end, names, scores, mergeCount);\n+                    // reset\n+                    mergeCount = 1;\n+                    names.clear();\n+                    scores.clear();\n+                }\n+                start = bedItr->start;\n+                end   = bedItr->end;\n+                if (!bedItr->name.empty())  names.push_back(bedItr->name);\n+                if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+            }\n+            // same block, overlaps\n+            else {\n+                if ((int) bedItr-> end > end) end = bedItr->end;\n+                mergeCount++;\n+                if (!bedItr->name.empty())  names.push_back(bedItr->name);\n+                if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+            }\n+        }\n+        if (start >= 0) 
{\n+            Report(chrom, start, end, names, scores, mergeCount);\n+        }\n+    }\n+}\n+\n+\n+// ==================================================================================\n+// = Merge overlapping BED entries into a single entry, accounting for strandedness =\n+// ==================================================================================\n+void BedMerge::MergeBedStranded() {\n+\n+    // load the "B" bed file into a map so\n+    // that we can easily compare "A" to it for overlaps\n+    _bed->loadBedFileIntoMapNoBin();\n+\n+    // loop through each chromosome and merge their BED entries\n+    masterBedMapNoBin::const_iterator m    = _bed->bedMapNoBin.begin();\n+    masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();\n+    for (; m != mEnd; ++m) {\n+        \n+        // bedList is already sorted by start position.\n+        string chrom        = m->first;\n+        vector<BED> bedList = m->second;\n+\n+        // make a list of the two strands to merge separately.\n+        vector<string> strands(2);\n+        strands[0] = "+";\n+        strands[1] = "-";\n+\n+        // do two passes, one for each strand.\n+        for (unsigned int s = 0; s < strands.size(); s++) {\n+\n+            int mergeCount = 1;\n+            int numOnStrand = 0;\n+            vector<string> names;\n+            vector<string> scores;\n+\n+            // merge overlapping features for this chromosome.\n+            int start = -1;\n+            int end   = -1;\n+            vector<BED>::const_iterator bedItr = bedList.begin();\n+            vector<BED>::const_iterator bedEnd = bedList.end();\n+            for (; bedItr != bedEnd; ++bedItr) {\n+\n+                // if forcing strandedness, move on if the hit\n+                // is not on the current strand.\n+                if (bedItr->strand != strands[s]) { continue; }\n+                else { numOnStrand++; }\n+                \n+                if ( (((int) bedItr->start - end) > _maxDistance) || 
(end < 0)) {\n+                    if (start >= 0) {\n+                        ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n+                        // reset\n+                        mergeCount = 1;\n+                        names.clear();\n+                        scores.clear();\n+                    }\n+                    start = bedItr->start;\n+                    end   = bedItr->end;\n+                    if (!bedItr->name.empty())  names.push_back(bedItr->name);\n+                    if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+                }\n+                else {\n+                    if ((int) bedItr-> end > end) end = bedItr->end;\n+                    mergeCount++;\n+                    if (!bedItr->name.empty())  names.push_back(bedItr->name);\n+                    if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+                }\n+            }\n+            if (start >= 0) {\n+                ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n+            }\n+        }\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,59 @@
+/*****************************************************************************
+  mergeBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+#include <limits.h>
+#include <stdlib.h>
+
+using namespace std;
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedMerge {
+
+public:
+
+  // constructor
+  BedMerge(string &bedFile, bool numEntries, 
+           int maxDistance, bool forceStrand, 
+           bool reportNames, bool reportScores, const string &scoreOp);
+
+  // destructor
+  ~BedMerge(void);
+
+  void MergeBed();
+  void MergeBedStranded();
+
+private:
+
+    string _bedFile;
+    bool   _numEntries;
+    bool   _forceStrand;
+    bool   _reportNames;
+    bool   _reportScores;
+    string _scoreOp;
+    int    _maxDistance;
+    // instance of a bed file class.
+    BedFile *_bed;
+
+    void Report(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount);
+    void ReportStranded(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount, string strand);
+    void ReportMergedNames(const vector<string> &names);
+    void ReportMergedScores(const vector<string> &scores);
+    
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,163 @@
+/*****************************************************************************
+  mergeMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "mergeBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "mergeBed"
+
+
+// define our parameter checking macro.
+// True when argv[i] equals "param"; paramLen must be the exact length of "param"
+// and actualLen the length of argv[i].  Expects "argv" and the index "i" to be in
+// scope at the point of use.  The arguments and the whole expansion are
+// parenthesized so that the macro can be combined safely with other operators.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile  = "stdin";
+    int maxDistance = 0;
+    string scoreOp  = "";
+
+    // input arguments
+    bool haveBed         = true;
+    bool numEntries      = false;
+    bool haveMaxDistance = false;
+    bool forceStrand     = false;
+    bool reportNames     = false;
+    bool reportScores    = false;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-n", 2, parameterLength)) {
+            numEntries = true;
+        }
+        else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveMaxDistance = true;
+                maxDistance = atoi(argv[i + 1]);
+                i++;
+            }
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else if (PARAMETER_CHECK("-nms", 4, parameterLength)) {
+            reportNames = true;
+        }
+        else if (PARAMETER_CHECK("-scores", 7, parameterLength)) {
+            reportScores = true;
+            if ((i+1) < argc) {
+                scoreOp      = argv[i + 1];
+                i++;
+            }
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (reportNames && numEntries) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -n OR -nms, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if ((reportScores == true) && (scoreOp != "sum")  && (scoreOp != "max")    && (scoreOp != "min") && (scoreOp != "mean") &&
+        (scoreOp != "mode") && (scoreOp != "median") && (scoreOp != "antimode") && (scoreOp != "collapse")) 
+    {
+        cerr << endl << "*****" << endl << "*****ERROR: Invalid scoreOp selection \"" << scoreOp << endl << "\"  *****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedMerge *bm = new BedMerge(bedFile, numEntries, maxDistance, forceStrand, reportNames, reportScores, scoreOp);
+        delete bm;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Merges overlapping BED/GFF/VCF entries into a single interval." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-s\t"                     << "Force strandedness.  That is, only merge features" << endl;
+    cerr                                 << "\t\tthat are the same strand." << endl;
+    cerr                                 << "\t\t- By default, merging is done without respect to strand." << endl << endl;
+
+    cerr << "\t-n\t"                     << "Report the number of BED entries that were merged." << endl;
+    cerr                                 << "\t\t- Note: \"1\" is reported if no merging occurred." << endl << endl;
+
+
+    cerr << "\t-d\t"                     << "Maximum distance between features allowed for features" << endl;
+    cerr                                 << "\t\tto be merged." << endl;
+    cerr                                 << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl;
+    cerr                                 << "\t\t- (INTEGER)" << endl << endl;
+
+    cerr << "\t-nms\t"                   << "Report the names of the merged features separated by semicolons." << endl << endl;
+    
+    cerr << "\t-scores\t"                << "Report the scores of the merged features. Specify one of " << endl;
+    cerr                                 << "\t\tthe following options for reporting scores:" << endl;
+    cerr                                 << "\t\t  sum, min, max," << endl;
+    cerr                                 << "\t\t  mean, median, mode, antimode," << endl;
+    cerr                                 << "\t\t  collapse (i.e., print a semicolon-separated list)," << endl;
+    cerr                                 << "\t\t- (INTEGER)" << endl << endl;
+    
+    cerr << "Notes: " << endl;
+    cerr << "\t(1) All output, regardless of input type (e.g., GFF or VCF)" << endl;
+    cerr << "\t    will in BED format with zero-based starts" << endl << endl;
+
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiBamCov/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,48 @@
+# Locations of the shared utility sources and of the build output.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# ------------------------------------------
+# header search paths handed to the compiler
+# ------------------------------------------
+INCLUDES = $(addprefix -I$(UTILITIES_DIR)/, \
+               bedFile/ version/ gzstream/ lineFileUtilities/ fileType/ BamTools/include)
+
+# -----------------------------------------------------------------
+# sources of this tool, their objects, and the shared utility objects
+# -----------------------------------------------------------------
+SOURCES = multiBamCovMain.cpp multiBamCov.cpp
+OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES))
+_EXT_OBJECTS = bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
+BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))
+PROGRAM = multiBamCov
+
+all: $(PROGRAM)
+
+# link the tool against its own objects, the utility objects and BamTools
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+	@echo "  * linking $(PROGRAM)"
+	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# compile each source; $(*F) is the file part of the target without its suffix
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# the utility objects are produced by the Makefiles of their own directories
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: all clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,134 @@
+/*****************************************************************************
+  multiBamCov.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "multiBamCov.h"
+#include "api/BamMultiReader.h"
+
+
+/*
+    Constructor: stores the BAM file names and the counting options, creates
+    the BedFile object for bed_file and records the position of every BAM
+    file name via LoadBamFileMap().
+*/
+MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, 
+                         int minQual, bool properOnly,
+                         bool keepDuplicates, bool keepFailedQC)
+    : _bam_files(bam_files)
+    , _bed_file(bed_file)
+    // _bed is declared after _bed_file in the class, so _bed_file is
+    // already initialized when it is handed to BedFile here.
+    , _bed(new BedFile(_bed_file))
+    , _minQual(minQual)
+    , _properOnly(properOnly)
+    , _keepDuplicates(keepDuplicates)
+    , _keepFailedQC(keepFailedQC)
+{
+    LoadBamFileMap();
+}
+
+
+/*
+    Destructor: releases the BedFile object that the constructor allocated
+    with new (it was never freed before).
+*/
+MultiCovBam::~MultiCovBam(void) 
+{
+    delete _bed;
+    _bed = NULL;
+}
+
+
+
+void MultiCovBam::CollectCoverage()
+{
+    BamMultiReader reader;
+    
+    if ( !reader.Open(_bam_files) )
+    {
+        cerr << "Could not open input BAM files." << endl;
+        exit(1);
+    }
+    else
+    {
+        // attempt to find index files
+        reader.LocateIndexes();
+
+        // if index data available for all BAM files, we can use SetRegion
+        if ( reader.HasIndexes() ) {
+            BED bed, nullBed;
+            int lineNum = 0;
+            BedLineStatus bedStatus;
+
+            _bed->Open();
+            // loop through each BED entry, jump to it, 
+            // and collect coverage from each BAM
+            while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
+            {
+                if (bedStatus == BED_VALID)
+                {
+                    // initialize counts for each file to 0
+                    vector<int> counts(_bam_files.size(), 0);
+                    // get the BAM refId for this chrom.
+                    int refId = reader.GetReferenceID(bed.chrom);
+                    // set up a BamRegion to which to attempt to jump
+                    BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
+                    
+                    // everything checks out, just iterate through specified region, counting alignments
+                    if ( (refId != -1) && (reader.SetRegion(region)) ) {
+                        BamAlignment al;
+                        while ( reader.GetNextAlignment(al) )
+                        {
+                            bool duplicate = al.IsDuplicate();
+                            bool failedQC  = al.IsFailedQC();
+                            if (_keepDuplicates) duplicate = false;
+                            if (_keepFailedQC)    failedQC = false;
+                            // map qual must exceed minimum
+                            if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
+                                // ignore if not properly paired and we actually care.
+                                if (_properOnly && !al.IsProperPair())
+                                    continue;
+
+                                // lookup the offset of the file name and tabulate 
+                                //coverage for the appropriate file
+                                counts[bamFileMap[al.Filename]]++;
+                            }
+                        }
+                    }
+                    // report the cov at this interval for each file and reset
+                    _bed->reportBedTab(bed);
+                    ReportCounts(counts);
+                    bed = nullBed;
+                }
+            }
+            _bed->Close();
+        }
+        else {
+            cerr << "Could not find indexes." << endl;
+            reader.Close();
+            exit(1);
+        }
+    }
+}
+
+
+// Records, for every BAM file name, its position within _bam_files.
+void MultiCovBam::LoadBamFileMap(void) 
+{
+    size_t fileIdx = 0;
+    vector<string>::const_iterator fileItr = _bam_files.begin();
+    for (; fileItr != _bam_files.end(); ++fileItr, ++fileIdx)
+        bamFileMap[*fileItr] = fileIdx;
+}
+
+void MultiCovBam::ReportCounts(const vector<int> &counts) 
+{
+    for (size_t i = 0; i < counts.size(); ++i)
+    {
+        if (i < counts.size() - 1)
+            cout << counts[i] << "\t";
+        else
+            cout << counts[i];
+    }
+    cout << endl;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,64 @@
+/*****************************************************************************
+  multiBamCov.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef MULTICOVBAM_H
+#define MULTICOVBAM_H
+
+#include "bedFile.h"
+#include "api/BamMultiReader.h"
+using namespace BamTools;
+
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+class MultiCovBam {
+
+public:
+
+    // constructor
+    MultiCovBam(const vector<string> &bam_files, const string bed_file, 
+                int minQual, bool properOnly, 
+                bool keepDuplicates, bool keepFailedQC);
+
+    // destructor
+    ~MultiCovBam(void);
+
+    void CollectCoverage();
+
+private:
+
+    //------------------------------------------------
+    // private attributes
+    //------------------------------------------------
+    vector<string> _bam_files;
+    string _bed_file;
+ BedFile *_bed;
+
+ // attributes to control what is counted
+    int _minQual;
+    bool _properOnly;
+    bool _keepDuplicates;
+    bool _keepFailedQC;
+    
+
+    map<string, int> bamFileMap;
+    
+    void LoadBamFileMap(void);
+    void ReportCounts(const vector<int> &counts);
+};
+
+#endif /* MULTICOVBAM_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,145 @@
+/*****************************************************************************
+  multiBamCovMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "multiBamCov.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "multiBamCov"
+
+
+// define our parameter checking macro.
+// True when argv[i] equals "param"; paramLen must be the exact length of "param"
+// and actualLen the length of argv[i].  Expects "argv" and the index "i" to be in
+// scope at the point of use.  The arguments and the whole expansion are
+// parenthesized so that the macro can be combined safely with other operators.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile;
+    vector<string> bamFiles;
+    int minQual = 0;
+
+    // input arguments
+    bool haveBed           = false;
+    bool haveBams          = false;
+    bool properOnly        = false;
+    bool keepDuplicates    = false;
+    bool keepFailedQC      = false;
+    
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed = true;
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bams", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBams = true;
+                i = i+1;
+                string file = argv[i];
+                while (file[0] != '-' && i < argc) {
+                    bamFiles.push_back(file);
+                    i++;
+                    if (i < argc)
+                        file = argv[i];
+                }
+                i--;
+            }
+        }
+        else if(PARAMETER_CHECK("-q", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                minQual = atoi(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-p", 2, parameterLength)) {
+            properOnly = true;
+        }
+        else if(PARAMETER_CHECK("-D", 2, parameterLength)) {
+            keepDuplicates = true;
+        }
+        
+        else if(PARAMETER_CHECK("-F", 2, parameterLength)) {
+            keepFailedQC = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    if (!showHelp) {
+        MultiCovBam *mc = new MultiCovBam(bamFiles, bedFile, minQual, properOnly, keepDuplicates, keepFailedQC);
+        mc->CollectCoverage();
+        delete mc;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Counts sequence coverage for multiple bams at specific loci." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -bams aln.1.bam aln.2.bam ... aln.n.bam -bed <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-bams\t"        << "The bam files." << endl << endl;
+
+    cerr << "\t-bed\t"         << "The bed file." << endl << endl;
+
+    cerr << "\t-q\t"           << "Minimum mapping quality allowed. Default is 0." << endl << endl;
+
+    cerr << "\t-D\t"           << "Include duplicate-marked reads.  Default is to count non-duplicates only" << endl << endl;
+
+    cerr << "\t-F\t"           << "Include failed-QC reads.  Default is to count pass-QC reads only" << endl << endl;
+
+    cerr << "\t-p\t"           << "Only count proper pairs.  Default is to count all alignments with MAPQ" << endl;
+    cerr << "\t\t"             << "greater than the -q argument, regardless of the BAM FLAG field." << endl << endl;
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,49 @@
+# Locations of the shared utility sources and of the build output.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+    -I$(UTILITIES_DIR)/lineFileUtilities/ \
+    -I$(UTILITIES_DIR)/genomeFile/ \
+    -I$(UTILITIES_DIR)/version/ \
+    -I$(UTILITIES_DIR)/gzstream/ \
+    -I$(UTILITIES_DIR)/fileType/ \
+    -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= multiIntersectBed.cpp multiIntersectBedMain.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= multiIntersectBed
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link the tool from its own objects and the shared utility objects
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+	@echo "  * linking $(PROGRAM)"
+	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile each source; $(*F) is the file part of the target without its suffix
+$(BUILT_OBJECTS): $(SOURCES)
+	@echo "  * compiling" $(*F).cpp
+	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Build every object listed in _EXT_OBJECTS in its own directory.  bedFile.o
+# is one of them, so bedFile/ has to be built here (bedGraphFile/ provides
+# none of the listed objects).
+$(EXT_OBJECTS):
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,64 @@
+/*****************************************************************************
+  intervalItem.h
+
+  (c) 2010 - Assaf Gordon
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef INTERVALITEM_H
+#define INTERVALITEM_H
+
+#include <string>
+#include <queue>
+
+// Tells whether a queued coordinate marks the start or the end of an interval.
+enum COORDINATE_TYPE {
+    START = 0,  // the coordinate is a start position
+    END   = 1   // the coordinate is an end position
+};
+
+/*
+   An interval item in the priority queue.
+
+   An IntervalItem can mark either a START position or an END position.
+ */
+class IntervalItem
+{
+
+
+public:
+    int source_index;           // which source BedGraph file this came from
+    COORDINATE_TYPE coord_type; // is this the start or the end position?
+    CHRPOS coord;
+
+    IntervalItem () :
+       source_index(-1),
+       coord_type(START),
+       coord(0)
+    {}
+    
+    IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord) :
+        source_index(_index),
+        coord_type(_type),
+        coord(_coord)
+    {}
+
+    IntervalItem(const IntervalItem &other) :
+        source_index(other.source_index),
+        coord_type(other.coord_type),
+        coord(other.coord)
+    {}
+
+    bool operator< ( const IntervalItem& other ) const
+    {
+        return this->coord > other.coord;
+    }
+};
+
+// our priority queue
+typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE;
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,289 @@\n+/*****************************************************************************\n+  unionBedGraphs.cpp\n+\n+  (c) 2010 - Assaf Gordon, CSHL\n+           - Aaron Quinlan, UVA\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <cassert>\n+#include <cstring>\n+#include <cstdlib>\n+#include <iostream>\n+#include <algorithm>\n+\n+#include "bedFile.h"\n+#include "multiIntersectBed.h"\n+\n+using namespace std;\n+\n+\n+MultiIntersectBed::MultiIntersectBed(std::ostream& _output,\n+                            const vector<string>& _filenames,\n+                            const vector<string>& _titles,\n+                            bool _print_empty_regions,\n+                            const std::string& _genome_size_filename,\n+                            const std::string& _no_coverage_value   ) :\n+    filenames(_filenames),\n+    titles(_titles),\n+    output(_output),\n+    current_non_zero_inputs(0),\n+    print_empty_regions(_print_empty_regions),\n+    haveTitles(false),\n+    genome_sizes(NULL),\n+    no_coverage_value(_no_coverage_value)\n+{\n+    if (print_empty_regions) {\n+        assert(!_genome_size_filename.empty());\n+\n+        genome_sizes = new GenomeFile(_genome_size_filename);\n+    }\n+    \n+    if (titles.size() > 0) {\n+        haveTitles = true;\n+    }\n+}\n+\n+\n+MultiIntersectBed::~MultiIntersectBed() {\n+    CloseFiles();\n+    if (genome_sizes) {\n+        delete genome_sizes;\n+        genome_sizes = NULL ;\n+    }\n+}\n+\n+\n+void MultiIntersectBed::MultiIntersect() {\n+    OpenFiles();\n+\n+    // Add the first interval from each file\n+    for(size_t i = 0;i < input_files.size(); ++i)\n+        LoadNextItem(i);\n+\n+    // Chromosome loop - once per chromosome\n+    do {\n+        // 
Find the first chromosome to use\n+        current_chrom = DetermineNextChrom();\n+\n+        // Populate the queue with initial values from all files\n+        // (if they belong to the correct chromosome)\n+        for(size_t i = 0; i < input_files.size(); ++i)\n+            AddInterval(i);\n+\n+        CHRPOS current_start = ConsumeNextCoordinate();\n+\n+        // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage\n+        if (print_empty_regions && current_start > 0)\n+            PrintEmptyCoverage(0,current_start);\n+\n+        // Intervals loop - until all intervals (of current chromosome) from all files are used.\n+        do {\n+            CHRPOS current_end = queue.top().coord;\n+            PrintCoverage(current_start, current_end);\n+            current_start = ConsumeNextCoordinate();\n+        } while (!queue.empty());\n+\n+        // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome\n+            // print a dummy empty coverage\n+        if (print_empty_regions) {\n+            CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);\n+            if (current_start < chrom_size)\n+                PrintEmptyCoverage(current_start, chrom_size);\n+        }\n+\n+    } while (!AllFilesDone());\n+}\n+\n+\n+CHRPOS MultiIntersectBed::ConsumeNextCoordinate() {\n+    assert(!queue.empty());\n+\n+    CHRPOS new_position = queue.top().coord;\n+    do {\n+        IntervalItem item = queue.top();\n+        UpdateInformation(item);\n+        queue.pop();\n+    } while (!queue.empty() && queue.top().coord == new_position);\n+\n+    return new_position;\n+}\n+\n+\n+void MultiIntersectBed::UpdateInformation(const IntervalItem &item) {\n+    // Update the depth coverage for this file\n+\n+    // Which coordinate is it - start or end?\n+    switch (item.coord_type)\n+    {\n+    case START:\n+        current_depth[item.source_index] = 1;\n+        
current_non_zero_inputs++;\n+        files_with_coverage[item.source_index] = true;\n+        break;\n+    case END:\n+        //Read the next interval from thi'..b'rval(int index) {\n+    assert(static_cast<unsigned int>(index) < input_files.size());\n+\n+    //This file has no more intervals\n+    if (current_item[index].chrom.empty())\n+        return;\n+\n+    //If the next interval belongs to a different chrom, don\'t add it\n+    if (current_item[index].chrom!=current_chrom)\n+        return;\n+\n+    const BED &bed(current_item[index]);\n+\n+    IntervalItem start_item(index, START, bed.start);\n+    IntervalItem end_item(index, END, bed.end);\n+\n+    queue.push(start_item);\n+    queue.push(end_item);\n+\n+    LoadNextItem(index);\n+}\n+\n+\n+void MultiIntersectBed::PrintHeader() {\n+    output << "chrom\\tstart\\tend\\tnum\\tlist" ;\n+    for (size_t i=0;i<titles.size();++i)\n+        output << "\\t" <<titles[i];\n+    output << endl;\n+}\n+\n+\n+void MultiIntersectBed::PrintCoverage(CHRPOS start, CHRPOS end) {\n+    if ( current_non_zero_inputs == 0 && ! 
print_empty_regions )\n+        return ;\n+\n+    output << current_chrom << "\\t"\n+        << start << "\\t"\n+        << end   << "\\t"\n+        << current_non_zero_inputs << "\\t";\n+    \n+    ostringstream file_list_string;\n+    ostringstream file_bool_string;\n+    int depth_count = 0;\n+    for (size_t i = 0; i < current_depth.size(); ++i)\n+    {\n+        if (current_depth[i] > 0) {\n+            if (depth_count < current_non_zero_inputs - 1) {\n+                if (!haveTitles)\n+                    file_list_string << i+1 << ",";\n+                else \n+                    file_list_string << titles[i] << ",";\n+            }\n+            else {\n+                if (!haveTitles)\n+                    file_list_string << i+1;\n+                else \n+                    file_list_string << titles[i];\n+            }\n+            depth_count++;\n+        }\n+        file_bool_string << "\\t" << current_depth[i];\n+    }\n+    if (current_non_zero_inputs > 0) {\n+        cout << file_list_string.str() << file_bool_string.str() << endl;\n+    }\n+    else {\n+        cout << "none" << file_bool_string.str() << endl;\n+    }\n+}\n+\n+\n+void MultiIntersectBed::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {\n+    output << current_chrom << "\\t"\n+        << start << "\\t"\n+        << end   << "\\t"\n+        << "0"   << "\\t" << "none";\n+        \n+    for (size_t i=0;i<current_depth.size();++i)\n+        output << "\\t0";\n+\n+    output << endl;\n+}\n+\n+\n+void MultiIntersectBed::LoadNextItem(int index) {\n+    assert(static_cast<unsigned int>(index) < input_files.size());\n+\n+    current_item[index].chrom="";\n+\n+    BedFile *file = input_files[index];\n+    BED merged_bed;\n+    int lineNum = 0;\n+    //\n+    // TO DO: Do the mergeing on the fly.  
How best to do this?\n+    // \n+    // IDEA: Implement a Merge class with GetNextMerge element.\n+    //\n+\n+    while (file->GetNextMergedBed(merged_bed, lineNum))\n+    {\n+        current_item[index] = merged_bed;\n+        break;\n+    }\n+}\n+\n+\n+bool MultiIntersectBed::AllFilesDone() {\n+    for (size_t i=0;i<current_item.size();++i)\n+        if (!current_item[i].chrom.empty())\n+            return false;\n+    return true;\n+}\n+\n+\n+string MultiIntersectBed::DetermineNextChrom() {\n+    string next_chrom;\n+    for (size_t i=0;i<current_item.size();++i) {\n+        if (current_item[i].chrom.empty())\n+            continue;\n+\n+        if (next_chrom.empty())\n+            next_chrom = current_item[i].chrom;\n+        else\n+            if (current_item[i].chrom < next_chrom)\n+                next_chrom = current_item[i].chrom ;\n+    }\n+    return next_chrom;\n+}\n+\n+\n+void MultiIntersectBed::OpenFiles() {\n+    for (size_t i = 0; i < filenames.size(); ++i) {\n+        BedFile *file = new BedFile(filenames[i]);\n+        file->Open();\n+        input_files.push_back(file);\n+        current_depth.push_back(0);\n+    }\n+    current_item.resize(filenames.size());\n+}\n+\n+\n+void MultiIntersectBed::CloseFiles() {\n+    for (size_t i=0; i < input_files.size(); ++i) {\n+        BedFile *file = input_files[i];\n+        delete file;\n+        input_files[i] = NULL ;\n+    }\n+    input_files.clear();\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,125 @@
+/*****************************************************************************
+  multiIntersectBed.h
+
+  (c) 2010 - Aaron Quinlan, UVA
+           - Assaf Gordon, CSHL
+  Quinlan Laboratory
+  Department of Public Health Sciences
+  Center for Public Health Genomics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef MULTIINTERSECTBED_H
+#define MULTIINTERSECTBED_H
+
+#include <vector>
+#include <string>
+#include "bedFile.h"
+#include "genomeFile.h"
+#include "intervalItem.h"
+
+// Reports, for a set of interval (BED) files, the regions covered by the
+// inputs together with which inputs cover each region.
+class MultiIntersectBed
+{
+private:
+
+    // Paths of the input files; OpenFiles creates one BedFile per entry.
+    vector<string>  filenames;
+    // Labels printed in place of the 1-based file numbers when haveTitles is set.
+    vector<string>  titles;
+
+    // One BedFile per input; allocated in OpenFiles, deleted in CloseFiles.
+    vector<BedFile*>   input_files;
+    // Per-file depth for the region being reported (one entry per input file).
+    vector<int>        current_depth;
+    // Most recently loaded interval of each file; an empty chrom means the
+    // file has no pending interval.
+    vector<BED>        current_item;
+
+    // Stream that result rows are written to.
+    std::ostream    &output;
+
+    INTERVALS_PRIORITY_QUEUE queue;
+    std::string              current_chrom;
+    map<int, bool>           files_with_coverage;
+    // Printed as the 4th column of each coverage row.
+    int                      current_non_zero_inputs;
+    bool                     print_empty_regions;
+    bool                     haveTitles;
+    
+    GenomeFile* genome_sizes;
+
+    std::string no_coverage_value;
+
+public:
+    MultiIntersectBed(std::ostream& _output,
+            const vector<string>& _filenames,
+            const vector<string>& _titles,
+            bool _print_empty_regions,
+            const std::string& _genomeFileName,
+            const std::string& _no_coverage_value);
+
+    virtual ~MultiIntersectBed();
+
+    // Combines all interval files
+    void MultiIntersect();
+
+    // Print the header line: chrom/start/end + name of each input file.
+    void PrintHeader();
+
+
+private:
+
+    // Open all input files, initialize "current_XXX" vectors
+    void OpenFiles();
+
+    // Close the input files.
+    void CloseFiles();
+
+    /*
+       Add an interval from input file 'index' into the queue.
+       will only be added if it belongs to the current chromosome.
+
+       If the interval was added (=consumed), the next interval will be read from the file
+       using 'LoadNextItem'
+     */
+    void AddInterval(int index);
+
+    /*
+       Loads the next (merged) interval from Bed file 'index'.
+       Stores it in 'current_item[index]'; the chrom of that entry is left
+       empty when the file has no further interval.
+     */
+    void LoadNextItem(int index);
+
+    /*
+       Scans the 'current_item' vector and returns the smallest chromosome
+       name (string comparison) among the files that still have a pending
+       interval (different files can start with different chromosomes).
+       Returns an empty string when no file has a pending interval.
+     */
+    std::string DetermineNextChrom();
+
+    /*
+       Returns 'true' if ALL intervals from ALL input files were used,
+       i.e. every entry of 'current_item' has an empty chrom.
+    */
+    bool        AllFilesDone();
+
+    /*
+       Extract the next coordinate from the queue, and updates the current coverage information.
+       If multiple interval share the same coordinate values, all of them are handled.
+       If an END coordinate is consumed, the next interval (from the corresponding file) is read.
+     */
+    CHRPOS ConsumeNextCoordinate();
+
+    /*
+       Updates the coverage information based on the given item.
+       Item can be a START coordinate or an END coordinate.
+     */
+    void UpdateInformation(const IntervalItem &item);
+
+    /*
+       prints chrom/start/end and the current depth coverage values of all the files.
+       NOTE(review): the implementation writes chrom/start/end/count to 'output'
+       but the file list and per-file columns to cout - confirm this is intended.
+     */
+    void PrintCoverage(CHRPOS start, CHRPOS end);
+
+    /*
+       prints chrom/start/end, a count of 0, "none", and a 0 column for each file.
+     */
+    void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
+
+    void DebugPrintQueue();
+};
+
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+  unionBedGraphsMain.cpp\n+\n+  (c) 2010 - Assaf Gordon, CSHL\n+           - Aaron Quinlan, UVA\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <climits>\n+#include <cstring>\n+#include <cstdlib>\n+#include <vector>\n+#include <string>\n+#include <iostream>\n+#include <getopt.h>\n+#include <libgen.h> //for basename()\n+#include "version.h"\n+\n+#include "genomeFile.h"\n+#include "multiIntersectBed.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "multiIntersectBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+//STLized version of basename()\n+// (because POSIX basename() modifies the input string pointer)\n+// Additionally: removes any extension the basename might have.\n+std::string stl_basename(const std::string& path);\n+\n+// function declarations\n+void ShowHelp(void);\n+void ShowExamples(void);\n+\n+\n+int main(int argc, char* argv[])\n+{\n+    bool haveFiles         = false;\n+    bool haveTitles        = false;\n+    bool haveGenome        = false;\n+    bool haveFiller        = true;\n+    bool printHeader       = false;\n+    bool printEmptyRegions = false;\n+    bool showHelp          = false;\n+    string genomeFile;\n+    string basePath;\n+    string noCoverageValue = "0";\n+    vector<string> inputFiles;\n+    vector<string> inputTitles;\n+\n+    //Parse command line options\n+    if(argc <= 1)\n+        ShowHelp();\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        
if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp == true) {\n+        ShowHelp();\n+        exit(1);\n+    }\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFiles = true;\n+                i = i+1;\n+                string file = argv[i];\n+                while (file[0] != \'-\' && i < argc) {\n+                    inputFiles.push_back(file);\n+                    i++;\n+                    if (i < argc)\n+                        file = argv[i];\n+                }\n+                i--;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveTitles = true;\n+                i = i+1;\n+                string title = argv[i];\n+                while (title[0] != \'-\' && i < argc) {\n+                    inputTitles.push_back(title);\n+                    i++;\n+                    if (i < argc)\n+                        title = argv[i];\n+                }\n+                i--;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveGenome = true;\n+                genomeFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFiller      = true;\n+                noCoverageValue = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n+            printHeader = true;\n+        }\n+        else 
if(PARAMETER_CHECK("-empty", 6, parameterLength)) {\n+            printEmptyRegions = true;\n+        }\n+        else if(PARAMETER_CHECK("-examples", 9, parameterLengt'..b'+\n+    cerr << "\\t-names\\t\\t"      << "A list of names (one / file) to describe each file in -i." << endl;\n+    cerr                        << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n+\n+    cerr << "\\t-g\\t\\t"          << "Use genome file to calculate empty regions." << endl;\n+    cerr                        << "\\t\\t\\t- STRING." << endl << endl;\n+\n+    cerr << "\\t-empty\\t\\t"      << "Report empty regions (i.e., start/end intervals w/o" << endl;\n+    cerr                        << "\\t\\t\\tvalues in all files)." << endl;\n+    cerr                        << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n+\n+    cerr << "\\t-filler TEXT\\t"  << "Use TEXT when representing intervals having no value." << endl;\n+    cerr                        << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n+\n+    cerr << "\\t-examples\\t"     << "Show detailed usage examples." 
<< endl << endl;\n+}\n+\n+\n+\n+void ShowExamples()\n+{\n+    cerr << "Example usage:\\n\\n"  \\\n+"== Input files: ==\\n" \\\n+"\\n" \\\n+" $ cat 1.bg\\n" \\\n+" chr1  1000    1500    10\\n" \\\n+" chr1  2000    2100    20\\n" \\\n+"\\n" \\\n+" $ cat 2.bg\\n" \\\n+" chr1  900 1600    60\\n" \\\n+" chr1  1700    2050    50\\n" \\\n+"\\n" \\\n+" $ cat 3.bg\\n" \\\n+" chr1  1980    2070    80\\n" \\\n+" chr1  2090    2100    20\\n" \\\n+"\\n" \\\n+" $ cat sizes.txt\\n" \\\n+" chr1  5000\\n" \\\n+"\\n" \\\n+"== Union/combine the files: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start   end 1   2   3\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line and custom names: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n+" chrom start   end WT-1    WT-2    KO-1\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    
2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start   end 1   2   3\\n" \\\n+" chr1  0   900 0   0   0\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1600    1700    0   0   0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+" chr1  2100    5000    0   0   0\\n" \\\n+"\\n" \\\n+;\n+}\n+\n+std::string stl_basename(const std::string& path)\n+{\n+    string result;\n+\n+    char* path_dup = strdup(path.c_str());\n+    char* basename_part = basename(path_dup);\n+    result = basename_part;\n+    free(path_dup);\n+\n+    size_t pos = result.find_last_of(\'.\');\n+    if (pos != string::npos )\n+        result = result.substr(0,pos);\n+\n+    return result;\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,13 @@
+#pragma once
+
+// Large-file (64-bit offset) helpers.
+//
+// _FILE_OFFSET_BITS only takes effect when it is defined before the first
+// system header is included, so include this header ahead of any others.
+#define _FILE_OFFSET_BITS 64
+
+#ifdef WIN32
+// The Microsoft CRT 64-bit tell/seek functions take and return __int64.
+#define ftell64(a)     _ftelli64(a)
+#define fseek64(a,b,c) _fseeki64(a,b,c)
+typedef __int64 off_type;
+#else
+// POSIX: ftello/fseeko use off_t, which is 64-bit with _FILE_OFFSET_BITS=64.
+#define ftell64(a)     ftello(a)
+#define fseek64(a,b,c) fseeko(a,b,c)
+typedef off_t off_type;
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/nucBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,52 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+ -I$(UTILITIES_DIR)/sequenceUtilities/ \
+ -I$(UTILITIES_DIR)/lineFileUtilities/ \
+ -I$(UTILITIES_DIR)/version/ \
+ -I$(UTILITIES_DIR)/gzstream/ \
+ -I$(UTILITIES_DIR)/fileType/ \
+ -I$(UTILITIES_DIR)/Fasta/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= nucBedMain.cpp nucBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= nucBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/
+
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,158 @@
+/*****************************************************************************
+  nucBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "nucBed.h"
+
+
+NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq, 
+               bool hasPattern, const string &pattern, bool forceStrand) {
+
+    _dbFile       = dbFile;
+    _bedFile      = bedFile;
+    _printSeq     = printSeq;
+    _hasPattern   = hasPattern;
+    _pattern      = pattern;
+    _forceStrand  = forceStrand;
+    
+    _bed = new BedFile(_bedFile);
+
+    // Compute the DNA content in each BED/GFF/VCF interval
+    ProfileDNA();
+}
+
+
+// Releases the BedFile reader allocated by the constructor.
+NucBed::~NucBed(void)
+{
+    delete _bed;
+}
+
+
+void NucBed::ReportDnaProfile(const BED& bed, const string &sequence, int seqLength)
+{
+    int a,c,g,t,n,other,userPatternCount;
+    a = c = g = t = n = other = userPatternCount = 0;
+    
+    getDnaContent(sequence,a,c,g,t,n,other);
+    
+    if (_hasPattern)
+        userPatternCount = countPattern(sequence, _pattern);
+    
+    
+    // report the original interval
+    _bed->reportBedTab(bed);
+    // report AT and GC content
+    printf("%f\t%f\t",(float)(a+t)/seqLength, (float)(c+g)/seqLength);
+    // report raw nucleotide counts
+    printf("%d\t%d\t%d\t%d\t%d\t%d\t%d",a,c,g,t,n,other,seqLength);
+    // add the original sequence if requested.
+
+    if (_printSeq)
+        printf("\t%s",sequence.c_str());
+    if (_hasPattern)
+        printf("\t%d",userPatternCount);
+    printf("\n");
+
+}
+
+
+void NucBed::PrintHeader(void) {
+    printf("#");
+    
+    int numOrigColumns = (int) _bed->bedType;
+    for (int i = 1; i <= numOrigColumns; ++i) {
+        printf("%d_usercol\t", i);
+    }
+    printf("%d_pct_at\t", numOrigColumns + 1);
+    printf("%d_pct_gc\t", numOrigColumns + 2);
+    printf("%d_num_A\t", numOrigColumns + 3);
+    printf("%d_num_C\t", numOrigColumns + 4);
+    printf("%d_num_G\t", numOrigColumns + 5);
+    printf("%d_num_T\t", numOrigColumns + 6);
+    printf("%d_num_N\t", numOrigColumns + 7);
+    printf("%d_num_oth\t", numOrigColumns + 8);
+    printf("%d_seq_len\t", numOrigColumns + 9);
+    
+    if (_printSeq)
+        printf("%d_seq", numOrigColumns + 10);
+    if (_hasPattern && !_printSeq)
+        printf("%d_user_patt_count", numOrigColumns + 10);
+    else if (_hasPattern && _printSeq)
+        printf("\t%d_user_patt_count", numOrigColumns + 11);
+    printf("\n");
+
+}
+
+
+//******************************************************************************
+// ExtractDNA
+//******************************************************************************
+void NucBed::ProfileDNA() {
+
+    /* Make sure that we can oen all of the files successfully*/
+
+    // open the fasta database for reading
+    ifstream faDb(_dbFile.c_str(), ios::in);
+    if ( !faDb ) {
+        cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+    // open and memory-map genome file
+    FastaReference fr;
+    bool memmap = true;    
+    fr.open(_dbFile, memmap);
+
+    bool headerReported = false;
+    BED bed, nullBed;
+    int lineNum = 0;
+    BedLineStatus bedStatus;
+    string sequence;
+
+    _bed->Open();
+    while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            if (headerReported == false) {
+                PrintHeader();
+                headerReported = true;
+            }
+            // make sure we are extracting >= 1 bp
+            if (bed.zeroLength == false) {
+                size_t seqLength = fr.sequenceLength(bed.chrom);
+                // make sure this feature will not exceed the end of the chromosome.
+                if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) 
+                {
+                    // grab the dna at this interval
+                    int length = bed.end - bed.start;
+                    // report the sequence's content
+                    string dna = fr.getSubSequence(bed.chrom, bed.start, length);
+                    // rev comp si necessaire
+                    if ((_forceStrand == true) && (bed.strand == "-"))
+                        reverseComplement(dna);
+                    ReportDnaProfile(bed, dna, length);
+                    bed = nullBed;
+                }
+                else
+                {
+                    cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of "
+                        << bed.chrom << " size (" << seqLength << " bp).  Skipping." << endl;
+                }
+            }
+            // handle zeroLength 
+            else {
+                cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl;
+            }
+            bed = nullBed;
+        }
+    }
+    _bed->Close();
+}
+
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/nucBed/nucBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,55 @@
+/*****************************************************************************
+  nucBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef NUCBED_H
+#define NUCBED_H
+
+#include "bedFile.h"
+#include "sequenceUtils.h"
+#include "Fasta.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Profiles the nucleotide content of each interval of a BED/GFF/VCF file
+// against a FASTA database and prints one result row per interval.
+class NucBed {
+
+public:
+
+    // constructor: stores the settings, creates the BedFile reader for
+    // 'bedFile' and runs ProfileDNA() right away.
+    NucBed(string &dbFile, string &bedFile, bool printSeq, 
+           bool hasPattern, const string &pattern,
+           bool forceStrand);
+    // destructor
+    ~NucBed(void);
+
+    // Reads all intervals and reports the profile of each one.
+    void ProfileDNA();
+
+
+private:
+    string _dbFile;       // path of the FASTA database
+    string _bedFile;      // path of the interval file
+    bool   _printSeq;     // append the extracted sequence to each row
+    bool   _hasPattern;   // append the occurrence count of _pattern to each row
+    string _pattern;      // user-supplied pattern counted when _hasPattern is set
+    bool _forceStrand;    // reverse-complement sequences of "-" strand intervals
+
+    // instance of a bed file class, allocated by the constructor.
+    BedFile  *_bed;
+    // Prints the '#'-prefixed header line naming every output column.
+    void PrintHeader(void);
+    // Prints the result row for one interval and its extracted sequence.
+    void ReportDnaProfile(const BED& bed, const string &sequence, int seqLength);
+};
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,147 @@
+/*****************************************************************************
+  nucBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "nucBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "nucBed"
+
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string fastaDbFile;
+    string bedFile;
+    string pattern;
+
+    // checks for existence of parameters
+    bool haveFastaDb = false;
+    bool haveBed     = false;
+    bool printSeq    = false;
+    bool hasPattern  = false;
+    bool forceStrand = false;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-fi", 3, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFastaDb = true;
+                fastaDbFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBed = true;
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-seq", 4, parameterLength)) {
+            printSeq = true;
+        }
+        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) {
+            if ((i+1) < argc) {
+                hasPattern = true;
+                pattern = argv[i + 1];
+                i++;
+            }
+        }
+        else {
+            cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    if (!haveFastaDb || !haveBed) {
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq, hasPattern, pattern, forceStrand);
+        delete nuc;
+
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Profiles the nucleotide content of intervals in a fasta file." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-fi\tInput FASTA file" << endl << endl;
+    cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl;
+    cerr << "\t-s\tProfile the sequence according to strand." << endl << endl;
+    cerr << "\t-seq\tPrint the extracted sequence" << endl << endl;
+    cerr << "\t-pattern\tReport the number of times a user-defined sequence is observed (case-insensitive)." << endl << endl;
+    
+    
+    cerr << "Output format: " << endl;
+    cerr << "\tThe following information will be reported after each original BED entry:" << endl;
+    cerr << "\t    1) %AT content" << endl;
+    cerr << "\t    2) %GC content" << endl;
+    cerr << "\t    3) Number of As observed" << endl;
+    cerr << "\t    4) Number of Cs observed" << endl;
+    cerr << "\t    5) Number of Gs observed" << endl;
+    cerr << "\t    6) Number of Ts observed" << endl;
+    cerr << "\t    7) Number of Ns observed" << endl;
+    cerr << "\t    8) Number of other bases observed" << endl;
+    cerr << "\t    9) The length of the explored sequence/interval." << endl;
+    cerr << "\t    10) The sequence extracted from the FASTA file. (optional, if -seq is used)" << endl;
+    cerr << "\t    11) The number of times a user defined pattern was observed. (optional, if -pattern is used.)" << endl;
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/overlap/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/overlap/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,47 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= overlap.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= overlap
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/overlap/overlap.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/overlap/overlap.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,202 @@
+/*****************************************************************************
+  overlap.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+
+#include "version.h"
+#include "lineFileUtilities.h"
+#include "bedFile.h"
+using namespace std;
+
+
+// define our program name
+#define PROGRAM_NAME "overlap"
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+
+// function declarations
+void ShowHelp(void);
+void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col);
+void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col);
+
+int main(int argc, char* argv[]) {
+
+    // input files
+    string inFile = "stdin";
+    string columns;
+
+    // our configuration variables
+    bool showHelp = false;
+    bool haveInFile  = true;
+    bool haveColumns = false;
+
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                inFile     = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-cols", 5, parameterLength)) {
+            haveColumns = true;
+            columns     = argv[i + 1];
+            i++;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have an input files
+    if (!haveInFile ) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        // Split the column string sent by the user into discrete column numbers
+        // A comma separated string is expected.
+        vector<string> posColumns;
+        Tokenize(columns, posColumns, ",");
+
+        if (posColumns.size() != 4) {
+            cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl;
+            ShowHelp();
+        }
+        else {
+            short s1, e1, s2, e2;
+            s1 = atoi(posColumns[0].c_str());
+            e1 = atoi(posColumns[1].c_str());
+            s2 = atoi(posColumns[2].c_str());
+            e2 = atoi(posColumns[3].c_str());
+
+            DetermineInput(inFile, s1, e1, s2, e2);
+        }
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Computes the amount of overlap (positive values)" << endl;
+    cerr << "\t or distance (negative values) between genome features" << endl;
+    cerr << "\t and reports the result at the end of the same line." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-i\t"        << "Input file. Use \"stdin\" for pipes." << endl << endl;
+
+    cerr << "\t-cols\t"     << "Specify the columns (1-based) for the starts and ends of the" << endl;
+    cerr                    << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl;
+    cerr                    << "\t\tThe columns must be listed in the following order: " << endl << endl;
+    cerr                    << "\t\tstart1,end1,start2,end2" << endl << endl;
+
+    cerr << "Example: " << endl;
+    cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl;
+    cerr << "\tchr1 10  20  A   chr1    15  25  B" << endl;
+    cerr << "\tchr1 10  20  C   chr1    25  35  D" << endl << endl;
+    cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl;
+    cerr << "\tchr1 10  20  A   chr1    15  25  B   5" << endl;
+    cerr << "\tchr1 10  20  C   chr1    25  35  D   -5" << endl;
+
+    // end the program here
+    exit(1);
+
+}
+
+
+void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) {
+
+
+    if (inFile != "stdin") {   // process a file
+
+        ifstream in(inFile.c_str(), ios::in);
+        if ( !in ) {
+            cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl;
+            exit (1);
+        }
+        ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col);
+    }
+    else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col);
+}
+
+
+void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) {
+
+    int lineNum = 0;
+    string inLine;
+    vector<string> inFields;
+
+    int overlap;
+
+    char *s1End, *e1End, *s2End, *e2End;
+    long s1, e1, s2, e2;
+
+    while (getline(input, inLine)) {
+        lineNum++;
+        Tokenize(inLine, inFields);
+
+        if (inFields.size() > 1) {
+
+            // test if columns  2 and 3 are integers.  If so, assume BED.
+            s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10);
+            e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10);
+            s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10);
+            e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10);
+
+            // strtol will set pointers to the start of the string if non-integral, base 10
+            // if they all check out, we have valid numeric columns.  Otherwise, complain.
+            if (s1End != inFields[s1Col-1].c_str() &&
+                e1End != inFields[e1Col-1].c_str() &&
+                s2End != inFields[s2Col-1].c_str() &&
+                e2End != inFields[e2Col-1].c_str()) {
+
+                overlap = overlaps(s1, e1, s2, e2);
+                printf("%s\t%d\n", inLine.c_str(), overlap);
+            }
+            else {
+                cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl;
+                exit(1);
+            }
+        }
+        inFields.clear();
+    }
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,52 @@
+# Makefile for the pairToBed program.
+# CXX, CXXFLAGS, LDFLAGS and LIBS are not defined in this file; presumably
+# they are inherited from the invoking Makefile or the environment -- TODO confirm.
+#
+# Paths, relative to this directory, of the shared utility sources, the
+# object output directory and the binary output directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+# One -I flag per utility directory whose headers are searched, including
+# the BamTools headers.
+INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ \
+            -I$(UTILITIES_DIR)/bedFile/ \
+            -I$(UTILITIES_DIR)/lineFileUtilities/ \
+            -I$(UTILITIES_DIR)/version/ \
+            -I$(UTILITIES_DIR)/gzstream/ \
+            -I$(UTILITIES_DIR)/fileType/ \
+            -I$(UTILITIES_DIR)/BamTools/include \
+            -I$(UTILITIES_DIR)/BamTools-Ancillary
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : this program's sources with .cpp replaced by .o
+# _EXT_OBJECTS  : object files produced by the shared utility directories
+# EXT_OBJECTS   : _EXT_OBJECTS prefixed with $(OBJ_DIR)/
+# BUILT_OBJECTS : OBJECTS prefixed with $(OBJ_DIR)/
+SOURCES= pairToBedMain.cpp pairToBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= pairToBed
+
+# Default target: build the program.
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this program's objects and the shared utility objects against the
+# bamtools library; the binary is written to $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# Compile each object from the .cpp file named after the target's file stem.
+# NOTE: every object lists all of $(SOURCES) as prerequisites, so a change
+# to any one source makes all of this program's objects out of date.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# The shared utility objects are produced by running make in each utility
+# directory.  The rule has no prerequisites, so its recipe runs only when
+# one of these objects does not exist yet.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# Removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,525 @@\n+/*****************************************************************************\n+  pairToBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "pairToBed.h"\n+\n+\n+bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n+\n+    if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n+        return true;\n+    }\n+    else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n+        return true;\n+    }\n+    else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n+        return true;\n+    }\n+    else return false;\n+}\n+\n+\n+/*\n+    Constructor\n+*/\n+\n+\n+BedIntersectPE::BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,\n+                               string searchType, bool sameStrand, bool diffStrand, bool bamInput,\n+                               bool bamOutput,  bool uncompressedBam, bool useEditDistance) {\n+\n+    _bedAFilePE        = bedAFilePE;\n+    _bedBFile          = bedBFile;\n+    _overlapFraction   = overlapFraction;\n+    _sameStrand        = sameStrand;\n+    _diffStrand        = diffStrand;\n+    _useEditDistance   = useEditDistance;\n+    _searchType        = searchType;\n+    _bamInput          = bamInput;\n+    _bamOutput         = bamOutput;\n+    _isUncompressedBam = uncompressedBam;\n+\n+    _bedA = new BedFilePE(bedAFilePE);\n+    _bedB = new BedFile(bedBFile);\n+\n+    if (_bamInput == false)\n+        IntersectBedPE();\n+    else\n+        IntersectBamPE(_bedAFilePE);\n+}\n+\n+\n+/*\n+    Destructor\n+*/\n+\n+BedIntersectPE::~BedIntersectPE(void) {\n+}\n+\n+\n+\n+void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, 
vector<BED> &hits2, const string &type) {\n+\n+    // list of hits on each end of BEDPE\n+    // that exceed the requested overlap fraction\n+    vector<BED> qualityHits1;\n+    vector<BED> qualityHits2;\n+\n+    // count of hits on each end of BEDPE\n+    // that exceed the requested overlap fraction\n+    int numOverlapsEnd1 = 0;\n+    int numOverlapsEnd2 = 0;\n+\n+    // make sure we have a valid chromosome before we search\n+    if (a.chrom1 != ".") {\n+        // Find the quality hits between ***end1*** of the BEDPE and the B BED file\n+        _bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, _sameStrand, _diffStrand);\n+\n+        vector<BED>::const_iterator h = hits1.begin();\n+        vector<BED>::const_iterator hitsEnd = hits1.end();\n+        for (; h != hitsEnd; ++h) {\n+\n+            int s = max(a.start1, h->start);\n+            int e = min(a.end1, h->end);\n+            int overlapBases = (e - s);             // the number of overlapping bases b/w a and b\n+            int aLength = (a.end1 - a.start1);      // the length of a in b.p.\n+\n+            // is there enough overlap relative to the user\'s request? 
(default ~ 1bp)\n+            if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+                numOverlapsEnd1++;\n+\n+                if (type == "either") {\n+                    _bedA->reportBedPETab(a);\n+                    _bedB->reportBedNewLine(*h);\n+                }\n+                else {\n+                    qualityHits1.push_back(*h);\n+                }\n+            }\n+        }\n+    }\n+\n+\n+    // make sure we have a valid chromosome before we search\n+    if (a.chrom2 != ".") {\n+        // Now find the quality hits between ***end2*** of the BEDPE and the B BED file\n+        _bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, _sameStrand, _diffStrand);\n+\n+        vector<BED>::const_iterator h = hits2.begin();\n+        vector<BED>::const_iterator hitsEnd = hits2.end();\n+        for (; h != hitsEnd; ++h) {\n+\n+            int s = max(a.start2, h->start);\n+            int '..b'\n+    reader.Close();\n+    if (_bamOutput == true) {\n+        writer.Close();\n+    }\n+}\n+\n+\n+void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,\n+                                      const RefVector &refs, BamWriter &writer) {\n+\n+    vector<BED> hits, hits1, hits2;         // vector of potential hits\n+    hits.reserve(1000);                     // reserve some space\n+    hits1.reserve(1000);\n+    hits2.reserve(1000);\n+\n+    bool overlapsFound;                     // flag to indicate if overlaps were found\n+\n+    if ( (_searchType == "either") || (_searchType == "xor") ||\n+              (_searchType == "both") || (_searchType == "notboth") ||\n+              (_searchType == "neither") ) {\n+\n+        // create a new BEDPE feature from the BAM alignments.\n+        BEDPE a;\n+        ConvertBamToBedPE(bam1, bam2, refs, a);\n+        if (_bamOutput == true) {   // BAM output\n+            // write to BAM if correct hits found\n+            overlapsFound = 
FindOneOrMoreOverlaps(a, _searchType);\n+            if (overlapsFound == true) {\n+                writer.SaveAlignment(bam1);\n+                writer.SaveAlignment(bam2);\n+            }\n+        }\n+        else {  // BEDPE output\n+            FindOverlaps(a, hits1, hits2, _searchType);\n+            hits1.clear();\n+            hits2.clear();\n+        }\n+    }\n+    else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {\n+        // only look for ispan and ospan when both ends are mapped.\n+        if (bam1.IsMapped() && bam2.IsMapped()) {\n+            // only do an inspan or outspan check if the alignment is intrachromosomal\n+            if (bam1.RefID == bam2.RefID) {\n+                // create a new BEDPE feature from the BAM alignments.\n+                BEDPE a;\n+                ConvertBamToBedPE(bam1, bam2, refs, a);\n+                if (_bamOutput == true) {   // BAM output\n+                    // look for overlaps, and write to BAM if >=1 were found\n+                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n+                    if (overlapsFound == true) {\n+                        writer.SaveAlignment(bam1);\n+                        writer.SaveAlignment(bam2);\n+                    }\n+                }\n+                else {  // BEDPE output\n+                    FindSpanningOverlaps(a, hits, _searchType);\n+                    hits.clear();\n+                }\n+            }\n+        }\n+    }\n+    else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {\n+        // only look for notispan and notospan when both ends are mapped.\n+        if (bam1.IsMapped() && bam2.IsMapped()) {\n+            // only do an inspan or outspan check if the alignment is intrachromosomal\n+            if (bam1.RefID == bam2.RefID) {\n+                // create a new BEDPE feature from the BAM alignments.\n+                BEDPE a;\n+                ConvertBamToBedPE(bam1, bam2, refs, a);\n+           
     if (_bamOutput == true) {   // BAM output\n+                    // write to BAM if there were no overlaps\n+                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n+                    if (overlapsFound == false) {\n+                        writer.SaveAlignment(bam1);\n+                        writer.SaveAlignment(bam2);\n+                    }\n+                }\n+                else {  // BEDPE output\n+                    FindSpanningOverlaps(a, hits, _searchType);\n+                    hits.clear();\n+                }\n+            }\n+            // if inter-chromosomal or orphaned, we know it\'s not ispan and not ospan\n+            else if (_bamOutput == true) {\n+                writer.SaveAlignment(bam1);\n+                writer.SaveAlignment(bam2);\n+            }\n+        }\n+        // if both ends aren\'t mapped, we know that it\'s notispan and not ospan\n+        else if (_bamOutput == true) {\n+            writer.SaveAlignment(bam1);\n+            writer.SaveAlignment(bam2);\n+        }\n+    }\n+}\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,161 @@
+/*****************************************************************************
+  pairToBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef INTERSECTBED_H
+#define INTERSECTBED_H
+
+#include "api/BamReader.h"
+#include "api/BamWriter.h"
+#include "api/BamAux.h"
+using namespace BamTools;
+
+#include "bedFile.h"
+#include "bedFilePE.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+
+
+/**************************************************
+Helper function prototypes
+**************************************************/
+// Defined in pairToBed.cpp.  Returns true when both mates are on the same
+// reference and bam.InsertSize > 0; when both mates are on the same
+// reference, bam.InsertSize == 0 and bam is the first mate; or when the
+// mates are on different references and bam is the first mate.
+// Returns false otherwise.
+bool IsCorrectMappingForBEDPE (const BamAlignment &bam);
+
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedIntersectPE {
+
+public:
+
+    // constructor
+    BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
+        string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance);
+    // destructor
+    ~BedIntersectPE(void);
+
+    void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type);
+
+    bool FindOneOrMoreOverlaps(const BEDPE &, const string &type);
+
+    void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type);
+    bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type);
+
+    void IntersectBedPE();
+    void IntersectBamPE(string bamFile);
+
+    void DetermineBedPEInput();
+
+private:
+
+    string _bedAFilePE;
+    string _bedBFile;
+    float _overlapFraction;
+    string _searchType;
+    bool _sameStrand;
+    bool _diffStrand;
+    bool _useEditDistance;
+    bool _bamInput;
+    bool _bamOutput;
+    bool  _isUncompressedBam;
+
+    // instance of a paired-end bed file class.
+    BedFilePE *_bedA;
+
+    // instance of a bed file class.
+    BedFile *_bedB;
+
+    inline
+    void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) {
+
+        // initialize BEDPE variables
+        a.start1 = a.start2 = a.end1 = a.end2 = -1;
+        a.chrom1 = a.chrom2 = ".";
+        a.strand1 = a.strand2 = '.';
+        uint32_t editDistance1, editDistance2;
+        editDistance1 = editDistance2 = 0;
+
+        // take the qname from end 1.
+        a.name = bam1.Name;
+
+        // end 1
+        if (bam1.IsMapped()) {
+            a.chrom1  = refs.at(bam1.RefID).RefName;
+            a.start1  = bam1.Position;
+            a.end1    = bam1.GetEndPosition(false, false);
+            a.strand1 = "+";
+            if (bam1.IsReverseStrand()) a.strand1 = "-";
+
+            // extract the edit distance from the NM tag
+            // if possible. otherwise, complain.
+            if (_useEditDistance == true) {
+                if (bam1.GetTag("NM", editDistance1) == false) {
+                    cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  Exiting\n";
+                    exit(1);
+                }
+            }
+        }
+
+        // end 2
+        if (bam2.IsMapped()) {
+            a.chrom2  = refs.at(bam2.RefID).RefName;
+            a.start2  = bam2.Position;
+            a.end2    = bam2.GetEndPosition(false, false);
+            a.strand2 = "+";
+            if (bam2.IsReverseStrand()) a.strand2 = "-";
+
+            // extract the edit distance from the NM tag
+            // if possible. otherwise, complain.
+            if (_useEditDistance == true) {
+                if (bam2.GetTag("NM", editDistance2) == false) {
+                    cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  Exiting\n";
+                    exit(1);
+                }
+            }
+        }
+
+        // swap the ends if necessary
+        if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) {
+            swap(a.chrom1, a.chrom2);
+            swap(a.start1, a.start2);
+            swap(a.end1, a.end2);
+            swap(a.strand1, a.strand2);
+        }
+
+        // compute the minimum mapping quality b/w the two ends of the pair.
+        a.score = "0";
+        if (_useEditDistance == false) {
+            if (bam1.IsMapped() == true && bam2.IsMapped() == true)
+                a.score = ToString(min(bam1.MapQuality, bam2.MapQuality));
+        }
+        // BEDPE using edit distance
+        else {
+            if (bam1.IsMapped() == true && bam2.IsMapped() == true)
+                a.score = ToString((int) (editDistance1 + editDistance2));
+            else if (bam1.IsMapped() == true)
+                a.score = ToString((int) editDistance1);
+            else if (bam2.IsMapped() == true)
+                a.score = ToString((int) editDistance2);
+        }
+    };
+
+    inline
+    void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
+                                          const RefVector &refs,
+                                          BamWriter &writer);
+};
+
+#endif /* INTERSECTBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,237 @@\n+/*****************************************************************************\n+  pairToBedMain.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "pairToBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "pairToBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedAFile;\n+    string bedBFile;\n+\n+    // input arguments\n+    float overlapFraction = 1E-9;\n+    string searchType = "either";\n+\n+    // flags to track parameters\n+    bool haveBedA           = false;\n+    bool haveBedB           = false;\n+    bool haveSearchType     = false;\n+    bool haveFraction       = false;\n+    bool sameStrand         = false;\n+    bool diffStrand         = false;\n+    bool useEditDistance    = false;\n+    bool inputIsBam         = false;\n+    bool outputIsBam        = true;\n+    bool uncompressedBam    = false;\n+\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i 
= 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                outputIsBam  = false;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                inputIsBam = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedB = true;\n+                bedBFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n+            outputIsBam = false;\n+        }\n+        else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n+            useEditDistance = true;\n+        }\n+        else if(PARAMETER_CHECK("-type", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveSearchType = true;\n+                searchType = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFraction = true;\n+                overlapFraction = atof(argv[i + 1]);\n+                i++;\n+            }\n+        }\n+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n+            sameStrand = true;\n+        }\n+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n+            diffStrand = true;\n+        }\n+        else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n+            uncompressedBam = true;\n+        }\n+        else {\n+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << 
endl;\n+            showHelp = true;\n+        }\n+    }\n+\n+\n+    // make sure we have both input files\n+    if (!haveBedA || !haveBedB)'..b' << endl << endl;\n+\n+    cerr << "Options: " << endl;\n+\n+    cerr << "\\t-abam\\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl;\n+    cerr                        << "\\t\\t- Requires BAM to be grouped or sorted by query." << endl << endl;\n+\n+    cerr << "\\t-ubam\\t"         << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n+    cerr                        << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+    cerr << "\\t-bedpe\\t"        << "When using BAM input (-abam), write output as BEDPE. The default" << endl;\n+    cerr                        << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+    cerr << "\\t-ed\\t"           << "Use BAM total edit distance (NM tag) for BEDPE score." << endl;\n+    cerr                        << "\\t\\t- Default for BEDPE is to use the minimum of" << endl;\n+    cerr                        << "\\t\\t  of the two mapping qualities for the pair." << endl;\n+    cerr                        << "\\t\\t- When -ed is used the total edit distance" << endl;\n+    cerr                        << "\\t\\t  from the two mates is reported as the score." << endl << endl;\n+\n+    cerr << "\\t-f\\t"                    << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl;\n+    cerr                                << "\\t\\tDefault is 1E-9 (effectively 1bp)." << endl << endl;\n+\n+    cerr << "\\t-s\\t"                    << "Require same strandedness when finding overlaps." << endl;\n+    cerr                                << "\\t\\tDefault is to ignore stand." << endl;\n+    cerr                                << "\\t\\tNot applicable with -type inspan or -type outspan." 
<< endl << endl;\n+\n+    cerr << "\\t-S\\t"                    << "Require different strandedness when finding overlaps." << endl;\n+    cerr                                << "\\t\\tDefault is to ignore stand." << endl;\n+    cerr                                << "\\t\\tNot applicable with -type inspan or -type outspan." << endl << endl;\n+\n+    cerr << "\\t-type \\t"                << "Approach to reporting overlaps between BEDPE and BED." << endl << endl;\n+    cerr                                << "\\t\\teither\\tReport overlaps if either end of A overlaps B." << endl;\n+    cerr                                    << "\\t\\t\\t- Default." << endl;\n+\n+    cerr                                << "\\t\\tneither\\tReport A if neither end of A overlaps B." << endl;\n+\n+    cerr                                << "\\t\\tboth\\tReport overlaps if both ends of A overlap  B." << endl;\n+\n+    cerr                                << "\\t\\txor\\tReport overlaps if one and only one end of A overlaps B." << endl;\n+\n+    cerr                                << "\\t\\tnotboth\\tReport overlaps if neither end or one and only one " << endl;\n+    cerr                                    << "\\t\\t\\tend of A overlap B.  That is, xor + neither." << endl << endl;\n+\n+    cerr                                << "\\t\\tispan\\tReport overlaps between [end1, start2] of A and B." << endl;\n+    cerr                                    << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+    cerr                                << "\\t\\tospan\\tReport overlaps between [start1, end2] of A and B." << endl;\n+    cerr                                    << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+    cerr                                << "\\t\\tnotispan\\tReport A if ispan of A doesn\'t overlap B." << endl;\n+    cerr                                    << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." 
<< endl << endl;\n+\n+    cerr                                << "\\t\\tnotospan\\tReport A if ospan of A doesn\'t overlap B." << endl;\n+    cerr                                    << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+    cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl;\n+\n+    exit(1);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToPair/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,44 @@
+# Makefile for the pairToPair program.
+# CXX, CXXFLAGS, LDFLAGS and LIBS are not defined in this file; presumably
+# they are inherited from the invoking Makefile or the environment -- TODO confirm.
+#
+# Paths, relative to this directory, of the shared utility sources, the
+# object output directory and the binary output directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+# One -I flag per utility directory whose headers are searched.
+INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS       : this program's sources with .cpp replaced by .o
+# _EXT_OBJECTS  : object files produced by the shared utility directories
+# EXT_OBJECTS   : _EXT_OBJECTS prefixed with $(OBJ_DIR)/
+# BUILT_OBJECTS : OBJECTS prefixed with $(OBJ_DIR)/
+SOURCES= pairToPairMain.cpp pairToPair.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= pairToPair
+
+
+# Default target: build the program.
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this program's objects and the shared utility objects; the binary
+# is written to $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Compile each object from the .cpp file named after the target's file stem.
+# NOTE: every object lists all of $(SOURCES) as prerequisites, so a change
+# to any one source makes all of this program's objects out of date.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# The shared utility objects are produced by running make in each utility
+# directory.  The rule has no prerequisites, so its recipe runs only when
+# one of these objects does not exist yet.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# Removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,216 @@\n+/*****************************************************************************\n+  pairToPair.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "pairToPair.h"\n+\n+\n+/*\n+    Constructor\n+*/\n+PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,\n+                           string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) {\n+\n+    _bedAFilePE      = bedAFilePE;\n+    _bedBFilePE      = bedBFilePE;\n+    _overlapFraction = overlapFraction;\n+    _searchType      = searchType;\n+    _ignoreStrand    = ignoreStrand;\n+    _reqDiffNames    = reqDiffNames;\n+    _slop            = slop;\n+    _strandedSlop    = strandedSlop;\n+\n+    _bedA = new BedFilePE(bedAFilePE);\n+    _bedB = new BedFilePE(bedBFilePE);\n+\n+    IntersectPairs();\n+}\n+\n+\n+/*\n+    Destructor\n+*/\n+PairToPair::~PairToPair(void) {\n+}\n+\n+\n+\n+void PairToPair::IntersectPairs() {\n+\n+    // load the "B" bed file into a map so\n+    // that we can easily compare "A" to it for overlaps\n+    _bedB->loadBedPEFileIntoMap();\n+\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+    BEDPE a, nullBedPE;\n+\n+    _bedA->Open();\n+    while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            // identify overlaps b/w the pairs\n+            FindOverlaps(a);\n+            a = nullBedPE;\n+        }\n+    }\n+    _bedA->Close();\n+}\n+// END IntersectPE\n+\n+\n+\n+void PairToPair::FindOverlaps(const BEDPE &a) {\n+    //\n+    vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2;\n+\n+    // add the appropriate slop to the starts and ends\n+  
  int start1 = a.start1;\n+    int end1   = a.end1;\n+    int start2 = a.start2;\n+    int end2   = a.end2;\n+\n+    if (_strandedSlop == true) {\n+        if (a.strand1 == "+")\n+            end1   += _slop;\n+        else\n+            start1 -= _slop;\n+        if (a.strand2 == "+")\n+            end2   += _slop;\n+        else\n+            start2 -= _slop;\n+    }\n+    else {\n+        (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0;\n+        (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0;\n+        end1   += _slop;\n+        end2   += _slop;\n+    }\n+\n+    // Find the _potential_ hits between each end of A and B\n+    _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames);   // hits b/w A1 & B1\n+    _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames);   // hits b/w A2 & B1\n+    _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames);   // hits b/w A1 & B2\n+    _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames);   // hits b/w A2 & B2\n+\n+    unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size());\n+    unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size());\n+\n+    \n+    // report the fact that no hits were found iff _searchType is neither.\n+    if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {\n+        _bedA->reportBedPENewLine(a);\n+    }\n+    else if (_searchType == "both")  {\n+        bool found1 = false;\n+        bool found2 = false;\n+        if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0))\n+            found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2);\n+        if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0))\n+            found2 = FindHitsOnBothEnds(a, hitsA2B1, 
hitsA1B2);\n+    }\n+    else if (_searchType == "notboth")  {\n+        bool found1 = false;\n+        bool found2 = false'..b'End(a, hitsA2B1, hitsA1B2);\n+    }\n+}\n+\n+\n+bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n+                                    const vector<MATE> &qualityHitsEnd2) {\n+\n+    map<unsigned int, vector<MATE>, less<int> > hitsMap;\n+\n+    for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {\n+        hitsMap[h->lineNum].push_back(*h);\n+    }\n+    for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n+        hitsMap[h->lineNum].push_back(*h);\n+    }\n+\n+\n+    bool bothFound = false;\n+    for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n+        \n+        // hits on both sides\n+        if (m->second.size() >= 2) {\n+            bothFound = true;\n+            MATE b1 = m->second[0];\n+            MATE b2 = m->second[1];\n+\n+            if (_searchType == "both") {\n+                _bedA->reportBedPETab(a);\n+                printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+                                                                   b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n+                                                                   b1.bed.name.c_str(), b1.bed.score.c_str(),\n+                                                                   b1.bed.strand.c_str(), b2.bed.strand.c_str());\n+                for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+                    printf("\\t%s", b1.bed.otherFields[i].c_str());\n+                printf("\\n");\n+            }\n+        }\n+    }\n+    return bothFound;\n+}\n+\n+\n+void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n+                                
    const vector<MATE> &qualityHitsEnd2) {\n+\n+    map<unsigned int, vector<MATE>, less<int> > hitsMap;\n+\n+    for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {\n+        hitsMap[h->lineNum].push_back(*h);\n+    }\n+    for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n+        hitsMap[h->lineNum].push_back(*h);\n+    }\n+\n+    for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n+        if (m->second.size() >= 1) {\n+\n+            if ((m->second.size()) == 2) {\n+                MATE b1 = m->second[0];\n+                MATE b2 = m->second[1];\n+\n+                _bedA->reportBedPETab(a);\n+                printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+                                                                   b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n+                                                                   b1.bed.name.c_str(), b1.bed.score.c_str(),\n+                                                                   b1.bed.strand.c_str(), b2.bed.strand.c_str());\n+                for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+                    printf("\\t%s", b1.bed.otherFields[i].c_str());\n+                printf("\\n");\n+            }\n+            else {\n+                MATE b1 = m->second[0];\n+\n+                _bedA->reportBedPETab(a);\n+                printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+                                                                   b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end,\n+                                                                   b1.bed.name.c_str(), b1.bed.score.c_str(),\n+                                                                   b1.bed.strand.c_str(), 
b1.mate->bed.strand.c_str());\n+                for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+                    printf("\\t%s", b1.bed.otherFields[i].c_str());\n+                printf("\\n");\n+            }\n+        }\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,76 @@
+/*****************************************************************************
+  pairToPair.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef PAIRTOPAIR_H
+#define PAIRTOPAIR_H
+
+#include "bedFile.h"
+#include "bedFilePE.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+class PairToPair {
+    // Driver object for the pairToPair tool; main() builds one instance from the parsed command-line options.
+public:
+
+    // constructor: A and B BEDPE file names, then the -f, -type, -is, -rdn, -slop and -ss values (the order main() passes them in).
+    PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,
+        string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop);
+
+    // destructor
+    ~PairToPair(void);
+    // NOTE(review): presumably the top-level pass over A that reports overlaps with B; body not visible here -- confirm.
+    void IntersectPairs();
+
+
+private:
+    // Settings below are presumably copies of the constructor arguments (assignments live in pairToPair.cpp -- confirm).
+    string _bedAFilePE;
+    string _bedBFilePE;
+
+    float _overlapFraction;   // presumably the -f value (minimum overlap as a fraction of A)
+    string _searchType;       // presumably the -type value: "neither", "either", "both" or "notboth"
+    bool _ignoreStrand;       // presumably the -is flag
+    bool _reqDiffNames;       // presumably the -rdn flag
+    int _slop;                // presumably the -slop value in bp
+    bool _strandedSlop;       // presumably the -ss flag
+
+    // BedFilePE object for the A input; the reporting code prints A records through its reportBedPETab().
+    BedFilePE *_bedA;
+
+    // BedFilePE object, presumably for the B input (it is a paired-end type as well).
+    BedFilePE *_bedB;
+
+    // methods
+    // FindOverlaps takes a single A record.
+    // NOTE(review): presumably collects B hits for both ends of `a` and dispatches on the search type -- confirm in the .cpp.
+    void FindOverlaps(const BEDPE &a);
+    // NOTE(review): judging by the signature, filters `hits` against [start, end) into `qualityHits` and counts them in `numOverlaps` -- confirm.
+    void FindQualityHitsBetweenEnds(CHRPOS start, CHRPOS end,
+        const vector<MATE> &hits, vector<MATE> &qualityHits, int &numOverlaps);
+    // NOTE(review): meaning of the bool return is not visible from this header -- confirm in the .cpp.
+    bool FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,
+        const vector<MATE> &qualityHitsEnd2);
+    // Groups the hits from both ends by their lineNum and prints one tab-delimited "A record + B pair" line per group.
+    void FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,
+        const vector<MATE> &qualityHitsEnd2);
+
+};
+
+#endif /* PAIRTOPAIR_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,188 @@
+/*****************************************************************************
+  pairToPairMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "pairToPair.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "pairToPair"
+
+// Parameter checking macro: true when argv[i] is exactly `paramLen` characters
+// long and those characters equal the first `paramLen` characters of `param`.
+// It relies on `argv` and `i` being in scope where it is used.  The arguments
+// and the whole expansion are parenthesized so the macro composes safely with
+// operators such as !, || and && at the call site.
+#define PARAMETER_CHECK(param, paramLen, actualLen) (((actualLen) == (paramLen)) && (strncmp(argv[i], (param), (paramLen)) == 0))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedAFile;
+    string bedBFile;
+
+    // input arguments
+    float overlapFraction = 1E-9;
+    int slop = 0;
+    string searchType = "both";
+
+    // flags to track parameters
+    bool haveBedA = false;
+    bool haveBedB = false;
+    bool haveSearchType = false;
+    bool haveFraction = false;
+    bool ignoreStrand = false;
+    bool requireDifferentNames = false;
+    bool haveSlop = false;
+    bool strandedSlop = false;
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedB = true;
+                bedBFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-type", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveSearchType = true;
+                searchType = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFraction = true;
+                overlapFraction = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveSlop = true;
+                slop = atoi(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-ss", 3, parameterLength)) {
+            strandedSlop = true;
+        }
+        else if(PARAMETER_CHECK("-rdn", 4, parameterLength)) {
+            requireDifferentNames = true;
+        }
+        else if(PARAMETER_CHECK("-is", 3, parameterLength)) {
+            ignoreStrand = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+
+    // make sure we have both input files
+    if (!haveBedA || !haveBedB) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (haveSearchType && (searchType != "neither") && (searchType != "both") && (searchType != "either") && (searchType != "notboth")) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request \"both\",\"neither\",\"either\",or \"notboth\"" << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (strandedSlop == true && haveSlop == false) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need a -slop value if requesting -ss." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        PairToPair *bi = new PairToPair(bedAFile, bedBFile, overlapFraction, searchType,
+                                        ignoreStrand, requireDifferentNames, slop, strandedSlop);
+        delete bi;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+
+void ShowHelp(void) {
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Report overlaps between two paired-end BED files (BEDPE)." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <BEDPE> -b <BEDPE>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-f\t"                    << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl;
+    cerr                                << "\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl;
+
+    cerr << "\t-type \t"                << "Approach to reporting overlaps between A and B." << endl << endl;
+    cerr                                << "\t\tneither\tReport overlaps if neither end of A overlaps B." << endl;
+    cerr                                << "\t\teither\tReport overlaps if either ends of A overlap B." << endl;
+    cerr                                << "\t\tboth\tReport overlaps if both ends of A overlap B." << endl;
+    cerr                                << "\t\tnotboth\tReport overlaps if one or neither of ends of A overlap B." << endl;
+    
+    cerr                                << "\t\t- Default = both." << endl << endl;
+
+    cerr << "\t-slop \t"                << "The amount of slop (in b.p.). to be added to each footprint." << endl;
+    cerr                                << "\t\t*Note*: Slop is subtracted from start1 and start2 and added to end1 and end2." << endl << endl;
+
+    cerr << "\t-ss\t"                   << "Add slop based to each BEDPE footprint based on strand." << endl;
+    cerr                                << "\t\t- If strand is \"+\", slop is only added to the end coordinates." << endl;
+    cerr                                << "\t\t- If strand is \"-\", slop is only added to the start coordinates." << endl;
+    cerr                                << "\t\t- By default, slop is added in both directions." << endl << endl;
+
+    cerr << "\t-is\t"                   << "Ignore strands when searching for overlaps." << endl;
+    cerr                                << "\t\t- By default, strands are enforced." << endl << endl;
+
+    cerr << "\t-rdn\t"                  << "Require the hits to have different names (i.e. avoid self-hits)." << endl;
+    cerr                                << "\t\t- By default, same names are allowed." << endl << endl;
+
+
+    cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl;
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/shuffleBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+# (the three paths above are relative: shared utility sources, object output and binary output)
+# -------------------
+# define our includes: one -I header search path per utility
+# directory (plus the BamTools include directory)
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files: OBJECTS/BUILT_OBJECTS are this tool's own
+# objects, _EXT_OBJECTS/EXT_OBJECTS the shared utility objects; both live in $(OBJ_DIR)
+SOURCES= shuffleBedMain.cpp shuffleBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= shuffleBed
+
+# default target: build the program
+all: $(PROGRAM)
+
+.PHONY: all
+# link the tool's objects and the utility objects into $(BIN_DIR)/$(PROGRAM)
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+# compile each local source; $(*F) is the file part of the target name without its .o suffix
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+# the utility objects are produced by running make inside each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+# clean deletes every file directly under $(OBJ_DIR) and $(BIN_DIR), not only this tool's outputs
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,244 @@\n+/*****************************************************************************\n+  shuffleBed.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "shuffleBed.h"\n+\n+\n+BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, \n+                       bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, \n+                       float overlapFraction, int seed) {\n+\n+    _bedFile         = bedFile;\n+    _genomeFile      = genomeFile;\n+    _excludeFile     = excludeFile;\n+    _includeFile     = includeFile;\n+    _sameChrom       = sameChrom;\n+    _haveExclude     = haveExclude;\n+    _haveInclude     = haveInclude;\n+    _overlapFraction = overlapFraction;\n+    _haveSeed        = haveSeed;\n+\n+\n+    // use the supplied seed for the random\n+    // number generation if given.  
else,\n+    // roll our own.\n+    if (_haveSeed) {\n+        _seed = seed;\n+        srand(seed);\n+    }\n+    else {\n+        // thanks to Rob Long for the tip.\n+        _seed = (unsigned)time(0)+(unsigned)getpid();\n+        srand(_seed);\n+    }\n+\n+    _bed         = new BedFile(bedFile);\n+    _genome      = new GenomeFile(genomeFile);\n+    _chroms      = _genome->getChromList();\n+    _numChroms   = _genome->getNumberOfChroms();\n+\n+    if (_haveExclude) {\n+        _exclude = new BedFile(excludeFile);\n+        _exclude->loadBedFileIntoMap();\n+    }\n+    \n+    if (_haveInclude) {\n+        _include = new BedFile(includeFile);\n+        _include->loadBedFileIntoMapNoBin();\n+        \n+        _numIncludeChroms = 0;\n+        masterBedMapNoBin::const_iterator it    = _include->bedMapNoBin.begin(); \n+        masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end();\n+        for(; it != itEnd; ++it) {\n+            _includeChroms.push_back(it->first);\n+            _numIncludeChroms++;\n+        }\n+    }\n+\n+    if (_haveExclude == true && _haveInclude == false)\n+        ShuffleWithExclusions();\n+    else if  (_haveExclude == false && _haveInclude == true)\n+        ShuffleWithInclusions();\n+    else\n+        Shuffle();\n+}\n+\n+\n+BedShuffle::~BedShuffle(void) {\n+\n+}\n+\n+\n+void BedShuffle::Shuffle() {\n+\n+    int lineNum = 0;\n+    BED bedEntry, nullBed;     // used to store the current BED line from the BED file.\n+    BedLineStatus bedStatus;\n+\n+    _bed->Open();\n+    while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            ChooseLocus(bedEntry);\n+            _bed->reportBedNewLine(bedEntry);\n+            bedEntry = nullBed;\n+        }\n+    }\n+    _bed->Close();\n+}\n+\n+\n+\n+void BedShuffle::ShuffleWithExclusions() {\n+\n+    int lineNum = 0;\n+    BED bedEntry, nullBed;     // used to store the current BED line from the BED file.\n+    
BedLineStatus bedStatus;\n+\n+    _bed->Open();\n+    while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            // keep looking as long as the chosen\n+            // locus happens to overlap with regions\n+            // that the user wishes to exclude.\n+            int  tries = 0;\n+            bool haveOverlap = false;\n+            do \n+            {\n+                // choose a new locus\n+                ChooseLocus(bedEntry);\n+                haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end,\n+                                                                    bedEntry.strand, false, _overlapFraction);\n+                tries++;\n+            } while ((haveOverlap == true) && (tries <= MAX_TRIES));\n+            \n+\n+            if (tries > MAX_TRIES) {\n+                cerr << "Error, line " << lineNum << ":'..b't avoid excluded regions.  Ignoring entry and moving on." 
<< endl;\n+            }\n+            else {\n+                _bed->reportBedNewLine(bedEntry);\n+            }\n+        }\n+        bedEntry = nullBed;\n+    }\n+    _bed->Close();\n+}\n+\n+\n+void BedShuffle::ShuffleWithInclusions() {\n+\n+    int lineNum = 0;\n+    BED bedEntry, nullBed;     // used to store the current BED line from the BED file.\n+    BedLineStatus bedStatus;\n+\n+    _bed->Open();\n+    while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            // choose a new locus\n+            ChooseLocusFromInclusionFile(bedEntry);\n+            _bed->reportBedNewLine(bedEntry);\n+        }\n+        bedEntry = nullBed;\n+    }\n+    _bed->Close();\n+}\n+\n+\n+void BedShuffle::ChooseLocus(BED &bedEntry) {\n+\n+    string chrom = bedEntry.chrom;\n+    CHRPOS start    = bedEntry.start;\n+    CHRPOS end      = bedEntry.end;\n+    CHRPOS length   = end - start;\n+\n+    string randomChrom;\n+    CHRPOS randomStart;\n+    CHRPOS chromSize;\n+\n+    if (_sameChrom == false) {\n+        randomChrom    = _chroms[rand() % _numChroms];\n+        chromSize      = _genome->getChromSize(randomChrom);\n+        randomStart    = rand() % chromSize;\n+        bedEntry.chrom = randomChrom;\n+        bedEntry.start = randomStart;\n+        bedEntry.end   = randomStart + length;\n+    }\n+    else {\n+        chromSize      = _genome->getChromSize(chrom);\n+        randomStart    = rand() % chromSize;\n+        bedEntry.start = randomStart;\n+        bedEntry.end   = randomStart + length;\n+    }\n+\n+    // ensure that the chosen location doesn\'t go past\n+    // the length of the chromosome. 
if so, keep looking\n+    // for a new spot.\n+    while (bedEntry.end > chromSize) {\n+        if (_sameChrom == false) {\n+            randomChrom    = _chroms[rand() % _numChroms];\n+            chromSize      = _genome->getChromSize(randomChrom);\n+            randomStart    = rand() % chromSize;\n+            bedEntry.chrom = randomChrom;\n+            bedEntry.start = randomStart;\n+            bedEntry.end   = randomStart + length;\n+        }\n+        else {\n+            chromSize      = _genome->getChromSize(chrom);\n+            randomStart    = rand() % chromSize;\n+            bedEntry.start = randomStart;\n+            bedEntry.end   = randomStart + length;\n+        }\n+    }\n+}\n+\n+\n+void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {\n+\n+    string chrom    = bedEntry.chrom;\n+    CHRPOS length   = bedEntry.end - bedEntry.start;\n+\n+    string randomChrom;\n+    CHRPOS randomStart;\n+    BED includeInterval;\n+    \n+    if (_sameChrom == false) {\n+\n+        // grab a random chromosome from the inclusion file.\n+        randomChrom            = _includeChroms[rand() % _numIncludeChroms];\n+        // get the number of inclusion intervals for that chrom\n+        size_t size            =  _include->bedMapNoBin[randomChrom].size();\n+        // grab a random interval on the chosen chromosome.\n+        size_t interval        = rand() % size;\n+        // retreive a ranom -incl interval on the selected chrom\n+        includeInterval        = _include->bedMapNoBin[randomChrom][interval];\n+\n+        bedEntry.chrom = randomChrom;        \n+    }\n+    else {\n+        // get the number of inclusion intervals for the original chrom\n+        size_t size =  _include->bedMapNoBin[chrom].size();\n+        // grab a random interval on the chosen chromosome.\n+        includeInterval       = _include->bedMapNoBin[chrom][rand() % size];\n+    }\n+    \n+    randomStart    = includeInterval.start + rand() % (includeInterval.size());\n+    
bedEntry.start = randomStart;\n+    bedEntry.end   = randomStart + length;\n+    \n+    // use recursion to ensure that the chosen location \n+    // doesn\'t go past the end of the chrom\n+    if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) {\n+        //bedEntry.end = _genome->getChromSize(chrom);\n+        ChooseLocusFromInclusionFile(bedEntry);\n+    }\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,76 @@
+/*****************************************************************************
+  shuffleBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include "genomeFile.h"
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <cstdlib>
+#include <ctime>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/types.h>
+using namespace std;
+
+const int MAX_TRIES = 1000000;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedShuffle {
+    // Implements shuffleBed: re-places each feature of the input file at a random locus of the genome.
+public:
+
+    // constructor: stores the options, seeds rand() (with `seed` when haveSeed is set, otherwise with time + pid),
+    // opens the BED and genome files, loads the -excl / -incl file when given, and immediately runs the shuffle.
+    BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, 
+                           bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, 
+                           float overlapFraction, int seed);
+
+    // destructor (its body in shuffleBed.cpp is empty)
+    ~BedShuffle(void);
+
+private:
+    // copies of the constructor arguments
+    string _bedFile;
+    string _genomeFile;
+    string _excludeFile;
+    string _includeFile;
+    float  _overlapFraction;   // passed to the -excl overlap test when a candidate locus is checked
+    int _seed;                 // value handed to srand()
+    bool _sameChrom;           // when true, a feature keeps its original chromosome
+    bool _haveExclude;
+    bool _haveInclude;
+    bool _haveSeed;
+
+
+    // _bed: the features to shuffle.  _exclude / _include are only allocated when the matching have* flag is set.
+    BedFile *_bed;
+    BedFile *_exclude;
+    BedFile *_include;
+
+    GenomeFile *_genome;
+    // _chroms / _numChroms come from the genome file; _includeChroms / _numIncludeChroms from the chromosomes seen in the -incl file
+    vector<string> _chroms;
+    int _numChroms;
+    vector<string> _includeChroms;
+    int _numIncludeChroms;
+
+    // methods: the three drivers read every valid record of _bed, pick a new locus and report the record
+    void Shuffle();                  // plain random placement
+    void ShuffleWithExclusions();    // re-draws while the locus overlaps _exclude, giving up on a record after MAX_TRIES draws
+    void ShuffleWithInclusions();    // places records inside intervals of the -incl file
+    // ChooseLocus draws a random start (and chrom unless _sameChrom) and re-draws while the end passes the chrom size
+    void ChooseLocus(BED &);
+    void ChooseLocusFromInclusionFile(BED &);   // picks a random -incl interval and a random start inside it
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,188 @@
+/*****************************************************************************
+  shuffleBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "shuffleBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "shuffleBed"
+
+
+// Parameter checking macro: true when argv[i] is exactly `paramLen` characters
+// long and those characters equal the first `paramLen` characters of `param`.
+// It relies on `argv` and `i` being in scope where it is used.  The arguments
+// and the whole expansion are parenthesized so the macro composes safely with
+// operators such as !, || and && at the call site.
+#define PARAMETER_CHECK(param, paramLen, actualLen) (((actualLen) == (paramLen)) && (strncmp(argv[i], (param), (paramLen)) == 0))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile = "stdin";
+    string excludeFile;
+    string includeFile;
+    string genomeFile;
+
+    bool haveBed          = true;
+    bool haveGenome       = false;
+    bool haveExclude      = false;
+    bool haveInclude      = false;
+    bool haveSeed         = false;
+    float overlapFraction = 0.0;
+    int seed              = -1;
+    bool sameChrom        = false;
+
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveGenome = true;
+                genomeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-excl", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveExclude = true;
+                excludeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-incl", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveInclude = true;
+                includeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-seed", 5, parameterLength)) {
+            if ((i+1) < argc) {
+                haveSeed = true;
+                seed = atoi(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-chrom", 6, parameterLength)) {
+            sameChrom = true;
+        }
+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                overlapFraction = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else {
+          cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed || !haveGenome) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    
+    if (haveInclude && haveExclude) {
+      cerr << endl << "*****" << endl << "*****ERROR: Cannot use -incl and -excl together." << endl << "*****" << endl;
+      showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedShuffle *bc = new BedShuffle(bedFile, genomeFile, excludeFile, includeFile, 
+                                        haveSeed, haveExclude, haveInclude, sameChrom, 
+                                        overlapFraction, seed);
+        delete bc;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Randomly permute the locations of a feature file among a genome." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-excl\t"             << "A BED/GFF/VCF file of coordinates in which features in -i" << endl;
+    cerr                            << "\t\tshould not be placed (e.g. gaps.bed)." << endl << endl;
+
+    cerr << "\t-incl\t"             << "Instead of randomly placing features in a genome, the -incl" << endl;
+    cerr                            << "\t\toptions defines a BED/GFF/VCF file of coordinates in which " << endl;
+    cerr                            << "\t\tfeatures in -i should be randomly placed (e.g. genes.bed). " << endl << endl;
+
+    cerr << "\t-chrom\t"            << "Keep features in -i on the same chromosome."<< endl;
+    cerr                            << "\t\t- By default, the chrom and position are randomly chosen." << endl << endl;
+
+    cerr << "\t-seed\t"             << "Supply an integer seed for the shuffling." << endl;
+    cerr                            << "\t\t- By default, the seed is chosen automatically." << endl;
+    cerr                            << "\t\t- (INTEGER)" << endl << endl;
+
+    cerr << "\t-f\t"                << "Maximum overlap (as a fraction of the -i feature) with an -excl" << endl;
+    cerr                            << "\t\tfeature that is tolerated before searching for a new, " << endl;
+    cerr                            << "\t\trandomized locus. For example, -f 0.10 allows up to 10%" << endl;
+    cerr                            << "\t\tof a randomized feature to overlap with a given feature" << endl;
+    cerr                            << "\t\tin the -excl file. **Cannot be used with -incl file.**" << endl;
+    cerr                            << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
+    cerr                            << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
+
+    cerr << "Notes: " << endl;
+    cerr << "\t(1)  The genome file should tab delimited and structured as follows:" << endl;
+    cerr << "\t     <chromName><TAB><chromSize>" << endl << endl;
+    cerr << "\tFor example, Human (hg19):" << endl;
+    cerr << "\tchr1\t249250621" << endl;
+    cerr << "\tchr2\t243199373" << endl;
+    cerr << "\t..." << endl;
+    cerr << "\tchr18_gl000207_random\t4262" << endl << endl;
+
+
+    cerr << "Tips: " << endl;
+    cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
+    cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
+    cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl;
+    cerr << "\t\"select chrom, size from hg19.chromInfo\"  > hg19.genome" << endl << endl;
+
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/slopBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+# (the three paths above are relative: shared utility sources, object output and binary output)
+# -------------------
+# define our includes: one -I header search path per utility
+# directory (plus the BamTools include directory)
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files: OBJECTS/BUILT_OBJECTS are this tool's own
+# objects, _EXT_OBJECTS/EXT_OBJECTS the shared utility objects; both live in $(OBJ_DIR)
+SOURCES= slopBedMain.cpp slopBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= slopBed
+
+# default target: build the program
+all: $(PROGRAM)
+
+.PHONY: all
+# link the tool's objects and the utility objects into $(BIN_DIR)/$(PROGRAM)
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+# compile each local source; $(*F) is the file part of the target name without its .o suffix
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+# the utility objects are produced by running make inside each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+# clean deletes every file directly under $(OBJ_DIR) and $(BIN_DIR), not only this tool's outputs
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,91 @@
+/*****************************************************************************
+  slopBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "slopBed.h"
+
+
+BedSlop::BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional) {
+
+    _bedFile     = bedFile;
+    _genomeFile  = genomeFile;
+    _forceStrand = forceStrand;
+    _leftSlop    = leftSlop;
+    _rightSlop   = rightSlop;
+    _fractional  = fractional; 
+
+    _bed    = new BedFile(bedFile);
+    _genome = new GenomeFile(genomeFile);
+
+    // get going, slop it up.
+    SlopBed();
+}
+
+
+// Releases the BedFile and GenomeFile objects that the constructor
+// allocated with new; previously they were never freed.
+BedSlop::~BedSlop(void) {
+    delete _bed;
+    delete _genome;
+}
+
+
+void BedSlop::SlopBed() {
+
+    int lineNum = 0;
+    BED bedEntry, nullBed;     // used to store the current BED line from the BED file.
+    BedLineStatus bedStatus;
+
+    _bed->Open();
+    bedStatus = _bed->GetNextBed(bedEntry, lineNum);
+    while (bedStatus != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            if (_fractional == false) {
+                AddSlop(bedEntry, (int) _leftSlop, (int) _rightSlop);
+            }
+            else {
+                int leftSlop  = (int) (_leftSlop  * bedEntry.size());
+                int rightSlop = (int) (_rightSlop * bedEntry.size());
+                AddSlop(bedEntry, leftSlop, rightSlop);
+            }
+            _bed->reportBedNewLine(bedEntry);
+            bedEntry = nullBed;
+        }
+        bedStatus = _bed->GetNextBed(bedEntry, lineNum);
+    }
+    _bed->Close();
+}
+
+
+void BedSlop::AddSlop(BED &bed, int leftSlop, int rightSlop) {
+
+    // special handling if the BED entry is on the negative
+    // strand and the user cares about strandedness.
+    CHRPOS chromSize = _genome->getChromSize(bed.chrom);
+
+    if ( (_forceStrand) && (bed.strand == "-") ) {
+        // inspect the start
+        if ( (static_cast<int>(bed.start) - rightSlop) > 0 ) bed.start -= rightSlop;
+        else bed.start = 0;
+
+        // inspect the start
+        if ( (static_cast<int>(bed.end) + leftSlop) <= static_cast<int>(chromSize)) bed.end += leftSlop;
+        else bed.end = chromSize;
+    }
+    else {
+        // inspect the start
+        if ( (static_cast<int>(bed.start) - leftSlop) > 0) bed.start -= leftSlop;
+        else bed.start = 0;
+
+        // inspect the end
+        if ( (static_cast<int>(bed.end) + rightSlop) <= static_cast<int>(chromSize)) bed.end += rightSlop;
+        else bed.end = chromSize;
+    }
+}
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/slopBed/slopBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,59 @@
+/*****************************************************************************
+  slopBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+
+#include "bedFile.h"
+#include "genomeFile.h"
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <map>
+#include <cstdlib>
+#include <ctime>
+using namespace std;
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Adds "slop" (extra bases) to the start and end of every entry in a BED
+// file. Constructing an instance performs the whole run: the constructor
+// calls SlopBed().
+class BedSlop {
+
+public:
+
+    // constructor
+    //   bedFile, genomeFile : names handed to BedFile and GenomeFile
+    //   forceStrand         : swap left/right for entries on the "-" strand
+    //   leftSlop, rightSlop : amounts to add; truncated to int unless
+    //                         fractional is set
+    //   fractional          : treat the slops as multiples of each entry's size
+    BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional);
+
+    // destructor
+    ~BedSlop(void);
+
+
+
+private:
+
+    // input file names, as passed to the constructor
+    string _bedFile;
+    string _genomeFile;
+
+    // run options (see the constructor parameters)
+    bool   _forceStrand;
+    float  _leftSlop;
+    float  _rightSlop;
+    bool   _fractional;
+
+    // file objects allocated by the constructor
+    BedFile *_bed;
+    GenomeFile *_genome;
+
+    // methods
+
+    // reads every entry of the BED file, applies AddSlop and reports the result
+    void SlopBed();
+
+    // method to add requested "slop" to a single BED entry;
+    // the start is clamped at 0 and the end at the chromosome size
+    void AddSlop(BED &bed, int leftSlop, int rightSlop);
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,190 @@
+/*****************************************************************************
+  slopBedMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "slopBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "slopBed"
+
+
+// define our parameter checking macro.
+// True when argv[i] is exactly `param` (whose length is `paramLen`).
+// Expects `argv` and `i` to be in scope where the macro is used. The
+// expansion is fully parenthesized so it composes safely with !, || and &&.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], param, min(actualLen, paramLen)) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile = "stdin";
+    string genomeFile;
+
+    bool haveBed    = true;
+    bool haveGenome = false;
+    bool haveLeft   = false;
+    bool haveRight  = false;
+    bool haveBoth   = false;
+
+    bool forceStrand = false;
+    float leftSlop   = 0.0;
+    float rightSlop  = 0.0;
+    bool  fractional = false;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveGenome = true;
+                genomeFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-l", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveLeft = true;
+                leftSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveRight = true;
+                rightSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBoth = true;
+                leftSlop = atof(argv[i + 1]);
+                rightSlop = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-s", 2, parameterLength)) {
+            forceStrand = true;
+        }
+        else if(PARAMETER_CHECK("-pct", 4, parameterLength)) {
+            fractional = true;
+        }
+        else {
+          cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed || !haveGenome) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if (!haveLeft && !haveRight && !haveBoth) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) {
+      cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+    if (forceStrand && (!(haveLeft) || !(haveRight))) {
+      cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl;
+      showHelp = true;
+    }
+
+    if (!showHelp) {
+        BedSlop *bc = new BedSlop(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional);
+        delete bc;
+
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+// Writes the slopBed usage text to stderr and terminates the program with
+// exit status 1. This function never returns to its caller.
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Add requested base pairs of \"slop\" to each feature." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-b\t"                << "Increase the BED/GFF/VCF entry by -b base pairs in each direction." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+
+    cerr << "\t-l\t"                << "The number of base pairs to subtract from the start coordinate." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+        
+    cerr << "\t-r\t"                << "The number of base pairs to add to the end coordinate." << endl;
+    cerr                            << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl;
+        
+    cerr << "\t-s\t"                << "Define -l and -r based on strand." << endl;
+    cerr                            << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl;
+    cerr                            << "\t\tit will add 500 bp downstream.  Default = false." << endl << endl;
+
+    cerr << "\t-pct\t"              << "Define -l and -r as a fraction of the feature's length." << endl;
+    cerr                            << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl;
+    cerr                            << "\t\twill add 500 bp \"upstream\".  Default = false." << endl << endl;
+
+    cerr << "Notes: " << endl;
+    cerr << "\t(1)  Starts will be set to 0 if options would force it below 0." << endl;
+    cerr << "\t(2)  Ends will be set to the chromosome length if  requested slop would" << endl;
+    cerr <<        "\tforce it above the max chrom length." << endl;
+
+    cerr << "\t(3)  The genome file should tab delimited and structured as follows:" << endl;
+    cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl;
+    cerr << "\tFor example, Human (hg19):" << endl;
+    cerr << "\tchr1\t249250621" << endl;
+    cerr << "\tchr2\t243199373" << endl;
+    cerr << "\t..." << endl;
+    cerr << "\tchr18_gl000207_random\t4262" << endl << endl;
+
+
+    cerr << "Tips: " << endl;
+    cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl;
+    cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl;
+    cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl;
+    cerr << "\t\"select chrom, size from hg19.chromInfo\"  > hg19.genome" << endl << endl;
+
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/sortBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,43 @@
+# Build rules for the sortBed tool.
+# All paths are relative to this directory; objects and the final binary
+# are written to the shared $(OBJ_DIR) and $(BIN_DIR) directories.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS are this tool's own objects; _EXT_OBJECTS are produced by the
+# utility directories. Both lists are prefixed with $(OBJ_DIR) below.
+SOURCES= sortMain.cpp sortBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= sortBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link step: the binary is written to $(BIN_DIR), not to a file named
+# $(PROGRAM) in this directory.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of $(SOURCES) as prerequisites. The recipe picks
+# the source to compile from the target itself: $(*F) is the target's file
+# name without its directory or .o suffix.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# External objects are built by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the
+# files produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,201 @@
+/*****************************************************************************
+  sortBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "sortBed.h"
+
+//
+// Constructor
+//
+// Remembers the input file name and creates the BedFile object that the
+// Sort* methods read from. Nothing is read until one of them is called.
+BedSort::BedSort(string &bedFile)
+    : _bedFile(bedFile),
+      _bed(new BedFile(bedFile))
+{
+}
+
+//
+// Destructor
+//
+// Releases the BedFile object that the constructor allocated with new;
+// previously it was never freed.
+BedSort::~BedSort(void) {
+    delete _bed;
+}
+
+
+void BedSort::SortBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    // loop through each chromosome and merge their BED entries
+    for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+        // bedList is already sorted by start position.
+        vector<BED> bedList = m->second;
+
+        for (unsigned int i = 0; i < bedList.size(); ++i) {
+            _bed->reportBedNewLine(bedList[i]);
+        }
+    }
+}
+
+
+void BedSort::SortBedBySizeAsc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    vector<BED> masterList;
+    masterList.reserve(1000000);
+
+    // loop through each chromosome and merge their BED entries
+    for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+        // bedList is already sorted by start position.
+        vector<BED> bedList = m->second;
+
+        // add the entries from this chromosome to the current list
+        for (unsigned int i = 0; i < m->second.size(); ++i) {
+            masterList.push_back(m->second[i]);
+        }
+    }
+
+    // sort the master list by size (asc.)
+    sort(masterList.begin(), masterList.end(), sortBySizeAsc);
+
+    // report the entries in ascending order
+    for (unsigned int i = 0; i < masterList.size(); ++i) {
+        _bed->reportBedNewLine(masterList[i]);
+    }
+}
+
+
+void BedSort::SortBedBySizeDesc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    vector<BED> masterList;
+    masterList.reserve(1000000);
+
+    // loop through each chromosome and merge their BED entries
+    for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+        // bedList is already sorted by start position.
+        vector<BED> bedList = m->second;
+
+        // add the entries from this chromosome to the current list
+        for (unsigned int i = 0; i < m->second.size(); ++i) {
+            masterList.push_back(m->second[i]);
+        }
+    }
+
+    // sort the master list by size (asc.)
+    sort(masterList.begin(), masterList.end(), sortBySizeDesc);
+
+    // report the entries in ascending order
+    for (unsigned int i = 0; i < masterList.size(); ++i) {
+        _bed->reportBedNewLine(masterList[i]);
+    }
+}
+
+void BedSort::SortBedByChromThenSizeAsc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    // loop through each chromosome and merge their BED entries
+    for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+        // bedList is already sorted by start position.
+        vector<BED> bedList = m->second;
+        sort(bedList.begin(), bedList.end(), sortBySizeAsc);
+
+        for (unsigned int i = 0; i < bedList.size(); ++i) {
+            _bed->reportBedNewLine(bedList[i]);
+        }
+    }
+}
+
+
+void BedSort::SortBedByChromThenSizeDesc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    // loop through each chromosome and merge their BED entries
+    for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+        // bedList is already sorted by start position.
+        vector<BED> bedList = m->second;
+
+        sort(bedList.begin(), bedList.end(), sortBySizeDesc);
+
+        for (unsigned int i = 0; i < bedList.size(); ++i) {
+            _bed->reportBedNewLine(bedList[i]);
+        }
+    }
+}
+
+
+void BedSort::SortBedByChromThenScoreAsc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    if (_bed->bedType >= 5) {
+        // loop through each chromosome and merge their BED entries
+        for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+            // bedList is already sorted by start position.
+            vector<BED> bedList = m->second;
+            sort(bedList.begin(), bedList.end(), sortByScoreAsc);
+
+            for (unsigned int i = 0; i < bedList.size(); ++i) {
+                _bed->reportBedNewLine(bedList[i]);
+            }
+        }
+    }
+    else {
+        cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater.  Exiting." << endl;
+        exit(1);
+    }
+}
+
+
+void BedSort::SortBedByChromThenScoreDesc() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bed->loadBedFileIntoMapNoBin();
+
+    if (_bed->bedType >= 5) {
+        // loop through each chromosome and merge their BED entries
+        for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
+
+            // bedList is already sorted by start position.
+            vector<BED> bedList = m->second;
+            sort(bedList.begin(), bedList.end(), sortByScoreDesc);
+
+            for (unsigned int i = 0; i < bedList.size(); ++i) {
+                _bed->reportBedNewLine(bedList[i]);
+            }
+        }
+    }
+    else {
+        cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater.  Exiting." << endl;
+        exit(1);
+    }
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/sortBed/sortBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+/*****************************************************************************
+  sortBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Sorts the entries of a BED file in one of several orders. Each Sort*
+// method loads the file and reports the entries itself; the caller picks
+// exactly one of them.
+class BedSort {
+
+public:
+
+    // constructor: takes the name of the BED file to sort
+    BedSort(string &);
+
+    // destructor
+    ~BedSort(void);
+
+    void SortBed();             // the default.  sorts by chrom (asc.) then by start (asc.)
+    void SortBedBySizeAsc();            // all entries pooled, sortBySizeAsc order
+    void SortBedBySizeDesc();           // all entries pooled, sortBySizeDesc order
+    void SortBedByChromThenSizeAsc();   // per chromosome, sortBySizeAsc order
+    void SortBedByChromThenSizeDesc();  // per chromosome, sortBySizeDesc order
+    void SortBedByChromThenScoreAsc();  // per chromosome, sortByScoreAsc order; exits unless bedType >= 5
+    void SortBedByChromThenScoreDesc(); // per chromosome, sortByScoreDesc order; exits unless bedType >= 5
+
+private:
+    // name of the input file, as passed to the constructor
+    string _bedFile;
+
+    // instance of a bed file class.
+    BedFile *_bed;
+
+    // methods
+
+};
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,157 @@
+/*****************************************************************************
+  sortMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "sortBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "sortBed"
+
+
+// define our parameter checking macro.
+// True when argv[i] is exactly `param` (whose length is `paramLen`).
+// Expects `argv` and `i` to be in scope where the macro is used. The
+// expansion is fully parenthesized so it composes safely with !, || and &&.
+#define PARAMETER_CHECK(param, paramLen, actualLen) ((strncmp(argv[i], param, min(actualLen, paramLen)) == 0) && ((actualLen) == (paramLen)))
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedFile  = "stdin";
+    bool haveBed    = true;
+    int sortChoices = 0;
+
+    bool sortBySizeAsc            = false;
+    bool sortBySizeDesc           = false;
+    bool sortByChromThenSizeAsc   = false;
+    bool sortByChromThenSizeDesc  = false;
+    bool sortByChromThenScoreAsc  = false;
+    bool sortByChromThenScoreDesc = false;
+
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                bedFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-sizeA", 6, parameterLength)) {
+            sortBySizeAsc = true;
+            sortChoices++;
+        }
+        else if(PARAMETER_CHECK("-sizeD", 6, parameterLength)) {
+            sortBySizeDesc = true;
+            sortChoices++;
+        }
+        else if(PARAMETER_CHECK("-chrThenSizeA", 13, parameterLength)) {
+            sortByChromThenSizeAsc = true;
+            sortChoices++;
+        }
+        else if(PARAMETER_CHECK("-chrThenSizeD", 13, parameterLength)) {
+            sortByChromThenSizeDesc = true;
+            sortChoices++;
+        }
+        else if(PARAMETER_CHECK("-chrThenScoreA", 14, parameterLength)) {
+            sortByChromThenScoreAsc = true;
+            sortChoices++;
+        }
+        else if(PARAMETER_CHECK("-chrThenScoreD", 14, parameterLength)) {
+            sortByChromThenScoreDesc = true;
+            sortChoices++;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBed) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (sortChoices > 1) {
+        cerr << endl << "*****" << endl << "*****ERROR: Sorting options are mutually exclusive.  Please choose just one. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+
+    if (!showHelp) {
+        BedSort *bm = new BedSort(bedFile);
+
+        if (sortBySizeAsc) {
+            bm->SortBedBySizeAsc();
+        }
+        else if (sortBySizeDesc) {
+            bm->SortBedBySizeDesc();
+        }
+        else if (sortByChromThenSizeAsc) {
+            bm->SortBedByChromThenSizeAsc();
+        }
+        else if (sortByChromThenSizeDesc) {
+            bm->SortBedByChromThenSizeDesc();
+        }
+        else if (sortByChromThenScoreAsc) {
+            bm->SortBedByChromThenScoreAsc();
+        }
+        else if (sortByChromThenScoreDesc) {
+            bm->SortBedByChromThenScoreDesc();
+        }
+        else {
+            bm->SortBed();
+        }
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+// Writes the sortBed usage text to stderr and terminates the program with
+// exit status 1. This function never returns to its caller.
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+    cerr << "Summary: Sorts a feature file in various and useful ways." << endl << endl;
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t" << "-sizeA\t\t"    << "Sort by feature size in ascending order." << endl;
+    cerr << "\t" << "-sizeD\t\t"    << "Sort by feature size in descending order." << endl;
+    cerr << "\t" << "-chrThenSizeA\t"   << "Sort by chrom (asc), then feature size (asc)." << endl;
+    cerr << "\t" << "-chrThenSizeD\t"   << "Sort by chrom (asc), then feature size (desc)." << endl;
+    cerr << "\t" << "-chrThenScoreA\t"  << "Sort by chrom (asc), then score (asc)." << endl;
+    cerr << "\t" << "-chrThenScoreD\t"  << "Sort by chrom (asc), then score (desc)." << endl << endl;
+
+    // end the program here
+    exit(1);
+
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/subtractBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,47 @@
+# Build rules for the subtractBed tool.
+# All paths are relative to this directory; objects and the final binary
+# are written to the shared $(OBJ_DIR) and $(BIN_DIR) directories.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+# OBJECTS are this tool's own objects; _EXT_OBJECTS are produced by the
+# utility directories. Both lists are prefixed with $(OBJ_DIR) below.
+SOURCES= subtractMain.cpp subtractBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= subtractBed
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link step: the binary is written to $(BIN_DIR), not to a file named
+# $(PROGRAM) in this directory.
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# Every object lists all of $(SOURCES) as prerequisites. The recipe picks
+# the source to compile from the target itself: $(*F) is the target's file
+# name without its directory or .o suffix.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# External objects are built by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# NOTE: removes everything in $(OBJ_DIR) and $(BIN_DIR), not only the
+# files produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,178 @@
+/*****************************************************************************
+  subtractBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "subtractBed.h"
+
+
+/*
+    Constructor
+*/
+// Stores the run options, creates the BedFile objects for A and B, and
+// immediately performs the subtraction (see SubtractBed()).
+BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand)
+    : _bedAFile(bedAFile),
+      _bedBFile(bedBFile),
+      _overlapFraction(overlapFraction),
+      _sameStrand(sameStrand),
+      _diffStrand(diffStrand),
+      _bedA(new BedFile(bedAFile)),
+      _bedB(new BedFile(bedBFile))
+{
+    // construction is the entire run
+    SubtractBed();
+}
+
+
+/*
+    Destructor
+*/
+// Releases the two BedFile objects that the constructor allocated with
+// new; previously they were never freed.
+BedSubtract::~BedSubtract(void) {
+    delete _bedA;
+    delete _bedB;
+}
+
+
+void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) {
+
+    // find all of the overlaps between a and B.
+    _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
+
+    //  is A completely spanned by an entry in B?
+    //  if so, A should not be reported.
+    int numConsumedByB = 0;
+    int numOverlaps = 0;
+    vector<BED> bOverlaps;  // list of hits in B.  Special processing if there are multiple.
+
+    vector<BED>::const_iterator h = hits.begin();
+    vector<BED>::const_iterator hitsEnd = hits.end();
+    for (; h != hitsEnd; ++h) {
+
+        int s = max(a.start, h->start);
+        int e = min(a.end, h->end);
+        int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
+        int aLength = (a.end - a.start);        // the length of a in b.p.
+
+        if (s < e) {
+
+            // is there enough overlap (default ~ 1bp)
+            float overlap = ((float) overlapBases / (float) aLength);
+
+            if (overlap >= 1.0) {
+                numOverlaps++;
+                numConsumedByB++;
+            }
+            else if ( overlap >= _overlapFraction ) {
+                numOverlaps++;
+                bOverlaps.push_back(*h);
+            }
+        }
+    }
+
+    if (numOverlaps == 0) {
+        // no overlap found, so just report A as-is.
+        _bedA->reportBedNewLine(a);
+    }
+    else if (numOverlaps == 1) {
+        // one overlap found.  only need to look at the single
+        // entry in bOverlaps.
+
+        // if A was not "consumed" by any entry in B
+        if (numConsumedByB == 0) {
+
+            BED theHit = bOverlaps[0];
+
+            // A    ++++++++++++
+            // B        ----
+            // Res. ====    ====
+            if ( (theHit.start > a.start) && (theHit.end < a.end) ) {
+                _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
+                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+            }
+            // A    ++++++++++++
+            // B    ----------
+            // Res.           ==
+            else if (theHit.start == a.start) {
+                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+            }
+            // A          ++++++++++++
+            // B    ----------
+            // Res.       ====
+            else if (theHit.start < a.start) {
+                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
+            }
+            // A    ++++++++++++
+            // B           ----------
+            // Res. =======
+            else if (theHit.start > a.start) {
+                _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
+            }
+        }
+    }
+    else if (numOverlaps > 1) {
+        // multiple overlapz found.  look at all the hits
+        // and figure out which bases in A survived.  then
+        // report the contigous intervals that survived.
+
+        vector<bool> aKeep(a.end - a.start, true);
+
+        if (numConsumedByB == 0) {
+            // track the number of hit starts and ends at each position in A
+            for (vector<BED>::iterator h = bOverlaps.begin(); h != bOverlaps.end(); ++h) {
+                int s = max(a.start, h->start);
+                int e = min(a.end, h->end);
+
+                for (int i = s+1; i <= e; ++i) {
+                    aKeep[i-a.start-1] = false;
+                }
+            }
+            // report the remaining blocks.
+            for (unsigned int i = 0; i < aKeep.size(); ++i) {
+                if (aKeep[i] == true) {
+                    CHRPOS blockStart = i + a.start;
+                    while ((aKeep[i] == true) && (i < aKeep.size())) {
+                        i++;
+                    }
+                    CHRPOS blockEnd = i + a.start;
+                    blockEnd = min(a.end, blockEnd);
+                    _bedA->reportBedRangeNewLine(a,blockStart,blockEnd);
+                }
+            }
+        }
+    }
+}
+
+
+
+void BedSubtract::SubtractBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    BED a, nullBed;
+    BedLineStatus bedStatus;
+    int lineNum = 0;                    // current input line number
+    vector<BED> hits;                   // vector of potential hits
+    // reserve some space
+    hits.reserve(100);
+
+    _bedA->Open();
+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            FindAndSubtractOverlaps(a, hits);
+            hits.clear();
+            a = nullBed;
+        }
+    }
+    _bedA->Close();
+
+}
+// END SubtractBed
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,53 @@
+/*****************************************************************************
+  subtractBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef SUBTRACTBED_H
+#define SUBTRACTBED_H
+
+#include "bedFile.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Holds the options and the two BedFile handles used to subtract the
+// features of file B from the features of file A.
+// NOTE(review): the driver only constructs and deletes this object, so the
+// constructor presumably runs SubtractBed() itself -- confirm in subtractBed.cpp.
+class BedSubtract {
+
+public:
+
+    // constructor
+    // bedAFile / bedBFile : paths of the A and B input files
+    // overlapFraction     : value of the -f option
+    // sameStrand          : set by the -s option
+    // diffStrand          : set by the -S option
+    BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand);
+
+    // destructor
+    ~BedSubtract(void);
+
+private:
+
+    // processing variables
+    string _bedAFile;
+    string _bedBFile;
+    float _overlapFraction;
+    bool _sameStrand;
+    bool _diffStrand;
+
+
+    // instances of bed file class.
+    BedFile *_bedA, *_bedB;
+
+    // methods
+    // Handles a single A record; "hits" is a caller-owned scratch vector.
+    void FindAndSubtractOverlaps(BED &a, vector<BED> &hits);
+    // Loads B into a map, then feeds every valid A record to FindAndSubtractOverlaps.
+    void SubtractBed();
+};
+
+#endif /* SUBTRACTBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,145 @@
+/*****************************************************************************
+  subtractMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "subtractBed.h"
+#include "version.h"
+
+using namespace std;
+
+// define our program name
+#define PROGRAM_NAME "subtractBed"
+
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input files
+    string bedAFile;
+    string bedBFile;
+
+    // input arguments
+    float overlapFraction = 1E-9;
+
+    bool haveBedA = false;
+    bool haveBedB = false;
+    bool haveFraction = false;
+    bool sameStrand = false;
+    bool diffStrand = false;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedA = true;
+                bedAFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBedB = true;
+                bedBFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFraction = true;
+                overlapFraction = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            sameStrand = true;
+        }
+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
+            diffStrand = true;
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBedA || !haveBedB) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    
+    if (sameStrand && diffStrand) {
+        cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+
+        BedSubtract *bs = new BedSubtract(bedAFile, bedBFile, overlapFraction, sameStrand, diffStrand);
+        delete bs;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Removes the portion(s) of an interval that is overlapped" << endl;
+    cerr << "\t by another feature(s)." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
+
+    cerr << "Options: " << endl;
+    cerr << "\t-f\t"            << "Minimum overlap required as a fraction of A." << endl;
+    cerr                        << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
+    cerr                        << "\t\t- (FLOAT) (e.g. 0.50)" << endl << endl;
+
+    cerr << "\t-s\t"            << "Require same strandedness.  That is, only subtract hits in B that" << endl;
+    cerr                        << "\t\toverlap A on the _same_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl;
+
+    cerr << "\t-S\t"            << "Force strandedness.  That is, only subtract hits in B that" << endl;
+    cerr                        << "\t\toverlap A on the _opposite_ strand." << endl;
+    cerr                        << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl;
+
+    // end the program here
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/tagBam/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,51 @@
+# Build rules for the tagBam program.
+# Objects go to OBJ_DIR and the linked binary to BIN_DIR; both directories
+# are given relative to this Makefile's directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+           -I$(UTILITIES_DIR)/BamTools-Ancillary
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= tagBamMain.cpp tagBam.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# utility objects built by other Makefiles; both lists are prefixed with OBJ_DIR below
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= tagBam
+
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link this tool's objects with the utility objects and the bamtools library
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# compile each object from the .cpp whose name is derived from the target via $(*F);
+# every object lists all SOURCES as prerequisites
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# utility objects are produced by running make in each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes everything in OBJ_DIR and BIN_DIR, not only this tool's files
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,136 @@
+/*****************************************************************************
+  tagBam.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "tagBam.h"
+
+// build
+// Stores the options for a tagging run; no files are opened here.
+//   bamFile         : path of the BAM file to annotate
+//   annoFileNames   : annotation files, one BedFile is created per entry by OpenAnnoFiles()
+//   annoLables      : labels used as tag values when neither names nor scores are requested
+//   tag             : name of the BAM tag to add
+//   useNames        : populate the tag from the name field of overlapping features
+//   useScores       : populate the tag from the score field of overlapping features
+//   sameStrand      : passed to the overlap queries in Tag()
+//   diffStrand      : passed to the overlap queries in Tag()
+//   overlapFraction : passed to the label-mode overlap query in Tag()
+TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames,
+            const vector<string> &annoLables, const string &tag,
+            bool useNames, bool useScores, bool sameStrand, bool diffStrand, float overlapFraction):
+
+    _bamFile(bamFile),
+    _annoFileNames(annoFileNames),
+    _annoLabels(annoLables),
+    _tag(tag),
+    // _bed is deleted by the destructor, so it must start out as NULL;
+    // leaving it uninitialized made that delete undefined behaviour.
+    _bed(NULL),
+    _useNames(useNames),
+    _useScores(useScores),
+    _sameStrand(sameStrand),
+    _diffStrand(diffStrand),
+    _overlapFraction(overlapFraction)
+{}
+
+
+// destroy and delete the open file pointers
+TagBam::~TagBam(void) {
+    // NOTE(review): this delete is only safe if _bed is NULL or points to a
+    // BedFile owned by this object -- confirm the constructor initializes it.
+    delete _bed;
+    // release every annotation BedFile still held in _annoFiles
+    CloseAnnoFiles();
+}
+
+
+void TagBam::OpenAnnoFiles() {
+    // Create one BedFile per annotation file name, load it into its map,
+    // and append it to _annoFiles in the same order as _annoFileNames.
+    vector<string>::iterator name = _annoFileNames.begin();
+    while (name != _annoFileNames.end()) {
+        BedFile *anno = new BedFile(*name);
+        anno->loadBedFileIntoMap();
+        _annoFiles.push_back(anno);
+        ++name;
+    }
+}
+
+
+// Deletes every annotation BedFile and empties _annoFiles.
+// Safe to call more than once: Tag() calls it when it finishes and the
+// destructor calls it again.
+void TagBam::CloseAnnoFiles() {
+    for (size_t i=0; i < _annoFiles.size(); ++i) {
+        BedFile *file = _annoFiles[i];
+        delete file;
+        _annoFiles[i] = NULL;
+    }
+    // Drop the now-NULL entries.  Without this a second Tag() call would
+    // append new files after the stale NULLs and dereference them.
+    _annoFiles.clear();
+}
+
+
+void TagBam::Tag() {
+
+    // open the annotations files for processing;
+    OpenAnnoFiles();
+
+    // open the BAM file
+    BamReader reader;
+    BamWriter writer;
+    reader.Open(_bamFile);
+    // get header & reference information
+    string bamHeader  = reader.GetHeaderText();
+    RefVector refs = reader.GetReferenceData();
+
+    // set compression mode
+    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+//    if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
+    writer.SetCompressionMode(compressionMode);
+    // open our BAM writer
+    writer.Open("stdout", bamHeader, refs);
+
+    // rip through the BAM file and test for overlaps with each annotation file.
+    BamAlignment al;
+    vector<BED> hits;
+
+    while (reader.GetNextAlignment(al)) {
+        if (al.IsMapped() == true) {
+            BED a;
+            a.chrom = refs.at(al.RefID).RefName;
+            a.start = al.Position;
+            a.end   = al.GetEndPosition(false, false);
+            a.strand = "+";
+            if (al.IsReverseStrand()) a.strand = "-";
+            
+            ostringstream annotations;
+            // annotate the BAM file based on overlaps with the annotation files.
+            for (size_t i = 0; i < _annoFiles.size(); ++i) 
+            {
+                // grab the current annotation file.
+                BedFile *anno = _annoFiles[i];
+                
+                if (!_useNames && !_useScores) {
+                    // add the label for this annotation file to tag if there is overlap
+                    if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction))
+                    {
+                        annotations << _annoLabels[i] << ";";
+                    }
+                }
+                // use the score field
+                else if (!_useNames && _useScores) {
+                    anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
+                    for (size_t i = 0; i < hits.size(); ++i) {
+                        annotations << hits[i].score;
+                        if (i < hits.size() - 1) annotations << ",";
+                    }
+                    if (hits.size() > 0) annotations << ";";
+                    hits.clear();
+                }
+                // use the name field from the annotation files to populate tag
+                else if (_useNames && !_useScores) {
+                    anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
+                    for (size_t i = 0; i < hits.size(); ++i) {
+                        annotations << hits[i].name;
+                        if (i < hits.size() - 1) annotations << ",";
+                    }
+                    if (hits.size() > 0) annotations << ";";
+                    hits.clear();
+                }
+            }
+            // were there any overlaps with which to make a tag?
+            if (annotations.str().size() > 0) {
+                al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
+            }
+            writer.SaveAlignment(al);
+        }
+    }
+    reader.Close();
+
+    // close the annotations files;
+    CloseAnnoFiles();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBam.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/tagBam/tagBam.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,83 @@
+/*****************************************************************************
+  tagBam.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef TAGBAM_H
+#define TAGBAM_H
+
+#include "bedFile.h"
+
+#include "version.h"
+#include "api/BamReader.h"
+#include "api/BamWriter.h"
+#include "api/BamAux.h"
+#include "BamAncillary.h"
+using namespace BamTools;
+
+#include "bedFile.h"
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+// Adds a tag to BAM alignments based on their overlaps with a set of
+// annotation files.  Construct with the options, then call Tag().
+class TagBam {
+
+public:
+
+    // constructor
+    // Only stores its arguments; the annotation files are opened by Tag().
+    TagBam(const string &bamFile, const vector<string> &annoFileNames,
+                const vector<string> &annoLabels, const string &tag, 
+                bool useNames, bool useScores, bool sameStrand, 
+                bool diffStrand, float overlapFraction);
+
+    // destructor
+    ~TagBam(void);
+
+    // annotate the BAM file with all of the annotation files.
+    void Tag();
+
+private:
+
+    // input files.
+    string _bamFile;
+    vector<string> _annoFileNames;
+    vector<string> _annoLabels;
+        
+    // name of the BAM tag that receives the annotations
+    string _tag;
+
+    // instance of a bed file class.
+    // NOTE(review): _bed is deleted in the destructor but is not assigned
+    // anywhere in tagBam.cpp -- confirm whether it is still needed.
+    BedFile *_bed;
+    // one loaded BedFile per entry of _annoFileNames, owned by this object
+    vector<BedFile*> _annoFiles;
+
+    // should we use the name field from the annotation files?
+    bool _useNames;
+    // should we use the score field from the annotation files?
+    bool _useScores;
+    
+    // do we care about strandedness when tagging?
+    bool _sameStrand;
+    bool _diffStrand;
+    // value of the -f option
+    float _overlapFraction;
+
+    // NOTE(review): declared here but no definition appears in tagBam.cpp;
+    // presumably a leftover from another tool -- confirm before using.
+    void ReportAnnotations();
+
+    // create and load one BedFile per annotation file name
+    void OpenAnnoFiles();
+
+    // delete the BedFiles created by OpenAnnoFiles()
+    void CloseAnnoFiles();
+
+};
+#endif /* TAGBAM_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,209 @@
+/*****************************************************************************
+  tagBamMain.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "tagBam.h"
+#include "version.h"
+
+using namespace std;
+
+// define the version
+#define PROGRAM_NAME "tagBam"
+
+// define our parameter checking macro
+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
+
+// function declarations
+void ShowHelp(void);
+
+int main(int argc, char* argv[]) {
+
+    // our configuration variables
+    bool showHelp = false;
+
+    // input file
+    string bamFile;
+    float overlapFraction = 1E-9;
+    string tag = "YB";
+
+    // parm flags
+    bool haveTag          = false;
+    bool haveFraction     = false;
+    bool useNames         = false;
+    bool useScores        = false;
+    bool sameStrand       = false;
+    bool diffStrand       = false;
+    bool haveBam          = false;
+    bool haveFiles        = false;
+    bool haveLabels       = false;
+
+
+    // list of annotation files / names
+    vector<string> inputFiles;
+    vector<string> inputLabels;
+
+    // check to see if we should print out some help
+    if(argc <= 1) showHelp = true;
+
+    for(int i = 1; i < argc; i++) {
+        int parameterLength = (int)strlen(argv[i]);
+
+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
+        (PARAMETER_CHECK("--help", 5, parameterLength))) {
+            showHelp = true;
+        }
+    }
+
+    if(showHelp) ShowHelp();
+
+    // do some parsing (all of these parameters require 2 strings)
+    for(int i = 1; i < argc; i++) {
+
+        int parameterLength = (int)strlen(argv[i]);
+
+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveBam  = true;
+                bamFile = argv[i + 1];
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-files", 6, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFiles = true;
+                i = i+1;
+                string file = argv[i];
+                while (file[0] != '-' && i < argc) {
+                    inputFiles.push_back(file);
+                    i++;
+                    if (i < argc)
+                        file = argv[i];
+                }
+                i--;
+            }
+        }
+        else if(PARAMETER_CHECK("-labels", 7, parameterLength)) {
+            if ((i+1) < argc) {
+                haveLabels = true;
+                i = i+1;
+                string label = argv[i];
+                while (label[0] != '-' && i < argc) {
+                    inputLabels.push_back(label);
+                    i++;
+                    if (i < argc)
+                        label = argv[i];
+                }
+                i--;
+            }
+        }
+        else if (PARAMETER_CHECK("-names", 6, parameterLength)) {
+            useNames = true;
+        }
+        else if (PARAMETER_CHECK("-scores", 7, parameterLength)) {
+            useScores = true;
+        }
+        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
+            sameStrand = true;
+        }
+        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
+            diffStrand = true;
+        }
+        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
+            if ((i+1) < argc) {
+                haveFraction = true;
+                overlapFraction = atof(argv[i + 1]);
+                i++;
+            }
+        }
+        else if(PARAMETER_CHECK("-tag", 4, parameterLength)) {
+            if ((i+1) < argc) {
+                haveTag = true;
+                tag = argv[i + 1];
+                i++;
+            }
+        }
+        else {
+            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
+            showHelp = true;
+        }
+    }
+
+    // make sure we have both input files
+    if (!haveBam || !haveFiles) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (!useNames && !haveLabels && !useScores) {
+        cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names or -scores" << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (sameStrand && diffStrand) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (haveLabels && useNames) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (useScores && useNames) {
+        cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+    if (haveTag && tag.size() > 2) {
+        cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl;
+        showHelp = true;
+    }
+
+    if (!showHelp) {
+        TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, useScores, sameStrand, diffStrand, overlapFraction);
+        ba->Tag();
+        delete ba;
+        return 0;
+    }
+    else {
+        ShowHelp();
+    }
+}
+
+void ShowHelp(void) {
+
+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
+
+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
+
+    cerr << "Summary: Annotates a BAM file based on overlaps with multiple BED/GFF/VCF files" << endl;
+    cerr << "\t on the intervals in -i." << endl << endl;
+
+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -i <BAM> -files FILE1 .. FILEn  -labels LAB1 .. LABn" << endl << endl;
+
+    cerr << "Options: " << endl;
+
+    cerr << "\t-s\t"            << "Require overlaps on the same strand.  That is, only tag alignments that have the same" << endl;
+    cerr                        << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
+
+    cerr << "\t-S\t"            << "Require overlaps on the opposite strand.  That is, only tag alignments that have the opposite" << endl;
+    cerr                        << "\t\tstrand as a feature in the annotation file(s)." << endl << endl;
+
+    cerr << "\t-f\t"            << "Minimum overlap required as a fraction of the alignment." << endl;
+    cerr                        << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl;
+    cerr                        << "\t\t- FLOAT (e.g. 0.50)" << endl << endl;
+
+    cerr << "\t-tag\t"          << "Dictate what the tag should be. Default is YB." << endl;
+    cerr                        << "\t\t- STRING (two characters, e.g., YK)" << endl << endl;
+    
+    cerr << "\t-names\t"        << "Use the name field from the annotation files to populate tags." << endl;
+    cerr                        << "\t\tBy default, the -labels values are used." << endl << endl;
+
+    cerr << "\t-scores\t"    << "A list of 1-based columns for each annotation file" << endl;
+    cerr                        << "\t\tin which a color can be found." << endl << endl;
+    
+    
+    exit(1);
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,49 @@
+# Build rules for the unionBedGraphs program.
+# Objects go to OBJ_DIR and the linked binary to BIN_DIR; both directories
+# are given relative to this Makefile's directory.
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedGraphFile/ \
+    -I$(UTILITIES_DIR)/lineFileUtilities/ \
+    -I$(UTILITIES_DIR)/genomeFile/ \
+    -I$(UTILITIES_DIR)/version/ \
+    -I$(UTILITIES_DIR)/gzstream/ \
+    -I$(UTILITIES_DIR)/fileType/ \
+    -I$(UTILITIES_DIR)/BamTools/include
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= unionBedGraphs.cpp unionBedGraphsMain.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# utility objects built by other Makefiles; both lists are prefixed with OBJ_DIR below
+_EXT_OBJECTS=bedGraphFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= unionBedGraphs
+
+all: $(PROGRAM)
+
+.PHONY: all
+
+# link this tool's objects with the utility objects
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
+
+# compile each object from the .cpp whose name is derived from the target via $(*F);
+# every object lists all SOURCES as prerequisites
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# utility objects are produced by running make in each utility directory
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes everything in OBJ_DIR and BIN_DIR, not only this tool's files
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,62 @@
+/*****************************************************************************
+  intervalItem.h
+
+  (c) 2010 - Assaf Gordon
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef INTERVALITEM_H
+#define INTERVALITEM_H
+
+#include <string>
+#include <queue>
+
+// Tells whether an IntervalItem carries an interval's start or its end coordinate.
+enum COORDINATE_TYPE {
+    START,  // item marks the start position of an interval
+    END     // item marks the end position of an interval
+};
+
+/*
+   An interval item in the priority queue.
+
+   An IntervalItem can mark either a START position or an END position.
+ */
+class IntervalItem
+{
+private:
+    IntervalItem();
+
+public:
+    int source_index;           // which source BedGraph file this came from
+    COORDINATE_TYPE coord_type; // is this the start or the end position?
+    CHRPOS coord;
+    std::string depth;
+
+    IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord, std::string _depth) :
+        source_index(_index),
+        coord_type(_type),
+        coord(_coord),
+        depth(_depth)
+    {}
+
+    IntervalItem(const IntervalItem &other) :
+        source_index(other.source_index),
+        coord_type(other.coord_type),
+        coord(other.coord),
+        depth(other.depth)
+    {}
+
+    bool operator< ( const IntervalItem& other ) const
+    {
+        return this->coord > other.coord;
+    }
+};
+
+// our priority queue
+typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE;
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,255 @@
+/*****************************************************************************
+  unionBedGraphs.cpp
+
+  (c) 2010 - Assaf Gordon, CSHL
+           - Aaron Quinlan, UVA
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include <cassert>
+#include <cstring>
+#include <cstdlib>
+#include <iostream>
+#include <algorithm>
+
+#include "bedGraphFile.h"
+#include "unionBedGraphs.h"
+
+using namespace std;
+
+
+UnionBedGraphs::UnionBedGraphs(std::ostream& _output,
+                            const vector<string>& _filenames,
+                            const vector<string>& _titles,
+                            bool _print_empty_regions,
+                            const std::string& _genome_size_filename,
+                            const std::string& _no_coverage_value   ) :
+    filenames(_filenames),
+    titles(_titles),
+    output(_output),
+    current_non_zero_inputs(0),
+    print_empty_regions(_print_empty_regions),
+    genome_sizes(NULL),
+    no_coverage_value(_no_coverage_value)
+{
+    if (print_empty_regions) {
+        assert(!_genome_size_filename.empty());
+
+        genome_sizes = new GenomeFile(_genome_size_filename);
+    }
+}
+
+
+UnionBedGraphs::~UnionBedGraphs() {
+    // release the per-file readers first
+    CloseBedgraphFiles();
+
+    // deleting a NULL pointer is a no-op, so no guard is needed
+    delete genome_sizes;
+    genome_sizes = NULL;
+}
+
+
+void UnionBedGraphs::Union() {
+    OpenBedgraphFiles();
+
+    // Add the first interval from each file
+    for(size_t i=0;i<bedgraph_files.size();++i)
+        LoadNextBedgraphItem(i);
+
+    // Chromosome loop - once per chromosome
+    do {
+        // Find the first chromosome to use
+        current_chrom = DetermineNextChrom();
+
+        // Populate the queue with initial values from all files
+        // (if they belong to the correct chromosome)
+        for(size_t i=0;i<bedgraph_files.size();++i)
+            AddInterval(i);
+
+        CHRPOS current_start = ConsumeNextCoordinate();
+
+        // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage
+        if (print_empty_regions && current_start > 0)
+            PrintEmptyCoverage(0,current_start);
+
+        // Intervals loop - until all intervals (of current chromosome) from all files are used.
+        do {
+            CHRPOS current_end = queue.top().coord;
+            PrintCoverage(current_start, current_end);
+            current_start = ConsumeNextCoordinate();
+        } while (!queue.empty());
+
+        // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome
+            // print a dummy empty coverage
+        if (print_empty_regions) {
+            CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);
+            if (current_start < chrom_size)
+                PrintEmptyCoverage(current_start, chrom_size);
+        }
+
+    } while (!AllFilesDone());
+}
+
+
+/*
+    Remove from the queue every item that sits at the smallest pending
+    coordinate, apply each one through UpdateInformation(), and return that
+    coordinate.  The queue must not be empty on entry.
+*/
+CHRPOS UnionBedGraphs::ConsumeNextCoordinate() {
+    assert(!queue.empty());
+
+    const CHRPOS new_position = queue.top().coord;
+    do {
+        // Copy the head and pop it BEFORE processing it.  UpdateInformation()
+        // can push new items (via AddInterval), and popping afterwards could
+        // remove one of those instead of the item that was just handled.
+        const IntervalItem item = queue.top();
+        queue.pop();
+        UpdateInformation(item);
+    } while (!queue.empty() && queue.top().coord == new_position);
+
+    return new_position;
+}
+
+
+void UnionBedGraphs::UpdateInformation(const IntervalItem &item) {
+    // Update the depth coverage for this file
+
+    // Which coordinate is it - start or end?
+    switch (item.coord_type)
+    {
+    case START:
+        current_depth[item.source_index] = item.depth;
+        current_non_zero_inputs++;
+        break;
+    case END:
+        //Read the next interval from this file
+        AddInterval(item.source_index);
+        current_depth[item.source_index] = no_coverage_value;
+        current_non_zero_inputs--;
+        break;
+    default:
+        assert(0);
+    }
+}
+
+
+void UnionBedGraphs::PrintHeader() {
+    // the three coordinate columns first, then one column per title
+    output << "chrom\tstart\tend" ;
+    for (vector<string>::const_iterator title = titles.begin(); title != titles.end(); ++title) {
+        output << "\t" << *title;
+    }
+    output << endl;
+}
+
+
+void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) {
+    if ( current_non_zero_inputs == 0 && ! print_empty_regions )
+        return ;
+
+    output << current_chrom << "\t"
+        << start << "\t"
+        << end;
+
+    for (size_t i=0;i<current_depth.size();++i)
+        output << "\t" << current_depth[i] ;
+
+    output << endl;
+}
+
+
+void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {
+    // region coordinates ...
+    output << current_chrom << "\t" << start << "\t" << end;
+
+    // ... followed by the no-coverage value, once per input file
+    size_t remaining = current_depth.size();
+    while (remaining > 0) {
+        output << "\t" << no_coverage_value ;
+        --remaining;
+    }
+
+    output << endl;
+}
+
+
+void UnionBedGraphs::LoadNextBedgraphItem(int index) {
+    assert(static_cast<unsigned int>(index) < bedgraph_files.size());
+
+    current_bedgraph_item[index].chrom="";
+
+    BedGraphFile *file = bedgraph_files[index];
+    BEDGRAPH_STR bg;
+    int lineNum = 0;
+    BedGraphLineStatus status;
+
+    while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID )  {
+        if (status != BEDGRAPH_VALID)
+            continue;
+
+        current_bedgraph_item[index] = bg ;
+        break;
+    }
+}
+
+
+bool UnionBedGraphs::AllFilesDone() {
+    // Skip over files with no pending item (empty chromosome name);
+    // every file is done exactly when the scan reaches the end.
+    size_t i = 0;
+    while (i < current_bedgraph_item.size() && current_bedgraph_item[i].chrom.empty()) {
+        ++i;
+    }
+    return i == current_bedgraph_item.size();
+}
+
+
+string UnionBedGraphs::DetermineNextChrom() {
+    string next_chrom;
+    for (size_t i=0;i<current_bedgraph_item.size();++i) {
+        if (current_bedgraph_item[i].chrom.empty())
+            continue;
+
+        if (next_chrom.empty())
+            next_chrom = current_bedgraph_item[i].chrom;
+        else
+            if (current_bedgraph_item[i].chrom < next_chrom)
+                next_chrom = current_bedgraph_item[i].chrom ;
+    }
+    return next_chrom;
+}
+
+
+void UnionBedGraphs::AddInterval(int index) {
+    assert(static_cast<unsigned int>(index) < bedgraph_files.size());
+
+    //This file has no more intervals
+    if (current_bedgraph_item[index].chrom.empty())
+        return ;
+
+    //If the next interval belongs to a different chrom, don't add it
+    if (current_bedgraph_item[index].chrom!=current_chrom)
+        return ;
+
+    const BEDGRAPH_STR &bg(current_bedgraph_item[index]);
+
+    IntervalItem start_item(index, START, bg.start, bg.depth);
+    IntervalItem end_item(index, END, bg.end, bg.depth);
+
+    queue.push(start_item);
+    queue.push(end_item);
+
+    LoadNextBedgraphItem(index);
+}
+
+
+void UnionBedGraphs::OpenBedgraphFiles() {
+    const size_t file_count = filenames.size();
+
+    size_t i = 0;
+    while (i != file_count) {
+        // open a reader for this input and remember it
+        BedGraphFile *reader = new BedGraphFile(filenames[i]);
+        reader->Open();
+        bedgraph_files.push_back(reader);
+
+        // every file starts out with the no-coverage value
+        current_depth.push_back(no_coverage_value);
+        ++i;
+    }
+
+    // one "pending item" slot per input file
+    current_bedgraph_item.resize(file_count);
+}
+
+
+void UnionBedGraphs::CloseBedgraphFiles() {
+    // Delete every reader, null its slot, then drop all slots so that a
+    // repeated call finds nothing left to delete.
+    for (vector<BedGraphFile*>::iterator slot = bedgraph_files.begin();
+         slot != bedgraph_files.end(); ++slot) {
+        delete *slot;
+        *slot = NULL ;
+    }
+    bedgraph_files.clear();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,123 @@
+/*****************************************************************************
+  unionBedGraphs.h
+
+  (c) 2010 - Assaf Gordon, CSHL
+           - Aaron Quinlan, UVA
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef UNIONBEDGRAPHS_H
+#define UNIONBEDGRAPHS_H
+
+#include <vector>
+#include <string>
+#include "bedGraphFile.h"
+#include "genomeFile.h"
+#include "intervalItem.h"
+
+/*
+   Combines several BedGraph files into a single report: for every run of
+   coordinates, prints chrom/start/end followed by one coverage value per
+   input file (see Union(), PrintCoverage()).
+ */
+class UnionBedGraphs
+{
+private:
+    typedef BEDGRAPH_STR BEDGRAPH_TYPE;
+
+    // Names of the input BedGraph files; one BedGraphFile is created per entry.
+    vector<string>  filenames;
+    // Presumably the per-file names printed by PrintHeader() -- confirm in the .cpp.
+    vector<string>  titles;
+
+    // One heap-allocated reader per input file (created in OpenBedgraphFiles,
+    // deleted in CloseBedgraphFiles).
+    vector<BedGraphFile*>               bedgraph_files;
+    // One value per input file; every entry starts out as 'no_coverage_value'.
+    vector<BEDGRAPH_TYPE::DEPTH_TYPE>   current_depth;
+    // One slot per input file holding the interval most recently loaded from
+    // it; an empty chrom means the file has no more intervals.
+    vector<BEDGRAPH_TYPE>               current_bedgraph_item;
+
+    // Stream handed to the constructor; presumably where the report is written.
+    std::ostream    &output;
+
+    // START/END coordinates of the queued intervals of the current chromosome.
+    INTERVALS_PRIORITY_QUEUE queue;
+    // Chromosome whose intervals are currently being queued (see AddInterval).
+    std::string              current_chrom;
+    // NOTE(review): looks like a count of files with coverage at the current
+    // position -- confirm in the .cpp.
+    int                      current_non_zero_inputs;
+    // Constructor option; presumably enables PrintEmptyCoverage() output.
+    bool                     print_empty_regions;
+
+    // NOTE(review): presumably chromosome sizes loaded from the genome file
+    // named in the constructor -- confirm in the .cpp.
+    GenomeFile* genome_sizes;
+
+    // Value used to seed every 'current_depth' entry.
+    std::string no_coverage_value;
+
+public:
+    UnionBedGraphs(std::ostream& _output,
+            const vector<string>& _filenames,
+            const vector<string>& _titles,
+            bool _print_empty_regions,
+            const std::string& _genomeFileName,
+            const std::string& _no_coverage_value);
+
+    virtual ~UnionBedGraphs();
+
+    // Combines all bedgraph files
+    void Union();
+
+    // Print the header line: chrom/start/end + name of each bedgraph file.
+    void PrintHeader();
+
+
+private:
+
+    // Open all BedGraph files, initialize "current_XXX" vectors
+    void OpenBedgraphFiles();
+
+    // Close the BedGraph files.
+    void CloseBedgraphFiles();
+
+    /*
+       Add an interval from BedGraph file 'index' into the queue.
+       will only be added if it belongs to the current chromosome.
+
+       If the interval was added (=consumed), the next interval will be read from the file
+       using 'LoadNextBedgraphItem'
+     */
+    void AddInterval(int index);
+
+    /*
+       Loads the next interval from BedGraph file 'index'.
+       Stores it in 'current_bedgraph_item' vector.
+     */
+    void LoadNextBedgraphItem(int index);
+
+    /*
+       Scans the 'current_bedgraph_item' vector,
+       find the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
+     */
+    std::string DetermineNextChrom();
+
+    /*
+       Returns 'true' if ALL intervals from ALL BedGraph files were used
+    */
+    bool        AllFilesDone();
+
+    /*
+       Extract the next coordinate from the queue, and updates the current coverage information.
+       If multiple interval share the same coordinate values, all of them are handled.
+       If an END coordinate is consumed, the next interval (from the corresponding file) is read.
+     */
+    CHRPOS ConsumeNextCoordinate();
+
+    /*
+       Updates the coverage information based on the given item.
+       Item can be a START coordinate or an END coordinate.
+     */
+    void UpdateInformation(const IntervalItem &item);
+
+    /*
+       prints chrom/start/end and the current depth coverage values of all the files.
+     */
+    void PrintCoverage(CHRPOS start, CHRPOS end);
+
+    /*
+       prints chrom/start/end and the ZERO depth coverage values of all the files.
+     */
+    void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
+
+    // Debugging aid; presumably dumps the contents of 'queue'.
+    void DebugPrintQueue();
+};
+
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+  unionBedGraphsMain.cpp\n+\n+  (c) 2010 - Assaf Gordon, CSHL\n+           - Aaron Quinlan, UVA\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <climits>\n+#include <cstring>\n+#include <cstdlib>\n+#include <vector>\n+#include <string>\n+#include <iostream>\n+#include <getopt.h>\n+#include <libgen.h> //for basename()\n+#include "version.h"\n+\n+#include "genomeFile.h"\n+#include "unionBedGraphs.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "unionBedGraphs"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+//STLized version of basename()\n+// (because POSIX basename() modifies the input string pointer)\n+// Additionally: removes any extension the basename might have.\n+std::string stl_basename(const std::string& path);\n+\n+// function declarations\n+void ShowHelp(void);\n+void ShowExamples(void);\n+\n+\n+int main(int argc, char* argv[])\n+{\n+    bool haveFiles         = false;\n+    bool haveTitles        = false;\n+    bool haveGenome        = false;\n+    bool haveFiller        = true;\n+    bool printHeader       = false;\n+    bool printEmptyRegions = false;\n+    bool showHelp          = false;\n+    string genomeFile;\n+    string basePath;\n+    string noCoverageValue = "0";\n+    vector<string> inputFiles;\n+    vector<string> inputTitles;\n+\n+    //Parse command line options\n+    if(argc <= 1)\n+        ShowHelp();\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 
2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp == true) {\n+        ShowHelp();\n+        exit(1);\n+    }\n+\n+    // do some parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFiles = true;\n+                i = i+1;\n+                string file = argv[i];\n+                while (file[0] != \'-\' && i < argc) {\n+                    inputFiles.push_back(file);\n+                    i++;\n+                    if (i < argc)\n+                        file = argv[i];\n+                }\n+                i--;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveTitles = true;\n+                i = i+1;\n+                string title = argv[i];\n+                while (title[0] != \'-\' && i < argc) {\n+                    inputTitles.push_back(title);\n+                    i++;\n+                    if (i < argc)\n+                        title = argv[i];\n+                }\n+                i--;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveGenome = true;\n+                genomeFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveFiller      = true;\n+                noCoverageValue = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n+            printHeader = true;\n+        }\n+        else if(PARAMETER_CHECK("-empty", 6, 
parameterLength)) {\n+            printEmptyRegions = true;\n+        }\n+        else if(PARAMETER_CHECK("-examples", 9, parameterLength)) {\n'..b'+\n+    cerr << "\\t-names\\t\\t"      << "A list of names (one / file) to describe each file in -i." << endl;\n+    cerr                        << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n+\n+    cerr << "\\t-g\\t\\t"          << "Use genome file to calculate empty regions." << endl;\n+    cerr                        << "\\t\\t\\t- STRING." << endl << endl;\n+\n+    cerr << "\\t-empty\\t\\t"      << "Report empty regions (i.e., start/end intervals w/o" << endl;\n+    cerr                        << "\\t\\t\\tvalues in all files)." << endl;\n+    cerr                        << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n+\n+    cerr << "\\t-filler TEXT\\t"  << "Use TEXT when representing intervals having no value." << endl;\n+    cerr                        << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n+\n+    cerr << "\\t-examples\\t"     << "Show detailed usage examples." 
<< endl << endl;\n+}\n+\n+\n+\n+void ShowExamples()\n+{\n+    cerr << "Example usage:\\n\\n"  \\\n+"== Input files: ==\\n" \\\n+"\\n" \\\n+" $ cat 1.bg\\n" \\\n+" chr1  1000    1500    10\\n" \\\n+" chr1  2000    2100    20\\n" \\\n+"\\n" \\\n+" $ cat 2.bg\\n" \\\n+" chr1  900 1600    60\\n" \\\n+" chr1  1700    2050    50\\n" \\\n+"\\n" \\\n+" $ cat 3.bg\\n" \\\n+" chr1  1980    2070    80\\n" \\\n+" chr1  2090    2100    20\\n" \\\n+"\\n" \\\n+" $ cat sizes.txt\\n" \\\n+" chr1  5000\\n" \\\n+"\\n" \\\n+"== Union/combine the files: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start   end 1   2   3\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line and custom names: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n+" chrom start   end WT-1    WT-2    KO-1\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    
2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+"\\n" \\\n+"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start   end 1   2   3\\n" \\\n+" chr1  0   900 0   0   0\\n" \\\n+" chr1  900 1000    0   60  0\\n" \\\n+" chr1  1000    1500    10  60  0\\n" \\\n+" chr1  1500    1600    0   60  0\\n" \\\n+" chr1  1600    1700    0   0   0\\n" \\\n+" chr1  1700    1980    0   50  0\\n" \\\n+" chr1  1980    2000    0   50  80\\n" \\\n+" chr1  2000    2050    20  50  80\\n" \\\n+" chr1  2050    2070    20  0   80\\n" \\\n+" chr1  2070    2090    20  0   0\\n" \\\n+" chr1  2090    2100    20  0   20\\n" \\\n+" chr1  2100    5000    0   0   0\\n" \\\n+"\\n" \\\n+;\n+}\n+\n+std::string stl_basename(const std::string& path)\n+{\n+    string result;\n+\n+    char* path_dup = strdup(path.c_str());\n+    char* basename_part = basename(path_dup);\n+    result = basename_part;\n+    free(path_dup);\n+\n+    size_t pos = result.find_last_of(\'.\');\n+    if (pos != string::npos )\n+        result = result.substr(0,pos);\n+\n+    return result;\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,73 @@
+/*****************************************************************************
+  bamAncillary.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "BamAncillary.h"
+using namespace std;
+
+// 10   15   20      25    30               4000
+// acccctttggacct---ataggga.................aaaa
+// acccc---ggaccttttataggga.................aaaa
+// 5M   3D 6M    2I 7M      20N             4M
+
+namespace BamTools {
+    void getBamBlocks(const BamAlignment &bam, const RefVector &refs,
+                      vector<BED> &blocks, bool breakOnDeletionOps) {
+
+        CHRPOS currPosition = bam.Position;
+        CHRPOS blockStart   = bam.Position;
+        string chrom        = refs.at(bam.RefID).RefName;
+        string name         = bam.Name;
+        string strand       = "+";
+        string score        = ToString(bam.MapQuality);
+        char  prevOp        = '\0';
+        if (bam.IsReverseStrand()) strand = "-";
+        bool blocksFound = false;
+
+        vector<CigarOp>::const_iterator cigItr = bam.CigarData.begin();
+        vector<CigarOp>::const_iterator cigEnd = bam.CigarData.end();
+        for ( ; cigItr != cigEnd; ++cigItr ) {
+            if (cigItr->Type == 'M') {
+                currPosition += cigItr->Length;
+                // we only want to create a new block if the current M op
+                // was preceded by an N op or a D op (and we are breaking on D ops)
+                if ((prevOp == 'D' && breakOnDeletionOps == true) || (prevOp == 'N')) {
+                    blocks.push_back( BED(chrom, blockStart, currPosition, name, score, strand) );
+                    blockStart = currPosition;
+                }
+            }
+            else if (cigItr->Type == 'D') {
+                if (breakOnDeletionOps == false)
+                    currPosition += cigItr->Length;
+                else {
+                    currPosition += cigItr->Length;
+                    blockStart    = currPosition;
+                }
+            }
+            else if (cigItr->Type == 'N') {
+                currPosition += cigItr->Length;
+                blockStart    = currPosition;
+            }
+            else if (cigItr->Type == 'S' || cigItr->Type == 'H' || cigItr->Type == 'P' || cigItr->Type == 'I') {
+                // do nothing
+            }
+            else {
+                cerr << "Input error: invalid CIGAR type (" << cigItr->Type
+                    << ") for: " << bam.Name << endl;
+                exit(1);
+            }
+            prevOp = cigItr->Type;
+        }
+        // if there were no splits, we just create a block representing the contiguous alignment.
+        if (blocksFound == false) {
+            blocks.push_back( BED(chrom, bam.Position, currPosition, name, score, strand) );
+        }
+    }
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,19 @@
+/*****************************************************************************
+  bamAncillary.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef BAMANCILLARY_H
+#define BAMANCILLARY_H
+
+#include "bedFile.h"
+#include "lineFileUtilities.h"
+#include "api/BamAlignment.h"
+
+namespace BamTools {
+    /*
+       Appends to 'blocks' the BED entries derived from the CIGAR data of 'bam'.
+       The chromosome name is looked up in 'refs' using bam.RefID.
+
+       breakOnDeletionOps - when true (the default) a D op starts a new block
+                            after the deleted bases; when false the deleted
+                            bases are spanned by the current block.
+     */
+    void getBamBlocks(const BamAlignment &bam, const RefVector &refs,
+                        vector<BED> &blocks, bool breakOnDeletionOps = true);
+}
+
+#endif // BAMANCILLARY_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,26 @@
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../
+
+INCLUDES = -I$(UTILITIES_DIR)/BamTools/include -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= BamAncillary.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+all: $(BUILT_OBJECTS)
+
+.PHONY: all
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,22 @@
+The MIT License
+
+Copyright (c) 2009-2010 Derek Barnett, Erik Garrison, Gabor Marth, Michael Stromberg
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/Makefile Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,61 @@
+# -------------------
+# define our includes
+# -------------------
+OBJ_DIR = ../../../obj/
+INCLUDES = -Isrc/ -Iinclude/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+
+SOURCES= src/api/BamAlignment.cpp \
+         src/api/BamMultiReader.cpp \
+         src/api/BamReader.cpp \
+         src/api/BamWriter.cpp \
+         src/api/SamHeader.cpp \
+         src/api/SamProgram.cpp \
+         src/api/SamProgramChain.cpp \
+         src/api/SamReadGroup.cpp \
+         src/api/SamReadGroupDictionary.cpp \
+         src/api/SamSequence.cpp \
+         src/api/SamSequenceDictionary.cpp \
+  src/api/internal/BamHeader_p.cpp \
+  src/api/internal/BamIndexFactory_p.cpp \
+  src/api/internal/BamMultiReader_p.cpp \
+  src/api/internal/BamRandomAccessController_p.cpp \
+  src/api/internal/BamReader_p.cpp \
+  src/api/internal/BamStandardIndex_p.cpp \
+  src/api/internal/BamToolsIndex_p.cpp \
+  src/api/internal/BamWriter_p.cpp \
+  src/api/internal/BgzfStream_p.cpp \
+  src/api/internal/SamFormatParser_p.cpp \
+  src/api/internal/SamFormatPrinter_p.cpp \
+  src/api/internal/SamHeaderValidator_p.cpp
+
+# ----------------------------------
+# define our object files and library
+# ----------------------------------
+OBJECTS= $(SOURCES:.cpp=.o)
+LIBRARY=libbamtools.a
+
+all: $(LIBRARY)
+    
+.PHONY: all
+
+$(LIBRARY): $(OBJECTS)
+ [ -d lib ] || mkdir -p lib
+ [ -d include ] || mkdir -p include
+ [ -d include/api ] || mkdir -p include/api
+ [ -d include/shared ] || mkdir -p include/shared
+
+ @cp src/api/*.h include/api
+ @cp src/shared/*.h include/shared
+
+
+ @echo "  * linking $(LIBRARY)"
+ ar cr lib/$@  $^
+
+$(OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c $(*D)/$(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -o $(*D)/$(*F).o
+
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,2433 @@\n+// ***************************************************************************\n+// BamAlignment.cpp (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 22 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the BamAlignment data structure\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamConstants.h>\n+using namespace BamTools;\n+\n+#include <cctype>\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <exception>\n+#include <iostream>\n+#include <map>\n+#include <utility>\n+using namespace std;\n+\n+/*! \\class BamTools::BamAlignment\n+    \\brief The main BAM alignment data structure.\n+\n+    Provides methods to query/modify BAM alignment data fields.\n+*/\n+/*! \\var BamAlignment::Name\n+    \\brief read name\n+*/\n+/*! \\var BamAlignment::Length\n+    \\brief length of query sequence\n+*/\n+/*! \\var BamAlignment::QueryBases\n+    \\brief \'original\' sequence (as reported from sequencing machine)\n+*/\n+/*! \\var BamAlignment::AlignedBases\n+    \\brief \'aligned\' sequence (includes any indels, padding, clipping)\n+*/\n+/*! \\var BamAlignment::Qualities\n+    \\brief FASTQ qualities (ASCII characters, not numeric values)\n+*/\n+/*! \\var BamAlignment::TagData\n+    \\brief tag data (use the provided methods to query/modify)\n+*/\n+/*! \\var BamAlignment::RefID\n+    \\brief ID number for reference sequence\n+*/\n+/*! \\var BamAlignment::Position\n+    \\brief position (0-based) where alignment starts\n+*/\n+/*! \\var BamAlignment::Bin\n+    \\brief BAM (standard) index bin number for this alignment\n+*/\n+/*! \\var BamAlignment::MapQuality\n+    \\brief mapping quality score\n+*/\n+/*! 
\\var BamAlignment::AlignmentFlag\n+    \\brief alignment bit-flag (use the provided methods to query/modify)\n+*/\n+/*! \\var BamAlignment::CigarData\n+    \\brief CIGAR operations for this alignment\n+*/\n+/*! \\var BamAlignment::MateRefID\n+    \\brief ID number for reference sequence where alignment\'s mate was aligned\n+*/\n+/*! \\var BamAlignment::MatePosition\n+    \\brief position (0-based) where alignment\'s mate starts\n+*/\n+/*! \\var BamAlignment::InsertSize\n+    \\brief mate-pair insert size\n+*/\n+/*! \\var BamAlignment::Filename\n+    \\brief name of BAM file which this alignment comes from\n+*/\n+\n+/*! \\fn BamAlignment::BamAlignment(void)\n+    \\brief constructor\n+*/\n+BamAlignment::BamAlignment(void)\n+    : RefID(-1)\n+    , Position(-1)\n+    , MateRefID(-1)\n+    , MatePosition(-1)\n+    , InsertSize(0)\n+{ }\n+\n+/*! \\fn BamAlignment::BamAlignment(const BamAlignment& other)\n+    \\brief copy constructor\n+*/\n+BamAlignment::BamAlignment(const BamAlignment& other)\n+    : Name(other.Name)\n+    , Length(other.Length)\n+    , QueryBases(other.QueryBases)\n+    , AlignedBases(other.AlignedBases)\n+    , Qualities(other.Qualities)\n+    , TagData(other.TagData)\n+    , RefID(other.RefID)\n+    , Position(other.Position)\n+    , Bin(other.Bin)\n+    , MapQuality(other.MapQuality)\n+    , AlignmentFlag(other.AlignmentFlag)\n+    , CigarData(other.CigarData)\n+    , MateRefID(other.MateRefID)\n+    , MatePosition(other.MatePosition)\n+    , InsertSize(other.InsertSize)\n+    , Filename(other.Filename)\n+    , SupportData(other.SupportData)\n+{ }\n+\n+/*! \\fn BamAlignment::~BamAlignment(void)\n+    \\brief destructor\n+*/\n+BamAlignment::~BamAlignment(void) { }\n+\n+/*! 
\\fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)\n+    \\brief Adds a field with string data to the BAM tags.\n+\n+    Does NOT modify an existing tag - use \\link BamAlignment::EditTag() \\endlink instead.\n+\n+    \\param tag   2-character tag name\n+    \\param type  1-character tag type (must be "Z" or "H")\n+    \\param value string data to store\n+\n+    \\return \\c true if the \\b new tag was added successfully\n+    \\sa \\samSpecURL for'..b'ped(bool ok)\n+    \\brief Complement of using SetIsMapped().\n+    \\deprecated For sake of symmetry with the query methods\n+    \\sa IsMapped(), SetIsMapped()\n+*/\n+void BamAlignment::SetIsUnmapped(bool ok) {\n+    SetIsMapped(!ok);\n+}\n+\n+/*! \\fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)\n+    \\internal\n+\n+    Moves to next available tag in tag data string\n+\n+    \\param storageType    BAM tag type-code that determines how far to move cursor\n+    \\param pTagData       pointer to current position (cursor) in tag string\n+    \\param numBytesParsed report of how many bytes were parsed (cumulatively)\n+\n+    \\return \\c if storageType was a recognized BAM tag type\n+    \\post \\a pTagData will point to the byte where the next tag data begins.\n+          \\a numBytesParsed will correspond to the cursor\'s position in the full TagData string.\n+*/\n+bool BamAlignment::SkipToNextTag(const char storageType,\n+                                 char*& pTagData,\n+                                 unsigned int& numBytesParsed) const\n+{\n+    switch (storageType) {\n+\n+        case (Constants::BAM_TAG_TYPE_ASCII) :\n+        case (Constants::BAM_TAG_TYPE_INT8)  :\n+        case (Constants::BAM_TAG_TYPE_UINT8) :\n+            ++numBytesParsed;\n+            ++pTagData;\n+            break;\n+\n+        case (Constants::BAM_TAG_TYPE_INT16)  :\n+        case (Constants::BAM_TAG_TYPE_UINT16) :\n+   
         numBytesParsed += sizeof(uint16_t);\n+            pTagData       += sizeof(uint16_t);\n+            break;\n+\n+        case (Constants::BAM_TAG_TYPE_FLOAT)  :\n+        case (Constants::BAM_TAG_TYPE_INT32)  :\n+        case (Constants::BAM_TAG_TYPE_UINT32) :\n+            numBytesParsed += sizeof(uint32_t);\n+            pTagData       += sizeof(uint32_t);\n+            break;\n+\n+        case (Constants::BAM_TAG_TYPE_STRING) :\n+        case (Constants::BAM_TAG_TYPE_HEX)    :\n+            while( *pTagData ) {\n+                ++numBytesParsed;\n+                ++pTagData;\n+            }\n+            // increment for null-terminator\n+            ++numBytesParsed;\n+            ++pTagData;\n+            break;\n+\n+        case (Constants::BAM_TAG_TYPE_ARRAY) :\n+\n+        {\n+            // read array type\n+            const char arrayType = *pTagData;\n+            ++numBytesParsed;\n+            ++pTagData;\n+\n+            // read number of elements\n+            int32_t numElements;\n+            memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary\n+            numBytesParsed += sizeof(uint32_t);\n+            pTagData       += sizeof(uint32_t);\n+\n+            // calculate number of bytes to skip\n+            int bytesToSkip = 0;\n+            switch (arrayType) {\n+                case (Constants::BAM_TAG_TYPE_INT8)  :\n+                case (Constants::BAM_TAG_TYPE_UINT8) :\n+                    bytesToSkip = numElements;\n+                    break;\n+                case (Constants::BAM_TAG_TYPE_INT16)  :\n+                case (Constants::BAM_TAG_TYPE_UINT16) :\n+                    bytesToSkip = numElements*sizeof(uint16_t);\n+                    break;\n+                case (Constants::BAM_TAG_TYPE_FLOAT)  :\n+                case (Constants::BAM_TAG_TYPE_INT32)  :\n+                case (Constants::BAM_TAG_TYPE_UINT32) :\n+                    bytesToSkip = numElements*sizeof(uint32_t);\n+       
             break;\n+                default:\n+                    cerr << "BamAlignment ERROR: unknown binary array type encountered: "\n+                         << arrayType << endl;\n+                    return false;\n+            }\n+\n+            // skip binary array contents\n+            numBytesParsed += bytesToSkip;\n+            pTagData       += bytesToSkip;\n+            break;\n+        }\n+\n+        default:\n+            cerr << "BamAlignment ERROR: unknown tag type encountered"\n+                 << storageType << endl;\n+            return false;\n+    }\n+\n+    // return success\n+    return true;\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,207 @@\n+// ***************************************************************************\n+// BamAlignment.h (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 22 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the BamAlignment data structure\n+// ***************************************************************************\n+\n+#ifndef BAMALIGNMENT_H\n+#define BAMALIGNMENT_H\n+\n+#include <api/api_global.h>\n+#include <api/BamAux.h>\n+#include <string>\n+#include <vector>\n+\n+namespace BamTools {\n+\n+// forward declaration of BamAlignment\'s friend classes\n+namespace Internal {\n+    class BamReaderPrivate;\n+    class BamWriterPrivate;\n+} // namespace Internal\n+\n+// BamAlignment data structure\n+struct API_EXPORT BamAlignment {\n+\n+    // constructors & destructor\n+    public:\n+        BamAlignment(void);\n+        BamAlignment(const BamAlignment& other);\n+        ~BamAlignment(void);\n+\n+    // queries against alignment flags\n+    public:        \n+        bool IsDuplicate(void) const;           // returns true if this read is a PCR duplicate\n+        bool IsFailedQC(void) const;            // returns true if this read failed quality control\n+        bool IsFirstMate(void) const;           // returns true if alignment is first mate on read\n+        bool IsMapped(void) const;              // returns true if alignment is mapped\n+        bool IsMateMapped(void) const;          // returns true if alignment\'s mate is mapped\n+        bool IsMateReverseStrand(void) const;   // returns true if alignment\'s mate mapped to reverse strand\n+        bool IsPaired(void) const;              // returns true if alignment part of paired-end read\n+        bool IsPrimaryAlignment(void) const;    // returns true if reported position is 
primary alignment\n+        bool IsProperPair(void) const;          // returns true if alignment is part of read that satisfied paired-end resolution\n+        bool IsReverseStrand(void) const;       // returns true if alignment mapped to reverse strand\n+        bool IsSecondMate(void) const;          // returns true if alignment is second mate on read\n+\n+    // manipulate alignment flags\n+    public:        \n+        void SetIsDuplicate(bool ok);           // sets value of "PCR duplicate" flag\n+        void SetIsFailedQC(bool ok);            // sets value of "failed quality control" flag\n+        void SetIsFirstMate(bool ok);           // sets value of "alignment is first mate" flag\n+        void SetIsMapped(bool ok);              // sets value of "alignment is mapped" flag\n+        void SetIsMateMapped(bool ok);          // sets value of "alignment\'s mate is mapped" flag\n+        void SetIsMateReverseStrand(bool ok);   // sets value of "alignment\'s mate mapped to reverse strand" flag\n+        void SetIsPaired(bool ok);              // sets value of "alignment part of paired-end read" flag\n+        void SetIsPrimaryAlignment(bool ok);    // sets value of "position is primary alignment" flag\n+        void SetIsProperPair(bool ok);          // sets value of "alignment is part of read that satisfied paired-end resolution" flag\n+        void SetIsReverseStrand(bool ok);       // sets value of "alignment mapped to reverse strand" flag\n+        void SetIsSecondMate(bool ok);          // sets value of "alignment is second mate on read" flag\n+\n+        // legacy methods (consider deprecated, but still available)\n+        void SetIsMateUnmapped(bool ok);        // complement of using SetIsMateMapped()\n+        void SetIsSecondaryAlignment(bool ok);  // complement of using SetIsPrimaryAlignment()\n+        void SetIsUnmapped(bool ok);            // complement of using SetIsMapped()\n+\n+    // tag data access methods\n+    public:\n+\n+        // 
-------------------------------------------------------------------------------------\n+    '..b', std::vector<uint32_t>& destination) const;\n+        bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;\n+        bool GetTag(const std::string& tag, std::vector<float>& destination) const;\n+\n+        // retrieves the BAM tag-type character for a tag\n+        bool GetTagType(const std::string& tag, char& type) const;\n+\n+        // legacy methods (consider deprecated, but still available)\n+        bool GetEditDistance(uint32_t& editDistance) const;         // retrieves value of "NM" tag\n+        bool GetReadGroup(std::string& readGroup) const;            // retrieves value of "RG" tag\n+        \n+        // returns true if alignment has a record for this tag name\n+        bool HasTag(const std::string& tag) const;\n+\n+        // removes a tag\n+        bool RemoveTag(const std::string& tag);\n+\n+    // additional methods\n+    public:\n+        // populates alignment string fields\n+        bool BuildCharData(void);\n+        // calculates alignment end position\n+        int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;  \n+\n+    // public data fields\n+    public:\n+        std::string Name;               // read name\n+        int32_t     Length;             // length of query sequence\n+        std::string QueryBases;         // \'original\' sequence (as reported from sequencing machine)\n+        std::string AlignedBases;       // \'aligned\' sequence (includes any indels, padding, clipping)\n+        std::string Qualities;          // FASTQ qualities (ASCII characters, not numeric values)\n+        std::string TagData;            // tag data (use provided methods to query/modify)\n+        int32_t     RefID;              // ID number for reference sequence\n+        int32_t     Position;           // position (0-based) where alignment starts\n+        uint16_t    Bin;                // BAM (standard) 
index bin number for this alignment\n+        uint16_t    MapQuality;         // mapping quality score\n+        uint32_t    AlignmentFlag;      // alignment bit-flag (use provided methods to query/modify)\n+        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\n+        int32_t     MateRefID;          // ID number for reference sequence where alignment\'s mate was aligned\n+        int32_t     MatePosition;       // position (0-based) where alignment\'s mate starts\n+        int32_t     InsertSize;         // mate-pair insert size\n+        std::string Filename;           // name of BAM file which this alignment comes from\n+\n+    //! \\cond\n+    // internal utility methods\n+    private:\n+        bool FindTag(const std::string& tag,\n+                     char*& pTagData,\n+                     const unsigned int& tagDataLength,\n+                     unsigned int& numBytesParsed) const;\n+        bool IsValidSize(const std::string& tag,\n+                         const std::string& type) const;\n+        bool SkipToNextTag(const char storageType,\n+                           char*& pTagData,\n+                           unsigned int& numBytesParsed) const;\n+\n+    // internal data\n+    private:\n+\n+        struct BamAlignmentSupportData {\n+      \n+            // data members\n+            std::string AllCharData;\n+            uint32_t    BlockLength;\n+            uint32_t    NumCigarOperations;\n+            uint32_t    QueryNameLength;\n+            uint32_t    QuerySequenceLength;\n+            bool        HasCoreOnly;\n+            \n+            // constructor\n+            BamAlignmentSupportData(void)\n+                : BlockLength(0)\n+                , NumCigarOperations(0)\n+                , QueryNameLength(0)\n+                , QuerySequenceLength(0)\n+                , HasCoreOnly(false)\n+            { }\n+        };\n+        BamAlignmentSupportData SupportData;\n+        friend class 
Internal::BamReaderPrivate;\n+        friend class Internal::BamWriterPrivate;\n+    //! \\endcond\n+};\n+\n+typedef std::vector<BamAlignment> BamAlignmentVector;\n+\n+} // namespace BamTools\n+\n+#endif // BAMALIGNMENT_H\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,457 @@\n+// ***************************************************************************\r\n+// BamAux.h (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\r\n+// Marth Lab, Department of Biology, Boston College\r\n+// All rights reserved.\r\n+// ---------------------------------------------------------------------------\r\n+// Last modified: 4 March 2011 (DB)\r\n+// ---------------------------------------------------------------------------\r\n+// Provides data structures & utility methods that are used throughout the API.\r\n+// ***************************************************************************\r\n+\r\n+#ifndef BAMAUX_H\r\n+#define BAMAUX_H\r\n+\r\n+#include <api/api_global.h>\r\n+#include <fstream> \r\n+#include <iostream>\r\n+#include <string>\r\n+#include <vector>\r\n+\r\n+/*! \\file BamAux.h\r\n+\r\n+    Provides data structures & utility methods that are used throughout the API.\r\n+*/\r\n+/*! \\namespace BamTools\r\n+    \\brief Contains all BamTools classes & methods.\r\n+\r\n+    The BamTools API contained in this namespace contains classes and methods\r\n+    for reading, writing, and manipulating BAM alignment files.\r\n+*/\r\n+namespace BamTools {\r\n+\r\n+// ----------------------------------------------------------------\r\n+// CigarOp\r\n+\r\n+/*! \\struct BamTools::CigarOp\r\n+    \\brief Represents a CIGAR alignment operation.\r\n+\r\n+    \\sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.\r\n+*/\r\n+struct API_EXPORT CigarOp {\r\n+  \r\n+    char     Type;   //!< CIGAR operation type (MIDNSHP)\r\n+    uint32_t Length; //!< CIGAR operation length (number of bases)\r\n+    \r\n+    //! constructor\r\n+    CigarOp(const char type = \'\\0\', \r\n+            const uint32_t& length = 0)\r\n+        : Type(type)\r\n+        , Length(length) \r\n+    { }\r\n+};\r\n+\r\n+// ----------------------------------------------------------------\r\n+// RefData\r\n+\r\n+/*! 
\\struct BamTools::RefData\r\n+    \\brief Represents a reference sequence entry\r\n+*/\r\n+struct API_EXPORT RefData {\r\n+   \r\n+    std::string RefName;    //!< name of reference sequence\r\n+    int32_t     RefLength;  //!< length of reference sequence\r\n+    \r\n+    //! constructor\r\n+    RefData(const std::string& name = "",\r\n+            const int32_t& length = 0)\r\n+        : RefName(name)\r\n+        , RefLength(length)\r\n+    { }\r\n+};\r\n+\r\n+//! convenience typedef for vector of RefData entries\r\n+typedef std::vector<RefData> RefVector;\r\n+\r\n+// ----------------------------------------------------------------\r\n+// BamRegion\r\n+\r\n+/*! \\struct BamTools::BamRegion\r\n+    \\brief Represents a sequential genomic region\r\n+\r\n+    Allowed to span multiple (sequential) references.\r\n+*/\r\n+struct API_EXPORT BamRegion {\r\n+  \r\n+    int LeftRefID;      //!< reference ID for region\'s left boundary\r\n+    int LeftPosition;   //!< position for region\'s left boundary\r\n+    int RightRefID;     //!< reference ID for region\'s right boundary\r\n+    int RightPosition;  //!< position for region\'s right boundary\r\n+    \r\n+    //! constructor\r\n+    BamRegion(const int& leftID   = -1, \r\n+              const int& leftPos  = -1,\r\n+              const int& rightID  = -1,\r\n+              const int& rightPos = -1)\r\n+        : LeftRefID(leftID)\r\n+        , LeftPosition(leftPos)\r\n+        , RightRefID(rightID)\r\n+        , RightPosition(rightPos)\r\n+    { }\r\n+    \r\n+    //! copy constructor\r\n+    BamRegion(const BamRegion& other)\r\n+        : LeftRefID(other.LeftRefID)\r\n+        , LeftPosition(other.LeftPosition)\r\n+        , RightRefID(other.RightRefID)\r\n+        , RightPosition(other.RightPosition)\r\n+    { }\r\n+    \r\n+    //! Clears region boundaries\r\n+    void clear(void) {\r\n+        LeftRefID  = -1; LeftPosition  = -1;\r\n+        RightRefID = -1; RightPosition = -1;\r\n+    }\r\n+\r\n+    //! 
Returns true if region has a left boundary\r\n+    bool isLeftBoundSpecified(void) const {\r\n+        return ( LeftRefID >= 0 && LeftPosition >= 0 );\r\n+    }\r\n+\r\n+    //! Returns true if region boundaries are not defined\r\n+    bool isNull(void) const {\r\n+        return ( !isLeftBoundSpecified()'..b'oat) value read from the buffer\r\n+*/\r\n+API_EXPORT inline float UnpackFloat(char* buffer) {\r\n+    return UnpackFloat( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn signed int UnpackSignedInt(const char* buffer)\r\n+    \\brief reads a signed integer value from byte buffer\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (signed int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r\n+    union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r\n+    un.value = 0;\r\n+    un.valueBuffer[0] = buffer[0];\r\n+    un.valueBuffer[1] = buffer[1];\r\n+    un.valueBuffer[2] = buffer[2];\r\n+    un.valueBuffer[3] = buffer[3];\r\n+    return un.value;\r\n+}\r\n+\r\n+/*! \\fn signed int UnpackSignedInt(char* buffer)\r\n+    \\brief reads a signed integer value from byte buffer\r\n+\r\n+    This is an overloaded function.\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (signed int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r\n+    return UnpackSignedInt( (const char*) buffer );\r\n+}\r\n+\r\n+/*! 
\\fn signed short UnpackSignedShort(const char* buffer)\r\n+    \\brief reads a signed short integer value from byte buffer\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (signed short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r\n+    union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r\n+    un.value = 0;\r\n+    un.valueBuffer[0] = buffer[0];\r\n+    un.valueBuffer[1] = buffer[1];\r\n+    return un.value;\r\n+}\r\n+\r\n+/*! \\fn signed short UnpackSignedShort(char* buffer)\r\n+    \\brief reads a signed short integer value from byte buffer\r\n+\r\n+    This is an overloaded function.\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (signed short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r\n+    return UnpackSignedShort( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn unsigned int UnpackUnsignedInt(const char* buffer)\r\n+    \\brief reads an unsigned integer value from byte buffer\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (unsigned int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r\n+    union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r\n+    un.value = 0;\r\n+    un.valueBuffer[0] = buffer[0];\r\n+    un.valueBuffer[1] = buffer[1];\r\n+    un.valueBuffer[2] = buffer[2];\r\n+    un.valueBuffer[3] = buffer[3];\r\n+    return un.value;\r\n+}\r\n+\r\n+/*! 
\\fn unsigned int UnpackUnsignedInt(char* buffer)\r\n+    \\brief reads an unsigned integer value from byte buffer\r\n+\r\n+    This is an overloaded function.\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (unsigned int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r\n+    return UnpackUnsignedInt( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn unsigned short UnpackUnsignedShort(const char* buffer)\r\n+    \\brief reads an unsigned short integer value from byte buffer\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (unsigned short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r\n+    union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r\n+    un.value = 0;\r\n+    un.valueBuffer[0] = buffer[0];\r\n+    un.valueBuffer[1] = buffer[1];\r\n+    return un.value;\r\n+}\r\n+\r\n+/*! \\fn unsigned short UnpackUnsignedShort(char* buffer)\r\n+    \\brief reads an unsigned short integer value from byte buffer\r\n+\r\n+    This is an overloaded function.\r\n+\r\n+    \\param buffer source byte buffer\r\n+    \\return the (unsigned short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r\n+    return UnpackUnsignedShort( (const char*)buffer );\r\n+}\r\n+\r\n+} // namespace BamTools\r\n+\r\n+#endif // BAMAUX_H\r\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,128 @@
+// ***************************************************************************
+// BamConstants.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic constants for handling BAM files.
+// ***************************************************************************
+
+#ifndef BAM_CONSTANTS_H
+#define BAM_CONSTANTS_H
+
+#include <string>
+
+/*! \namespace BamTools::Constants
+    \brief Provides basic constants for handling BAM files.
+*/
+
+namespace BamTools {
+namespace Constants {
+
+const int BAM_SIZEOF_INT = 4;
+
+// header magic number
+const char* const  BAM_HEADER_MAGIC = "BAM\1";
+const unsigned int BAM_HEADER_MAGIC_LENGTH = 4;
+
+// BAM alignment core size
+const int BAM_CORE_SIZE = 32;
+const int BAM_CORE_BUFFER_SIZE = 8;
+
+// BAM alignment flags
+const int BAM_ALIGNMENT_PAIRED              = 0x0001;
+const int BAM_ALIGNMENT_PROPER_PAIR         = 0x0002;
+const int BAM_ALIGNMENT_UNMAPPED            = 0x0004;
+const int BAM_ALIGNMENT_MATE_UNMAPPED       = 0x0008;
+const int BAM_ALIGNMENT_REVERSE_STRAND      = 0x0010;
+const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020;
+const int BAM_ALIGNMENT_READ_1              = 0x0040;
+const int BAM_ALIGNMENT_READ_2              = 0x0080;
+const int BAM_ALIGNMENT_SECONDARY           = 0x0100;
+const int BAM_ALIGNMENT_QC_FAILED           = 0x0200;
+const int BAM_ALIGNMENT_DUPLICATE           = 0x0400;
+
+// CIGAR constants
+const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X";
+const int BAM_CIGAR_MATCH    = 0;
+const int BAM_CIGAR_INS      = 1;
+const int BAM_CIGAR_DEL      = 2;
+const int BAM_CIGAR_REFSKIP  = 3;
+const int BAM_CIGAR_SOFTCLIP = 4;
+const int BAM_CIGAR_HARDCLIP = 5;
+const int BAM_CIGAR_PAD      = 6;
+const int BAM_CIGAR_SEQMATCH = 7;
+const int BAM_CIGAR_MISMATCH = 8;
+
+const char BAM_CIGAR_MATCH_CHAR    = 'M';
+const char BAM_CIGAR_INS_CHAR      = 'I';
+const char BAM_CIGAR_DEL_CHAR      = 'D';
+const char BAM_CIGAR_REFSKIP_CHAR  = 'N';
+const char BAM_CIGAR_SOFTCLIP_CHAR = 'S';
+const char BAM_CIGAR_HARDCLIP_CHAR = 'H';
+const char BAM_CIGAR_PAD_CHAR      = 'P';
+const char BAM_CIGAR_SEQMATCH_CHAR = '=';
+const char BAM_CIGAR_MISMATCH_CHAR = 'X';
+
+const int BAM_CIGAR_SHIFT    = 4;
+const int BAM_CIGAR_MASK     = ((1 << BAM_CIGAR_SHIFT) - 1);
+
+// BAM tag types
+const char BAM_TAG_TYPE_ASCII  = 'A';
+const char BAM_TAG_TYPE_UINT8  = 'c';
+const char BAM_TAG_TYPE_INT8   = 'C';
+const char BAM_TAG_TYPE_UINT16 = 's';
+const char BAM_TAG_TYPE_INT16  = 'S';
+const char BAM_TAG_TYPE_UINT32 = 'i';
+const char BAM_TAG_TYPE_INT32  = 'I';
+const char BAM_TAG_TYPE_FLOAT  = 'f';
+const char BAM_TAG_TYPE_STRING = 'Z';
+const char BAM_TAG_TYPE_HEX    = 'H';
+const char BAM_TAG_TYPE_ARRAY  = 'B';
+
+const size_t BAM_TAG_TAGSIZE  = 2;
+const size_t BAM_TAG_TYPESIZE = 1;
+const int BAM_TAG_ARRAYBASE_SIZE = 8;
+
+// DNA bases
+const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
+const unsigned char BAM_BASECODE_EQUAL = 0;
+const unsigned char BAM_BASECODE_A     = 1;
+const unsigned char BAM_BASECODE_C     = 2;
+const unsigned char BAM_BASECODE_G     = 4;
+const unsigned char BAM_BASECODE_T     = 8;
+const unsigned char BAM_BASECODE_N     = 15;
+
+const char BAM_DNA_EQUAL   = '=';
+const char BAM_DNA_A       = 'A';
+const char BAM_DNA_C       = 'C';
+const char BAM_DNA_G       = 'G';
+const char BAM_DNA_T       = 'T';
+const char BAM_DNA_N       = 'N';
+const char BAM_DNA_DEL     = '-';
+const char BAM_DNA_PAD     = '*';
+
+// zlib constants
+const int GZIP_ID1   = 31;
+const int GZIP_ID2   = 139;
+const int CM_DEFLATE = 8;
+const int FLG_FEXTRA = 4;
+const int OS_UNKNOWN = 255;
+const int BGZF_XLEN  = 6;
+const int BGZF_ID1   = 66;
+const int BGZF_ID2   = 67;
+const int BGZF_LEN   = 2;
+const int GZIP_WINDOW_BITS    = -15;
+const int Z_DEFAULT_MEM_LEVEL = 8;
+
+// BZGF constants
+const int BGZF_BLOCK_HEADER_LENGTH = 18;
+const int BGZF_BLOCK_FOOTER_LENGTH = 8;
+const int BGZF_MAX_BLOCK_SIZE      = 65536;
+const int BGZF_DEFAULT_BLOCK_SIZE  = 65536;
+
+} // namespace Constants
+} // namespace BamTools
+
+#endif // BAM_CONSTANTS_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,80 @@
+// ***************************************************************************
+// BamIndex.h (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic BAM index interface
+// ***************************************************************************
+
+#ifndef BAM_INDEX_H
+#define BAM_INDEX_H
+
+#include <api/api_global.h>
+#include <api/BamAux.h>
+#include <string>
+
+namespace BamTools {
+
+// forward declaration only: this header refers to the reader solely through a
+// pointer (BamIndex::m_reader), so the full class definition is not needed here
+namespace Internal {
+    class BamReaderPrivate;
+} // namespace Internal
+
+/*! \class BamTools::BamIndex
+    \brief Provides methods for generating & loading BAM index files.
+
+    This class straddles the line between public API and internal
+    implementation detail. Most client code should never have to use this
+    class directly.
+
+    It is exposed to the public API to allow advanced users to implement
+    their own custom indexing schemes.
+
+    More documentation on methods & enums coming soon.
+*/
+
+class API_EXPORT BamIndex {
+
+    // Abstract base class: every method in the index interface below is pure
+    // virtual, so BamIndex cannot be instantiated directly and each concrete
+    // index type must implement all of them.
+
+    // enums
+    public:
+        // specify index-caching behavior
+        enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory
+                            , LimitedIndexCaching  // store only index data for current reference
+                            , NoIndexCaching       // do not store any index data between jumps
+                            };
+
+        // list of supported BamIndex types
+        enum IndexType { BAMTOOLS = 0
+                       , STANDARD
+                       };
+  
+    // ctor & dtor
+    public:
+        // stores the given reader pointer in m_reader; nothing else is initialized here
+        BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { }
+        // virtual so derived index types are destroyed correctly through a BamIndex pointer;
+        // the body is empty, i.e. m_reader is not deleted
+        virtual ~BamIndex(void) { }
+        
+    // index interface
+    public:
+        // builds index from associated BAM file & writes out to index file
+        virtual bool Create(void) =0; // creates index file from BAM file
+        // returns whether the reference with ID @referenceID has alignments or not
+        virtual bool HasAlignments(const int& referenceID) const =0;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+        // loads existing data from file into memory
+        virtual bool Load(const std::string& filename) =0;
+        // change the index caching behavior
+        virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0;
+
+    // data members
+    protected:
+        Internal::BamReaderPrivate* m_reader; // copy of the reader pointer, not owned by the index
+};
+
+} // namespace BamTools
+
+#endif // BAM_INDEX_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,396 @@\n+// ***************************************************************************\n+// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 15 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Convenience class for reading multiple BAM files.\n+//\n+// This functionality allows applications to work on very large sets of files\n+// without requiring intermediate merge, sort, and index steps for each file\n+// subset.  It also improves the performance of our merge system as it\n+// precludes the need to sort merged files.\n+// ***************************************************************************\n+\n+#include <api/BamMultiReader.h>\n+#include <api/internal/BamMultiReader_p.h>\n+using namespace BamTools;\n+\n+#include <string>\n+#include <vector>\n+using namespace std;\n+\n+/*! \\class BamTools::BamReader\n+    \\brief Convenience class for reading multiple BAM files.\n+*/\n+\n+/*! \\fn BamMultiReader::BamMultiReader(void)\n+    \\brief constructor\n+*/\n+BamMultiReader::BamMultiReader(void)\n+    : d(new Internal::BamMultiReaderPrivate)\n+{ }\n+\n+/*! \\fn BamMultiReader::~BamMultiReader(void)\n+    \\brief destructor\n+*/\n+BamMultiReader::~BamMultiReader(void) {\n+    delete d;\n+    d = 0;\n+}\n+\n+/*! \\fn void BamMultiReader::Close(void)\n+    \\brief Closes all open BAM files.\n+\n+    Also clears out all header and reference data.\n+\n+    \\sa CloseFile(), IsOpen(), Open(), BamReader::Close()\n+*/\n+void BamMultiReader::Close(void) {\n+    d->Close();\n+}\n+\n+/*! 
\\fn void BamMultiReader::CloseFile(const std::string& filename)\n+    \\brief Closes requested BAM file.\n+\n+    Leaves any other file(s) open, along with header and reference data.\n+\n+    \\sa Close(), IsOpen(), Open(), BamReader::Close()\n+*/\n+void BamMultiReader::CloseFile(const std::string& filename) {\n+    d->CloseFile(filename);\n+}\n+\n+/*! \\fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)\n+    \\brief Creates index files for the current BAM files.\n+\n+    \\param type file format to create, see BamIndex::IndexType for available formats\n+    \\return \\c true if index files created OK\n+    \\sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()\n+*/\n+bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {\n+    return d->CreateIndexes(type);\n+}\n+\n+/*! \\fn const std::vector<std::string> BamMultiReader::Filenames(void) const\n+    \\brief Returns list of filenames for all open BAM files.\n+\n+    Retrieved filenames will contain whatever was passed via Open().\n+    If you need full directory paths here, be sure to include them\n+    when you open the BAM files.\n+\n+    \\returns names of open BAM files. If no files are open, returns an empty vector.\n+    \\sa IsOpen(), BamReader::GetFilename()\n+*/\n+const std::vector<std::string> BamMultiReader::Filenames(void) const {\n+    return d->Filenames();\n+}\n+\n+/*! \\fn SamHeader BamMultiReader::GetHeader(void) const\n+    \\brief Returns unified SAM-format header for all files\n+\n+    N.B. - Modifying the retrieved text does NOT affect the current\n+    BAM files. Thesse file have been opened in a read-only mode. 
However,\n+    your modified header text can be used in conjunction with BamWriter\n+    to generate a new BAM file with the appropriate header information.\n+\n+    \\returns header data wrapped in SamHeader object\n+    \\sa GetHeaderText(), BamReader::GetHeader()\n+*/\n+SamHeader BamMultiReader::GetHeader(void) const {\n+    return d->GetHeader();\n+}\n+\n+/*! \\fn std::string BamMultiReader::GetHeaderText(void) const\n+    \\brief Returns unified SAM-format header text for all files\n+\n+    N.B. - Modifying the retrieved text does NOT affect the current\n+    BAM files. Thesse file have been opened in a read-only mode. However,\n+    your modified header text can be used in conjunction with BamWriter\n+    to gener'..b'mMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)\n+    \\brief Opens index files for current BAM files.\n+\n+    N.B. - Currently assumes that index filenames match the order (and number) of\n+    BAM files passed to Open().\n+\n+    \\param indexFilenames list of BAM index file names\n+    \\returns \\c true if BAM index file was opened & data loaded successfully\n+    \\sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()\n+*/\n+bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {\n+    return d->OpenIndexes(indexFilenames);\n+}\n+\n+/*! \\fn void BamMultiReader::PrintFilenames(void) const\n+    \\brief Convenience method for printing filenames to stdout.\n+    \\deprecated Doesn\'t really belong as an API function. Clients should\n+                determine how the data is reported.\n+    \\sa Filenames(), BamReader::GetFilename()\n+*/\n+void BamMultiReader::PrintFilenames(void) const {\n+    d->PrintFilenames();\n+}\n+\n+/*! 
\\fn bool BamMultiReader::Rewind(void)\n+    \\brief Returns the internal file pointers to the beginning of alignment records.\n+\n+    Useful for performing multiple sequential passes through BAM files.\n+    Calling this function clears any prior region that may have been set.\n+\n+    \\returns \\c true if rewind operation was successful\n+    \\sa Jump(), SetRegion(), BamReader::Rewind()\n+*/\n+bool BamMultiReader::Rewind(void) {\n+    return d->Rewind();\n+}\n+\n+/*! \\fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n+    \\brief Changes the caching behavior of the index data.\n+\n+    Default mode is BamIndex::LimitedIndexCaching.\n+\n+    \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n+                description of the available cache modes\n+    \\sa HasIndex(), BamReader::SetIndexCacheMode()\n+*/\n+void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+    d->SetIndexCacheMode(mode);\n+}\n+\n+/*! \\fn bool BamMultiReader::SetRegion(const BamRegion& region)\n+    \\brief Sets a target region of interest\n+\n+    Equivalent to calling BamReader::SetRegion() on all open BAM files.\n+\n+    \\param region desired region-of-interest to activate\n+    \\returns \\c true if ALL readers set the region successfully\n+    \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n+*/\n+bool BamMultiReader::SetRegion(const BamRegion& region) {\n+    return d->SetRegion(region);\n+}\n+\n+/*! 
\\fn bool BamMultiReader::SetRegion(const int& leftRefID,\n+                                       const int& leftPosition,\n+                                       const int& rightRefID,\n+                                       const int& rightPosition)\n+    \\brief Sets a target region of interest\n+\n+    This is an overloaded function.\n+\n+    Equivalent to calling BamReader::SetRegion() on all open BAM files.\n+\n+    \\param leftRefID     referenceID of region\'s left boundary\n+    \\param leftPosition  position of region\'s left boundary\n+    \\param rightRefID    reference ID of region\'s right boundary\n+    \\param rightPosition position of region\'s right boundary\n+\n+    \\returns \\c true if ALL readers set the region successfully\n+    \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n+*/\n+bool BamMultiReader::SetRegion(const int& leftRefID,\n+                               const int& leftPosition,\n+                               const int& rightRefID,\n+                               const int& rightPosition)\n+{\n+    BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);\n+    return d->SetRegion(region);\n+}\n+\n+/*! \\fn void BamMultiReader::SetSortOrder(const SortOrder& order)\n+    \\brief Sets the expected sorting order for reading across multiple BAM files.\n+\n+    Default is BamMultiReader::SortedByPosition.\n+\n+    The SortOrder determines how the reader determines which alignment is "next"\n+    from among its open readers.\n+\n+    \\param order expected sort order\n+*/\n+void BamMultiReader::SetSortOrder(const SortOrder& order) {\n+    d->SetSortOrder(order);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,127 @@
+// ***************************************************************************
+// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 15 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Convenience class for reading multiple BAM files.
+// ***************************************************************************
+
+#ifndef BAMMULTIREADER_H
+#define BAMMULTIREADER_H
+
+#include <api/api_global.h>
+#include <api/BamReader.h>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace BamTools {
+
+namespace Internal {
+    class BamMultiReaderPrivate;
+} // namespace Internal
+
+class API_EXPORT BamMultiReader {
+
+    public:
+        enum SortOrder { SortedByPosition = 0
+                       , SortedByReadName
+                       , Unsorted
+                       };
+
+    // constructor / destructor
+    public:
+        BamMultiReader(void);
+        ~BamMultiReader(void);
+
+    // public interface
+    public:
+
+        // ----------------------
+        // BAM file operations
+        // ----------------------
+
+        // closes all open BAM files
+        void Close(void);
+        // close only the requested BAM file
+        void CloseFile(const std::string& filename);
+        // returns list of filenames for all open BAM files
+        const std::vector<std::string> Filenames(void) const;
+        // returns true if multireader has any open BAM files
+        bool HasOpenReaders(void) const;
+        // performs random-access jump within current BAM files
+        bool Jump(int refID, int position = 0);
+        // opens BAM files
+        bool Open(const std::vector<std::string>& filenames);
+        // opens a single BAM file, adding to any other current BAM files
+        bool OpenFile(const std::string& filename);
+        // returns file pointers to beginning of alignments
+        bool Rewind(void);
+        // sets the target region of interest
+        bool SetRegion(const BamRegion& region);
+        // sets the target region of interest
+        bool SetRegion(const int& leftRefID,
+                       const int& leftPosition,
+                       const int& rightRefID,
+                       const int& rightPosition);
+
+        // ----------------------
+        // access alignment data
+        // ----------------------
+
+        // retrieves next available alignment
+        bool GetNextAlignment(BamAlignment& alignment);
+        // retrieves next available alignment (without populating the alignment's string data fields)
+        bool GetNextAlignmentCore(BamAlignment& alignment);
+
+        // sets the expected sorting order for reading across multiple BAM files
+        void SetSortOrder(const SortOrder& order);
+
+        // ----------------------
+        // access auxiliary data
+        // ----------------------
+
+        // returns unified SAM header for all files
+        SamHeader GetHeader(void) const;
+        // returns unified SAM header text for all files
+        std::string GetHeaderText(void) const;
+        // returns number of reference sequences
+        int GetReferenceCount(void) const;
+        // returns all reference sequence entries.
+        const BamTools::RefVector GetReferenceData(void) const;
+        // returns the ID of the reference with this name.
+        int GetReferenceID(const std::string& refName) const;
+
+        // ----------------------
+        // BAM index operations
+        // ----------------------
+
+        // creates index files for current BAM files
+        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);
+        // returns true if all BAM files have index data available
+        bool HasIndexes(void) const;
+        // looks for index files that match current BAM files
+        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+        // opens index files for current BAM files.
+        bool OpenIndexes(const std::vector<std::string>& indexFilenames);
+        // changes the caching behavior of the index data
+        void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
+
+    // deprecated methods
+    public:
+        // returns true if all BAM files have index data available.
+        bool IsIndexLoaded(void) const;
+        // convenience method for printing filenames to stdout
+        void PrintFilenames(void) const;
+
+    // private implementation
+    private:
+        Internal::BamMultiReaderPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp Thu Nov 03 10:25:04 2011 -0400
b
b"@@ -0,0 +1,370 @@\n+// ***************************************************************************\n+// BamReader.cpp (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 4 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides read access to BAM files.\n+// ***************************************************************************\n+\n+#include <api/BamReader.h>\n+#include <api/internal/BamReader_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <string>\n+#include <vector>\n+using namespace std;\n+\n+/*! \\class BamTools::BamReader\n+    \\brief Provides read access to BAM files.\n+*/\n+\n+/*! \\fn BamReader::BamReader(void)\n+    \\brief constructor\n+*/\n+BamReader::BamReader(void)\n+    : d(new BamReaderPrivate(this))\n+{ }\n+\n+/*! \\fn BamReader::~BamReader(void)\n+    \\brief destructor\n+*/\n+BamReader::~BamReader(void) {\n+    delete d;\n+    d = 0;\n+}\n+\n+/*! \\fn void BamReader::Close(void)\n+    \\brief Closes the current BAM file.\n+\n+    Also clears out all header and reference data.\n+\n+    \\sa IsOpen(), Open()\n+*/\n+void BamReader::Close(void) {\n+    d->Close();\n+}\n+\n+/*! \\fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)\n+    \\brief Creates an index file for current BAM file.\n+\n+    \\param type file format to create, see BamIndex::IndexType for available formats\n+    \\return \\c true if index created OK\n+    \\sa LocateIndex(), OpenIndex()\n+*/\n+bool BamReader::CreateIndex(const BamIndex::IndexType& type) {\n+    return d->CreateIndex(type);\n+}\n+\n+/*! 
\\fn const std::string BamReader::GetFilename(void) const\n+    \\brief Returns name of current BAM file.\n+\n+    Retrieved filename will contain whatever was passed via Open().\n+    If you need full directory paths here, be sure to include them\n+    when you open the BAM file.\n+\n+    \\returns name of open BAM file. If no file is open, returns an empty string.\n+    \\sa IsOpen()\n+*/\n+const std::string BamReader::GetFilename(void) const {\n+    return d->Filename();\n+}\n+\n+/*! \\fn SamHeader BamReader::GetHeader(void) const\n+    \\brief Returns SAM header data.\n+\n+    Header data is wrapped in a SamHeader object that can be conveniently queried & modified.\n+\n+    N.B. - Modifying the retrieved SamHeader object does NOT affect the\n+    current BAM file. This file has been opened in a read-only mode.\n+    However, your modified SamHeader object can be used in conjunction with\n+    BamWriter to generate a new BAM file with the appropriate header information.\n+\n+    \\returns header data object\n+    \\sa GetHeaderText()\n+*/\n+SamHeader BamReader::GetHeader(void) const {\n+    return d->GetSamHeader();\n+}\n+\n+/*! \\fn std::string BamReader::GetHeaderText(void) const\n+    \\brief Returns SAM header data, as SAM-formatted text.\n+\n+    N.B. - Modifying the retrieved text does NOT affect the current\n+    BAM file. This file has been opened in a read-only mode. However,\n+    your modified header text can be used in conjunction with BamWriter\n+    to generate a new BAM file with the appropriate header information.\n+\n+    \\returns SAM-formatted header text\n+    \\sa GetHeader()\n+*/\n+std::string BamReader::GetHeaderText(void) const {\n+    return d->GetHeaderText();\n+}\n+\n+/*! \\fn bool BamReader::GetNextAlignment(BamAlignment& alignment)\n+    \\brief Retrieves next available alignment.\n+\n+    Attempts to read the next alignment record from BAM file, and checks to see\n+    if it overlaps the current region. 
If no region is currently set, then the\n+    next alignment available is always considered valid.\n+\n+    If a region has been set, via Jump() or SetRegion(), an alignment is only\n+    considered valid if it overlaps the region. If the actual 'next' alignment record\n+    in the BAM file does not overlap this r"..b"ng& indexFilename)\n+    \\brief Opens a BAM index file.\n+\n+    \\param indexFilename name of BAM index file\n+\n+    \\returns \\c true if BAM index file was opened & data loaded successfully\n+    \\sa LocateIndex(), Open(), SetIndex()\n+*/\n+bool BamReader::OpenIndex(const std::string& indexFilename) {\n+    return d->OpenIndex(indexFilename);\n+}\n+\n+/*! \\fn bool BamReader::Rewind(void)\n+    \\brief Returns the internal file pointer to the first alignment record.\n+\n+    Useful for performing multiple sequential passes through a BAM file.\n+    Calling this function clears any prior region that may have been set.\n+\n+    N.B. - Note that this function sets the file pointer to first alignment record\n+    in the BAM file, NOT the beginning of the file.\n+\n+    \\returns \\c true if rewind operation was successful\n+    \\sa Jump(), SetRegion()\n+*/\n+bool BamReader::Rewind(void) {\n+    return d->Rewind();\n+}\n+\n+/*! \\fn void BamReader::SetIndex(BamIndex* index)\n+    \\brief Sets a custom BamIndex on this reader.\n+\n+    Only necessary for custom BamIndex subclasses. Most clients should\n+    never have to use this function.\n+\n+    Example:\n+    \\code\n+        BamReader reader;\n+        reader.SetIndex(new MyCustomBamIndex);\n+    \\endcode\n+\n+    N.B. - BamReader takes ownership of \\a index - i.e. 
BamReader will\n+    take care of deleting the pointer when the reader is destructed,\n+    when the current BAM file is closed, or when a new index is requested.\n+\n+    \\param index custom BamIndex subclass created by client\n+    \\sa CreateIndex(), LocateIndex(), OpenIndex()\n+*/\n+void BamReader::SetIndex(BamIndex* index) {\n+    d->SetIndex(index);\n+}\n+\n+/*! \\fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n+    \\brief Changes the caching behavior of the index data.\n+\n+    Default mode is BamIndex::LimitedIndexCaching.\n+\n+    \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n+                description of the available cache modes\n+    \\sa HasIndex()\n+*/\n+void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+    d->SetIndexCacheMode(mode);\n+}\n+\n+/*! \\fn bool BamReader::SetRegion(const BamRegion& region)\n+    \\brief Sets a target region of interest\n+\n+    Requires that index data be available. Attempts a random-access\n+    jump in the BAM file, near \\a region left boundary position.\n+\n+    Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()\n+    will only return \\c true when alignments can be found that overlap\n+    this \\a region.\n+\n+    A \\a region with no right boundary is considered open-ended, meaning\n+    that all alignments that lie downstream of the left boundary are\n+    considered valid, continuing to the end of the BAM file.\n+\n+    \\param region desired region-of-interest to activate\n+    \\returns \\c true if reader was able to jump successfully to the region's left boundary\n+    \\sa HasIndex(), Jump()\n+*/\n+bool BamReader::SetRegion(const BamRegion& region) {\n+    return d->SetRegion(region);\n+}\n+\n+/*! 
\\fn bool BamReader::SetRegion(const int& leftRefID,\n+                                  const int& leftPosition,\n+                                  const int& rightRefID,\n+                                  const int& rightPosition)\n+    \\brief Sets a target region of interest.\n+\n+    This is an overloaded function.\n+\n+    \\param leftRefID     referenceID of region's left boundary\n+    \\param leftPosition  position of region's left boundary\n+    \\param rightRefID    reference ID of region's right boundary\n+    \\param rightPosition position of region's right boundary\n+\n+    \\returns \\c true if reader was able to jump successfully to the region's left boundary\n+    \\sa HasIndex(), Jump()\n+*/\n+bool BamReader::SetRegion(const int& leftRefID,\n+                          const int& leftBound,\n+                          const int& rightRefID,\n+                          const int& rightBound)\n+{\n+    return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );\n+}\n"
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,118 @@
+// ***************************************************************************
+// BamReader.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 4 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides read access to BAM files.
+// ***************************************************************************
+
+#ifndef BAMREADER_H
+#define BAMREADER_H
+
+#include <api/api_global.h>
+#include <api/BamAlignment.h>
+#include <api/BamIndex.h>
+#include <api/SamHeader.h>
+#include <string>
+
+namespace BamTools {
+  
+namespace Internal {
+    class BamReaderPrivate;
+} // namespace Internal
+
+class API_EXPORT BamReader {
+
+    // constructor / destructor
+    public:
+        BamReader(void);
+        ~BamReader(void);
+
+    // public interface
+    public:
+
+        // ----------------------
+        // BAM file operations
+        // ----------------------
+
+        // closes the current BAM file
+        void Close(void);
+        // returns filename of current BAM file
+        const std::string GetFilename(void) const;
+        // returns true if a BAM file is open for reading
+        bool IsOpen(void) const;
+        // performs random-access jump within BAM file
+        bool Jump(int refID, int position = 0);
+        // opens a BAM file
+        bool Open(const std::string& filename);
+        // returns internal file pointer to beginning of alignment data
+        bool Rewind(void);
+        // sets the target region of interest
+        bool SetRegion(const BamRegion& region);
+        // sets the target region of interest
+        bool SetRegion(const int& leftRefID,
+                       const int& leftPosition,
+                       const int& rightRefID,
+                       const int& rightPosition);
+
+        // ----------------------
+        // access alignment data
+        // ----------------------
+
+        // retrieves next available alignment
+        bool GetNextAlignment(BamAlignment& alignment);
+        // retrieves next available alignment (without populating the alignment's string data fields)
+        bool GetNextAlignmentCore(BamAlignment& alignment);
+
+        // ----------------------
+        // access header data
+        // ----------------------
+
+        // returns SAM header data
+        SamHeader GetHeader(void) const;
+        // returns SAM header data, as SAM-formatted text
+        std::string GetHeaderText(void) const;
+
+        // ----------------------
+        // access reference data
+        // ----------------------
+
+        // returns the number of reference sequences
+        int GetReferenceCount(void) const;
+        // returns all reference sequence entries
+        const RefVector& GetReferenceData(void) const;
+        // returns the ID of the reference with this name
+        int GetReferenceID(const std::string& refName) const;
+
+        // ----------------------
+        // BAM index operations
+        // ----------------------
+
+        // creates an index file for current BAM file, using the requested index type
+        bool CreateIndex(const BamIndex::IndexType& type = BamIndex::STANDARD);
+        // returns true if index data is available
+        bool HasIndex(void) const;
+        // looks in BAM file's directory for a matching index file
+        bool LocateIndex(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+        // opens a BAM index file
+        bool OpenIndex(const std::string& indexFilename);
+        // sets a custom BamIndex on this reader
+        void SetIndex(BamIndex* index);
+        // changes the caching behavior of the index data
+        void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
+
+    // deprecated methods
+    public:
+        // returns true if index data is available
+        bool IsIndexLoaded(void) const;
+        
+    // private implementation
+    private:
+        Internal::BamReaderPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMREADER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,143 @@
+// ***************************************************************************
+// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 4 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include <api/BamAlignment.h>
+#include <api/BamWriter.h>
+#include <api/SamHeader.h>
+#include <api/internal/BamWriter_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+using namespace std;
+
+/*! \class BamTools::BamWriter
+    \brief Provides write access for generating BAM files.
+*/
+/*! \enum BamTools::BamWriter::CompressionMode
+    \brief This enum describes the compression behaviors for output BAM files.
+*/
+/*! \var BamWriter::CompressionMode BamWriter::Compressed
+    \brief Use normal BAM compression
+*/
+/*! \var BamWriter::CompressionMode BamWriter::Uncompressed
+    \brief Disable BAM compression
+
+    Useful in situations where the BAM data is streamed (e.g. piping).
+    It would be wasteful to compress, and then immediately decompress
+    the data.
+*/
+
+/*! \fn BamWriter::BamWriter(void)
+    \brief constructor
+*/
+BamWriter::BamWriter(void)
+    : d(new BamWriterPrivate)
+{ }
+
+/*! \fn BamWriter::~BamWriter(void)
+    \brief destructor
+*/
+BamWriter::~BamWriter(void) {
+    delete d;
+    d = 0;
+}
+
+/*! \fn void BamWriter::Close(void)
+    \brief Closes the current BAM file.
+    \sa Open()
+*/
+void BamWriter::Close(void) {
+    d->Close();
+}
+
+/*! \fn bool BamWriter::IsOpen(void) const
+    \brief Returns \c true if BAM file is open for writing.
+    \sa Open()
+*/
+bool BamWriter::IsOpen(void) const {
+    return d->IsOpen();
+}
+
+/*! \fn bool BamWriter::Open(const std::string& filename,
+                             const std::string& samHeaderText,
+                             const RefVector& referenceSequences)
+    \brief Opens a BAM file for writing.
+
+    Will overwrite the BAM file if it already exists.
+
+    \param filename           name of output BAM file
+    \param samHeaderText      header data, as SAM-formatted string
+    \param referenceSequences list of reference entries
+
+    \return \c true if opened successfully
+    \sa Close(), IsOpen(), BamReader::GetHeaderText(), BamReader::GetReferenceData()
+*/
+bool BamWriter::Open(const std::string& filename,
+                     const std::string& samHeaderText,
+                     const RefVector& referenceSequences)
+{
+    return d->Open(filename, samHeaderText, referenceSequences);
+}
+
+/*! \fn bool BamWriter::Open(const std::string& filename,
+                             const SamHeader& samHeader,
+                             const RefVector& referenceSequences)
+    \brief Opens a BAM file for writing.
+
+    This is an overloaded function.
+
+    Will overwrite the BAM file if it already exists.
+
+    \param filename           name of output BAM file
+    \param samHeader          header data, wrapped in SamHeader object
+    \param referenceSequences list of reference entries
+
+    \return \c true if opened successfully
+    \sa Close(), IsOpen(), BamReader::GetHeader(), BamReader::GetReferenceData()
+*/
+bool BamWriter::Open(const std::string& filename,
+                     const SamHeader& samHeader,
+                     const RefVector& referenceSequences)
+{
+    return d->Open(filename, samHeader.ToString(), referenceSequences);
+}
+
+/*! \fn void BamWriter::SaveAlignment(const BamAlignment& alignment)
+    \brief Saves an alignment to the BAM file.
+
+    \param alignment BamAlignment record to save
+    \sa BamReader::GetNextAlignment(), BamReader::GetNextAlignmentCore()
+*/
+void BamWriter::SaveAlignment(const BamAlignment& alignment) {
+    d->SaveAlignment(alignment);
+}
+
+/*! \fn void BamWriter::SetCompressionMode(const CompressionMode& compressionMode)
+    \brief Sets the output compression mode.
+
+    Default mode is BamWriter::Compressed.
+
+    N.B. - Changing the compression mode is disabled on open files (i.e. the request will be ignored).
+    Be sure to call this function before opening the BAM file.
+
+    \code
+        BamWriter writer;
+        writer.SetCompressionMode(BamWriter::Uncompressed);
+        writer.Open( ... );
+        // ...
+    \endcode
+
+    \param compressionMode desired output compression behavior
+    \sa IsOpen(), Open()
+*/
+void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) {
+    d->SetWriteCompressed( compressionMode == BamWriter::Compressed );
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,64 @@
+// ***************************************************************************
+// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 4 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_H
+#define BAMWRITER_H
+
+#include <api/api_global.h>
+#include <api/BamAux.h>
+#include <string>
+
+namespace BamTools {
+
+class BamAlignment;
+class SamHeader;
+
+namespace Internal {
+    class BamWriterPrivate;
+} // namespace Internal
+
+class API_EXPORT BamWriter {
+
+    public: enum CompressionMode { Compressed = 0
+                                 , Uncompressed
+                                 };
+
+    // ctor & dtor
+    public:
+        BamWriter(void);
+        ~BamWriter(void);
+
+    // public interface
+    public:
+        // closes the current BAM file
+        void Close(void);
+        // returns true if BAM file is open for writing
+        bool IsOpen(void) const;
+        // opens a BAM file for writing
+        bool Open(const std::string& filename, 
+                  const std::string& samHeaderText,
+                  const RefVector& referenceSequences);
+        // opens a BAM file for writing
+        bool Open(const std::string& filename,
+                  const SamHeader& samHeader,
+                  const RefVector& referenceSequences);
+        // saves the alignment to the alignment archive
+        void SaveAlignment(const BamAlignment& alignment);
+        // sets the output compression mode
+        void SetCompressionMode(const CompressionMode& compressionMode);
+
+    // private implementation
+    private:
+        Internal::BamWriterPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMWRITER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,78 @@
+# ==========================
+# BamTools CMakeLists.txt
+# (c) 2010 Derek Barnett
+#
+# src/api/
+# ==========================
+
+# list include paths
+include_directories( ${BamTools_SOURCE_DIR}/src )
+
+# add compiler definitions 
+add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols)
+add_definitions( -fPIC ) # (attempt to force PIC compiling on some archs)
+
+# list of all BamTools API source (.cpp) files
+set( BamToolsAPISources
+        BamAlignment.cpp
+        BamMultiReader.cpp
+        BamReader.cpp
+        BamWriter.cpp
+        SamHeader.cpp
+        SamProgram.cpp
+        SamProgramChain.cpp
+        SamReadGroup.cpp
+        SamReadGroupDictionary.cpp
+        SamSequence.cpp
+        SamSequenceDictionary.cpp
+        internal/BamHeader_p.cpp
+        internal/BamIndexFactory_p.cpp
+        internal/BamMultiReader_p.cpp
+        internal/BamRandomAccessController_p.cpp
+        internal/BamReader_p.cpp
+        internal/BamStandardIndex_p.cpp
+        internal/BamToolsIndex_p.cpp
+        internal/BamWriter_p.cpp
+        internal/BgzfStream_p.cpp
+        internal/SamFormatParser_p.cpp
+        internal/SamFormatPrinter_p.cpp
+        internal/SamHeaderValidator_p.cpp
+)
+
+# create main BamTools API shared library
+add_library( BamTools SHARED ${BamToolsAPISources} )
+set_target_properties( BamTools PROPERTIES SOVERSION "1.0.2" )
+set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" )
+
+# create main BamTools API static library
+add_library( BamTools-static STATIC ${BamToolsAPISources} )
+set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" )
+set_target_properties( BamTools-static PROPERTIES PREFIX "lib" )
+
+# link libraries with zlib automatically
+target_link_libraries( BamTools z )
+target_link_libraries( BamTools-static z )
+
+# set library install destinations
+install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools")
+install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools")
+
+# export API headers
+include(../ExportHeader.cmake)
+set(ApiIncludeDir "api")
+ExportHeader(APIHeaders api_global.h             ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAlignment.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders BamAux.h                 ${ApiIncludeDir})
+ExportHeader(APIHeaders BamConstants.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders BamIndex.h               ${ApiIncludeDir})
+ExportHeader(APIHeaders BamMultiReader.h         ${ApiIncludeDir})
+ExportHeader(APIHeaders BamReader.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders BamWriter.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders SamConstants.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders SamHeader.h              ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgram.h             ${ApiIncludeDir})
+ExportHeader(APIHeaders SamProgramChain.h        ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroup.h           ${ApiIncludeDir})
+ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequence.h            ${ApiIncludeDir})
+ExportHeader(APIHeaders SamSequenceDictionary.h  ${ApiIncludeDir})
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,96 @@
+// ***************************************************************************
+// SamConstants.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides constants for SAM header
+// ***************************************************************************
+
+#ifndef SAM_CONSTANTS_H
+#define SAM_CONSTANTS_H
+
+#include <api/api_global.h>
+#include <string>
+
+namespace BamTools {
+namespace Constants {
+
+// basic char constants used in SAM format
+const char SAM_COLON  = ':';
+const char SAM_EQUAL  = '=';
+const char SAM_PERIOD = '.';
+const char SAM_STAR   = '*';
+const char SAM_TAB    = '\t';
+const std::string SAM_DIGITS = "0123456789";
+
+// HD entries
+const std::string SAM_HD_BEGIN_TOKEN    = "@HD";
+const std::string SAM_HD_VERSION_TAG    = "VN";
+const std::string SAM_HD_SORTORDER_TAG  = "SO";
+const std::string SAM_HD_GROUPORDER_TAG = "GO";
+
+// SQ entries
+const std::string SAM_SQ_BEGIN_TOKEN    = "@SQ";
+const std::string SAM_SQ_ASSEMBLYID_TAG = "AS";
+const std::string SAM_SQ_CHECKSUM_TAG   = "M5";
+const std::string SAM_SQ_LENGTH_TAG     = "LN";
+const std::string SAM_SQ_NAME_TAG       = "SN";
+const std::string SAM_SQ_SPECIES_TAG    = "SP";
+const std::string SAM_SQ_URI_TAG        = "UR";
+
+// RG entries
+const std::string SAM_RG_BEGIN_TOKEN             = "@RG";
+const std::string SAM_RG_DESCRIPTION_TAG         = "DS";
+const std::string SAM_RG_FLOWORDER_TAG           = "FO";
+const std::string SAM_RG_ID_TAG                  = "ID";
+const std::string SAM_RG_KEYSEQUENCE_TAG         = "KS";
+const std::string SAM_RG_LIBRARY_TAG             = "LB";
+const std::string SAM_RG_PLATFORMUNIT_TAG        = "PU";
+const std::string SAM_RG_PREDICTEDINSERTSIZE_TAG = "PI";
+const std::string SAM_RG_PRODUCTIONDATE_TAG      = "DT";
+const std::string SAM_RG_PROGRAM_TAG             = "PG";
+const std::string SAM_RG_SAMPLE_TAG              = "SM";
+const std::string SAM_RG_SEQCENTER_TAG           = "CN";
+const std::string SAM_RG_SEQTECHNOLOGY_TAG       = "PL";
+
+// PG entries
+const std::string SAM_PG_BEGIN_TOKEN         = "@PG";
+const std::string SAM_PG_COMMANDLINE_TAG     = "CL";
+const std::string SAM_PG_ID_TAG              = "ID";
+const std::string SAM_PG_NAME_TAG            = "PN";
+const std::string SAM_PG_PREVIOUSPROGRAM_TAG = "PP";
+const std::string SAM_PG_VERSION_TAG         = "VN";
+
+// CO entries
+const std::string SAM_CO_BEGIN_TOKEN = "@CO";
+
+// HD:SO values
+const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate";
+const std::string SAM_HD_SORTORDER_QUERYNAME  = "queryname";
+const std::string SAM_HD_SORTORDER_UNKNOWN    = "unknown";
+const std::string SAM_HD_SORTORDER_UNSORTED   = "unsorted";
+
+// HD:GO values
+const std::string SAM_HD_GROUPORDER_NONE      = "none";
+const std::string SAM_HD_GROUPORDER_QUERY     = "query";
+const std::string SAM_HD_GROUPORDER_REFERENCE = "reference";
+
+// SQ:LN values
+const unsigned int SAM_SQ_LENGTH_MIN = 1;
+const unsigned int SAM_SQ_LENGTH_MAX = 536870911; // 2^29 - 1
+
+// RG:PL values
+const std::string SAM_RG_SEQTECHNOLOGY_CAPILLARY  = "CAPILLARY";
+const std::string SAM_RG_SEQTECHNOLOGY_HELICOS    = "HELICOS";
+const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA   = "ILLUMINA";
+const std::string SAM_RG_SEQTECHNOLOGY_IONTORRENT = "IONTORRENT";
+const std::string SAM_RG_SEQTECHNOLOGY_LS454      = "LS454";
+const std::string SAM_RG_SEQTECHNOLOGY_PACBIO     = "PACBIO";
+const std::string SAM_RG_SEQTECHNOLOGY_SOLID      = "SOLID";
+
+} // namespace Constants
+} // namespace BamTools
+
+#endif // SAM_CONSTANTS_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,185 @@
+// ***************************************************************************
+// SamHeader.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header data fields.
+// ***************************************************************************
+
+#include <api/SamConstants.h>
+#include <api/SamHeader.h>
+#include <api/internal/SamFormatParser_p.h>
+#include <api/internal/SamFormatPrinter_p.h>
+#include <api/internal/SamHeaderValidator_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+/*! \struct BamTools::SamHeader
+    \brief Represents the SAM-formatted text header that is part of the BAM file header.
+
+    Provides direct read/write access to the SAM header data fields.
+
+    \sa \samSpecURL
+*/
+/*! \var SamHeader::Version
+    \brief corresponds to \@HD VN:\<Version\>
+
+    Required for valid SAM header, if @HD record is present.
+*/
+/*! \var SamHeader::SortOrder
+    \brief corresponds to \@HD SO:\<SortOrder\>
+*/
+/*! \var SamHeader::GroupOrder
+    \brief corresponds to \@HD GO:\<GroupOrder\>
+*/
+/*! \var SamHeader::Sequences
+    \brief corresponds to \@SQ entries
+    \sa SamSequence, SamSequenceDictionary
+*/
+/*! \var SamHeader::ReadGroups
+    \brief corresponds to \@RG entries
+    \sa SamReadGroup, SamReadGroupDictionary
+*/
+/*! \var SamHeader::Programs
+    \brief corresponds to \@PG entries
+    \sa SamProgram, SamProgramChain
+*/
+/*! \var SamHeader::Comments
+    \brief corresponds to \@CO entries
+*/
+
+/*! \fn SamHeader::SamHeader(const std::string& headerText = "")
+    \brief Constructs a header by parsing \a headerText.
+
+    Version and GroupOrder start out empty and SortOrder starts as "unknown";
+    the text is then handed to a SamFormatParser bound to this object.
+
+    \param headerText SAM-formatted header text
+*/
+SamHeader::SamHeader(const std::string& headerText)
+    : Version()
+    , SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN)
+    , GroupOrder()
+{
+    SamFormatParser headerParser(*this);
+    headerParser.Parse(headerText);
+}
+
+/*! \fn SamHeader::SamHeader(const SamHeader& other)
+    \brief copy constructor
+
+    Copies every data member of \a other, including the \@CO comment lines.
+    Initializers are listed in member declaration order.
+
+    \param other header to copy from
+*/
+SamHeader::SamHeader(const SamHeader& other)
+    : Version(other.Version)
+    , SortOrder(other.SortOrder)
+    , GroupOrder(other.GroupOrder)
+    , Sequences(other.Sequences)
+    , ReadGroups(other.ReadGroups)
+    , Programs(other.Programs)
+    , Comments(other.Comments)   // previously omitted: copies lost their comments
+{ }
+
+/*! \fn SamHeader::~SamHeader(void)
+    \brief Destructor; performs no explicit cleanup.
+*/
+SamHeader::~SamHeader(void)
+{
+}
+
+/*! \fn void SamHeader::Clear(void)
+    \brief Empties every data field of the header.
+*/
+void SamHeader::Clear(void)
+{
+    // @HD fields
+    Version.clear();
+    SortOrder.clear();
+    GroupOrder.clear();
+
+    // @SQ, @RG, @PG containers
+    Sequences.Clear();
+    ReadGroups.Clear();
+    Programs.Clear();
+
+    // @CO lines
+    Comments.clear();
+}
+
+/*! \fn bool SamHeader::HasVersion(void) const
+    \brief Returns \c true if header contains \@HD VN:\<Version\>
+    (i.e. the Version field is non-empty)
+*/
+bool SamHeader::HasVersion(void) const {
+    const bool versionMissing = Version.empty();
+    return !versionMissing;
+}
+
+/*! \fn bool SamHeader::HasSortOrder(void) const
+    \brief Returns \c true if the SortOrder field (\@HD SO:\<SortOrder\>) is non-empty
+*/
+bool SamHeader::HasSortOrder(void) const {
+    if ( SortOrder.empty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamHeader::HasGroupOrder(void) const
+    \brief Returns \c true if the GroupOrder field (\@HD GO:\<GroupOrder\>) is non-empty
+*/
+bool SamHeader::HasGroupOrder(void) const {
+    return ( GroupOrder.empty() == false );
+}
+
+/*! \fn bool SamHeader::HasSequences(void) const
+    \brief Returns \c true if the sequence dictionary (\@SQ entries) is not empty
+*/
+bool SamHeader::HasSequences(void) const {
+    const bool noSequences = Sequences.IsEmpty();
+    return !noSequences;
+}
+
+/*! \fn bool SamHeader::HasReadGroups(void) const
+    \brief Returns \c true if the read group dictionary (\@RG entries) is not empty
+*/
+bool SamHeader::HasReadGroups(void) const {
+    if ( ReadGroups.IsEmpty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamHeader::HasPrograms(void) const
+    \brief Returns \c true if the program chain (\@PG entries) is not empty
+*/
+bool SamHeader::HasPrograms(void) const {
+    return ( Programs.IsEmpty() ? false : true );
+}
+
+/*! \fn bool SamHeader::HasComments(void) const
+    \brief Returns \c true if at least one comment (\@CO entry) is stored
+*/
+bool SamHeader::HasComments(void) const {
+    const bool noComments = Comments.empty();
+    return !noComments;
+}
+
+/*! \fn bool SamHeader::IsValid(bool verbose = false) const
+    \brief Runs a SamHeaderValidator over this header.
+    \param verbose If set to true, validation errors & warnings will be printed to stderr.
+                   Otherwise, output is suppressed and only validation check occurs.
+    \return the validator's verdict: \c true if SAM header is well-formed
+*/
+bool SamHeader::IsValid(bool verbose) const {
+    SamHeaderValidator headerValidator(*this);
+    const bool wellFormed = headerValidator.Validate(verbose);
+    return wellFormed;
+}
+
+/*! \fn void SamHeader::SetHeaderText(const std::string& headerText)
+    \brief Discards the current contents and re-populates the header from \a headerText.
+    \param headerText SAM formatted-text that will be parsed into data fields
+*/
+void SamHeader::SetHeaderText(const std::string& headerText)
+{
+    // start from an empty header so no stale fields survive
+    Clear();
+
+    // let the SAM format parser fill in the data fields
+    SamFormatParser headerParser(*this);
+    headerParser.Parse(headerText);
+}
+
+/*! \fn std::string SamHeader::ToString(void) const
+    \brief Renders the current data fields as SAM-formatted text.
+
+    The text is produced by a SamFormatPrinter from the fields as they are now,
+    so it reflects any local modifications made since creating this object or
+    calling SetHeaderText().
+
+    \return SAM-formatted header text
+*/
+string SamHeader::ToString(void) const {
+    SamFormatPrinter headerPrinter(*this);
+    const string headerText = headerPrinter.ToString();
+    return headerText;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// SamHeader.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header data fields.
+// ***************************************************************************
+
+#ifndef SAM_HEADER_H
+#define SAM_HEADER_H
+
+#include <api/api_global.h>
+#include <api/SamProgramChain.h>
+#include <api/SamReadGroupDictionary.h>
+#include <api/SamSequenceDictionary.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+struct API_EXPORT SamHeader {
+
+    // ctor & dtor (the text ctor parses headerText into the data members below)
+    SamHeader(const std::string& headerText = "");
+    SamHeader(const SamHeader& other);
+    ~SamHeader(void);
+
+    // query/modify entire SamHeader
+    void Clear(void);                                   // clears all header contents
+    bool IsValid(bool verbose = false) const;           // returns true if SAM header is well-formed
+    void SetHeaderText(const std::string& headerText);  // replaces data fields with contents of SAM-formatted text
+    std::string ToString(void) const;                   // returns the printable, SAM-formatted header text
+
+    // convenience query methods (each tests whether the matching data member is non-empty)
+    bool HasVersion(void) const;            // returns true if header contains format version entry
+    bool HasSortOrder(void) const;          // returns true if header contains sort order entry
+    bool HasGroupOrder(void) const;         // returns true if header contains group order entry
+    bool HasSequences(void) const;          // returns true if header contains any sequence entries
+    bool HasReadGroups(void) const;         // returns true if header contains any read group entries
+    bool HasPrograms(void) const;           // returns true if header contains any program record entries
+    bool HasComments(void) const;           // returns true if header contains comments
+
+    // --------------
+    // data members (public; read and written directly by callers)
+    // --------------
+
+    // header metadata (@HD line)
+    std::string Version;                    // VN:<Version>  *Required for valid SAM header, if @HD record is present*
+    std::string SortOrder;                  // SO:<SortOrder>  (set to "unknown" by the text ctor before parsing)
+    std::string GroupOrder;                 // GO:<GroupOrder>
+
+    // header sequences (@SQ entries)
+    SamSequenceDictionary Sequences;
+
+    // header read groups (@RG entries)
+    SamReadGroupDictionary ReadGroups;
+
+    // header program data (@PG entries)
+    SamProgramChain Programs;
+
+    // header comments (@CO entries), one string per comment
+    std::vector<std::string> Comments;
+};
+
+} // namespace BamTools
+
+#endif // SAM_HEADER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,140 @@
+// ***************************************************************************
+// SamProgram.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header program records.
+// ***************************************************************************
+
+#include <api/SamProgram.h>
+using namespace BamTools;
+using namespace std;
+
+/*! \struct BamTools::SamProgram
+    \brief Represents a SAM program record.
+
+    Provides direct read/write access to the SAM header program records.
+
+    \sa \samSpecURL
+*/
+/*! \var SamProgram::CommandLine
+    \brief corresponds to \@PG CL:\<CommandLine\>
+*/
+/*! \var SamProgram::ID
+    \brief corresponds to \@PG ID:\<ID\>
+
+    Required for valid SAM header.
+*/
+/*! \var SamProgram::Name
+    \brief corresponds to \@PG PN:\<Name\>
+*/
+/*! \var SamProgram::PreviousProgramID
+    \brief corresponds to \@PG PP:\<PreviousProgramID\>
+*/
+/*! \var SamProgram::Version
+    \brief corresponds to \@PG VN:\<Version\>
+*/
+/*! \var SamProgram::NextProgramID
+    \internal
+    Holds ID of the "next" program record in a SamProgramChain
+*/
+
+/*! \fn SamProgram::SamProgram(void)
+    \brief Default constructor; every field starts out empty.
+*/
+SamProgram::SamProgram(void) :
+    CommandLine(),
+    ID(),
+    Name(),
+    PreviousProgramID(),
+    Version(),
+    NextProgramID()
+{
+}
+
+/*! \fn SamProgram::SamProgram(const std::string& id)
+    \brief Constructs a program record whose ID is \a id; all other fields are empty.
+
+    \param id desired program record ID
+*/
+SamProgram::SamProgram(const std::string& id) :
+    CommandLine(),
+    ID(id),
+    Name(),
+    PreviousProgramID(),
+    Version(),
+    NextProgramID()
+{
+}
+
+/*! \fn SamProgram::SamProgram(const SamProgram& other)
+    \brief Copy constructor; duplicates every field of \a other, including the
+    internal NextProgramID link.
+*/
+SamProgram::SamProgram(const SamProgram& other) :
+    CommandLine(other.CommandLine),
+    ID(other.ID),
+    Name(other.Name),
+    PreviousProgramID(other.PreviousProgramID),
+    Version(other.Version),
+    NextProgramID(other.NextProgramID)
+{
+}
+
+/*! \fn SamProgram::~SamProgram(void)
+    \brief Destructor; performs no explicit cleanup.
+*/
+SamProgram::~SamProgram(void)
+{
+}
+
+/*! \fn void SamProgram::Clear(void)
+    \brief Empties every field of the record, including the internal NextProgramID link.
+*/
+void SamProgram::Clear(void)
+{
+    // @PG fields
+    CommandLine.clear();
+    ID.clear();
+    Name.clear();
+    PreviousProgramID.clear();
+    Version.clear();
+
+    // chain bookkeeping
+    NextProgramID.clear();
+}
+
+/*! \fn bool SamProgram::HasCommandLine(void) const
+    \brief Returns \c true if the CommandLine field (\@PG CL:\<CommandLine\>) is non-empty
+*/
+bool SamProgram::HasCommandLine(void) const {
+    const bool blank = CommandLine.empty();
+    return !blank;
+}
+
+/*! \fn bool SamProgram::HasID(void) const
+    \brief Returns \c true if the ID field (\@PG ID:\<ID\>) is non-empty
+*/
+bool SamProgram::HasID(void) const {
+    if ( ID.empty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamProgram::HasName(void) const
+    \brief Returns \c true if the Name field (\@PG PN:\<Name\>) is non-empty
+*/
+bool SamProgram::HasName(void) const {
+    return ( Name.empty() == false );
+}
+
+/*! \fn bool SamProgram::HasNextProgramID(void) const
+    \internal
+    \return true if NextProgramID is non-empty, i.e. the program has a "next"
+    record in a SamProgramChain
+*/
+bool SamProgram::HasNextProgramID(void) const {
+    const bool unlinked = NextProgramID.empty();
+    return !unlinked;
+}
+
+/*! \fn bool SamProgram::HasPreviousProgramID(void) const
+    \brief Returns \c true if the PreviousProgramID field (\@PG PP:\<PreviousProgramID\>) is non-empty
+*/
+bool SamProgram::HasPreviousProgramID(void) const {
+    return ( PreviousProgramID.empty() ? false : true );
+}
+
+/*! \fn bool SamProgram::HasVersion(void) const
+    \brief Returns \c true if the Version field (\@PG VN:\<Version\>) is non-empty
+*/
+bool SamProgram::HasVersion(void) const {
+    if ( Version.empty() )
+        return false;
+    return true;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,62 @@
+// ***************************************************************************
+// SamProgram.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM header program records.
+// ***************************************************************************
+
+#ifndef SAM_PROGRAM_H
+#define SAM_PROGRAM_H
+
+#include "api/api_global.h"
+#include <string>
+
+namespace BamTools {
+
+class SamProgramChain;
+
+struct API_EXPORT SamProgram {
+
+    // ctor & dtor
+    SamProgram(void);                           // all fields empty
+    SamProgram(const std::string& id);          // ID set to id, all other fields empty
+    SamProgram(const SamProgram& other);        // copies all fields, including NextProgramID
+    ~SamProgram(void);
+
+    // query/modify entire program record
+    void Clear(void);                           // clears all data fields
+
+    // convenience query methods (each tests whether the matching field is non-empty)
+    bool HasCommandLine(void) const;            // returns true if program record has a command line entry
+    bool HasID(void) const;                     // returns true if program record has an ID
+    bool HasName(void) const;                   // returns true if program record has a name
+    bool HasPreviousProgramID(void) const;      // returns true if program record has a 'previous program ID'
+    bool HasVersion(void) const;                // returns true if program record has a version
+
+    // data members
+    std::string CommandLine;                    // CL:<CommandLine>
+    std::string ID;                             // ID:<ID>          *Required for valid SAM header*
+    std::string Name;                           // PN:<Name>
+    std::string PreviousProgramID;              // PP:<PreviousProgramID>
+    std::string Version;                        // VN:<Version>
+
+    // internal (non-standard) methods & fields, reachable only through the friend SamProgramChain
+    private:
+        bool HasNextProgramID(void) const;      // true if NextProgramID is non-empty
+        std::string NextProgramID;              // ID of the "next" program record in a SamProgramChain
+        friend class BamTools::SamProgramChain;
+};
+
+/*! \fn bool operator==(const SamProgram& lhs, const SamProgram& rhs)
+    \brief Equality for program records; only the ID fields are compared.
+*/
+API_EXPORT inline bool operator==(const SamProgram& lhs, const SamProgram& rhs) {
+    return ( lhs.ID.compare(rhs.ID) == 0 );
+}
+
+} // namespace BamTools
+
+#endif // SAM_PROGRAM_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,352 @@\n+// ***************************************************************************\n+// SamProgramChain.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 19 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a SamProgram record "chain"\n+// ***************************************************************************\n+\n+#include <api/SamProgramChain.h>\n+using namespace BamTools;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <cstdlib>\n+using namespace std;\n+\n+/*! \\class BamTools::SamProgramChain\n+    \\brief Sorted container "chain" of SamProgram records.\n+\n+    Provides methods for operating on a collection of SamProgram records.\n+\n+    N.B. - Underlying container is *NOT* ordered by linkage, but by order of\n+    appearance in SamHeader and subsequent Add() calls. Using the current\n+    iterators will not allow you to step through the header\'s program history.\n+    Instead use First()/Last() to access oldest/newest records, respectively.\n+*/\n+\n+/*! \\fn SamProgramChain::SamProgramChain(void)\n+    \\brief constructor\n+*/\n+SamProgramChain::SamProgramChain(void) { }\n+\n+/*! \\fn SamProgramChain::SamProgramChain(const SamProgramChain& other)\n+    \\brief copy constructor\n+*/\n+SamProgramChain::SamProgramChain(const SamProgramChain& other)\n+    : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamProgramChain::~SamProgramChain(void)\n+    \\brief destructor\n+*/\n+SamProgramChain::~SamProgramChain(void) { }\n+\n+/*! \\fn void SamProgramChain::Add(SamProgram& program)\n+    \\brief Appends a program to program chain.\n+\n+    Duplicate entries are silently discarded.\n+\n+    N.B. 
- Underlying container is *NOT* ordered by linkage, but by order of\n+    appearance in SamHeader and subsequent Add() calls. Using the current\n+    iterators will not allow you to step through the header\'s program history.\n+    Instead use First()/Last() to access oldest/newest records, respectively.\n+\n+    \\param program entry to be appended\n+*/\n+void SamProgramChain::Add(SamProgram& program) {\n+\n+    // ignore duplicated records\n+    if ( Contains(program) )\n+        return;\n+\n+    // if other programs already in chain, try to find the "next" record\n+    // tries to match another record\'s PPID with @program\'s ID\n+    if ( !IsEmpty() )\n+        program.NextProgramID = NextIdFor(program.ID);\n+\n+    // store program record\n+    m_data.push_back(program);\n+}\n+\n+/*! \\fn void SamProgramChain::Add(const std::vector<SamProgram>& programs)\n+    \\brief Appends a batch of programs to the end of the chain.\n+\n+    This is an overloaded function.\n+\n+    \\param programs batch of program records to append\n+    \\sa Add()\n+*/\n+void SamProgramChain::Add(std::vector<SamProgram>& programs) {\n+    vector<SamProgram>::iterator pgIter = programs.begin();\n+    vector<SamProgram>::iterator pgEnd  = programs.end();\n+    for ( ; pgIter != pgEnd; ++pgIter )\n+        Add(*pgIter);\n+}\n+\n+/*! \\fn SamProgramIterator SamProgramChain::Begin(void)\n+    \\return an STL iterator pointing to the first (oldest) program record\n+    \\sa ConstBegin(), End(), First()\n+*/\n+SamProgramIterator SamProgramChain::Begin(void) {\n+    return m_data.begin();\n+}\n+\n+/*! \\fn SamProgramConstIterator SamProgramChain::Begin(void) const\n+    \\return an STL const_iterator pointing to the first (oldest) program record\n+\n+    This is an overloaded function.\n+\n+    \\sa ConstBegin(), End(), First()\n+*/\n+SamProgramConstIterator SamProgramChain::Begin(void) const {\n+    return m_data.begin();\n+}\n+\n+/*! 
\\fn void SamProgramChain::Clear(void)\n+    \\brief Clears all program records.\n+*/\n+void SamProgramChain::Clear(void) {\n+    m_data.clear();\n+}\n+\n+/*! \\fn SamProgramConstIterator SamProgramChain::ConstBegin(void) const\n+    \\return an STL const_iterator pointing to the first (oldest) program record\n+  '..b'!= end; ++iter ) {\n+        const SamProgram& current = (*iter);\n+        if ( current.ID == programId )\n+            break;\n+    }\n+    return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamProgramChain::IsEmpty(void) const\n+    \\brief Returns \\c true if chain contains no records\n+    \\sa Size()\n+*/\n+bool SamProgramChain::IsEmpty(void) const {\n+    return m_data.empty();\n+}\n+\n+/*! \\fn SamProgram& SamProgramChain::Last(void)\n+    \\brief Fetches last (newest) record in the chain.\n+\n+    N.B. - This function will fail if the chain is empty. If this is possible,\n+    check the result of IsEmpty() before calling this function.\n+\n+    \\return a modifiable reference to the last (newest) program entry\n+    \\sa End(), First()\n+*/\n+SamProgram& SamProgramChain::Last(void) {\n+    // find first record in container that has no NextProgramID entry\n+    SamProgramIterator iter = Begin();\n+    SamProgramIterator end  = End();\n+    for ( ; iter != end; ++iter ) {\n+        SamProgram& current = (*iter);\n+        if ( !current.HasNextProgramID() )\n+            return current;\n+    }\n+\n+    // otherwise error\n+    cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n+    exit(1);\n+}\n+\n+/*! \\fn const SamProgram& SamProgramChain::Last(void) const\n+    \\brief Fetches last (newest) record in the chain.\n+\n+    This is an overloaded function.\n+\n+    N.B. - This function will fail if the chain is empty. 
If this is possible,\n+    check the result of IsEmpty() before calling this function.\n+\n+    \\return a read-only reference to the last (newest) program entry\n+    \\sa End(), ConstEnd(), First()\n+*/\n+const SamProgram& SamProgramChain::Last(void) const {\n+    // find first record in container that has no NextProgramID entry\n+    SamProgramConstIterator iter = ConstBegin();\n+    SamProgramConstIterator end  = ConstEnd();\n+    for ( ; iter != end; ++iter ) {\n+        const SamProgram& current = (*iter);\n+        if ( !current.HasNextProgramID() )\n+            return current;\n+    }\n+\n+    // otherwise error\n+    cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n+    exit(1);\n+}\n+\n+/*! \\fn const std::string SamProgramChain::NextIdFor(const std::string& programId) const\n+    \\internal\n+    \\return ID of program record, whose PreviousProgramID matches \\a programId.\n+    Otherwise, returns empty string if none found.\n+*/\n+const std::string SamProgramChain::NextIdFor(const std::string& programId) const {\n+\n+    // find first record in container whose PreviousProgramID matches @programId\n+    SamProgramConstIterator iter = ConstBegin();\n+    SamProgramConstIterator end  = ConstEnd();\n+    for ( ; iter != end; ++iter ) {\n+        const SamProgram& current = (*iter);\n+        if ( !current.HasPreviousProgramID() &&\n+              current.PreviousProgramID == programId\n+           )\n+        {\n+            return current.ID;\n+        }\n+    }\n+\n+    // none found\n+    return string();\n+}\n+\n+/*! \\fn int SamProgramChain::Size(void) const\n+    \\brief Returns number of program records in the chain.\n+    \\sa IsEmpty()\n+*/\n+int SamProgramChain::Size(void) const {\n+    return m_data.size();\n+}\n+\n+/*! 
\\fn SamProgram& SamProgramChain::operator[](const std::string& programId)\n+    \\brief Retrieves the modifiable SamProgram record that matches \\a programId.\n+\n+    NOTE - If the chain contains no read group matching this ID, this function will\n+    print an error and terminate.\n+\n+    \\param programId ID of program record to retrieve\n+    \\return a modifiable reference to the SamProgram associated with the ID\n+*/\n+SamProgram& SamProgramChain::operator[](const std::string& programId) {\n+\n+    // look up program record matching this ID\n+    int index = IndexOf(programId);\n+\n+    // if record not found\n+    if ( index == (int)m_data.size() ) {\n+        cerr << "SamProgramChain ERROR - unknown programId: " << programId << endl;\n+        exit(1);\n+    }\n+\n+    // otherwise return program record at index\n+    return m_data.at(index);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,86 @@
+// ***************************************************************************
+// SamProgramChain.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides methods for operating on a SamProgram record "chain"
+// ***************************************************************************
+
+#ifndef SAM_PROGRAMCHAIN_H
+#define SAM_PROGRAMCHAIN_H
+
+#include <api/api_global.h>
+#include <api/SamProgram.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+// chain is *NOT* sorted in any order
+// use First()/Last() to retrieve oldest/newest programs, respectively
+typedef std::vector<SamProgram>             SamProgramContainer;
+typedef SamProgramContainer::iterator       SamProgramIterator;
+typedef SamProgramContainer::const_iterator SamProgramConstIterator;
+
+class API_EXPORT SamProgramChain {
+
+    // ctor & dtor
+    public:
+        SamProgramChain(void);
+        SamProgramChain(const SamProgramChain& other);
+        ~SamProgramChain(void);
+
+    // query/modify program data
+    public:
+        // appends a program record to the chain (duplicates, matched on ID, are silently discarded)
+        void Add(SamProgram& program);
+        void Add(std::vector<SamProgram>& programs);
+
+        // clears all program records
+        void Clear(void);
+
+        // returns true if chain contains this program record (matches on ID)
+        bool Contains(const SamProgram& program) const;
+        bool Contains(const std::string& programId) const;
+
+        // returns the first (oldest) program in the chain
+        SamProgram& First(void);
+        const SamProgram& First(void) const;
+
+        // returns true if chain is empty
+        bool IsEmpty(void) const;
+
+        // returns last (most recent) program in the chain (fails on an empty chain; check IsEmpty() first)
+        SamProgram& Last(void);
+        const SamProgram& Last(void) const;
+
+        // returns number of program records in the chain
+        int Size(void) const;
+
+        // retrieves a modifiable reference to the SamProgram object associated with this ID
+        SamProgram& operator[](const std::string& programId);
+
+    // retrieve STL-compatible iterators (container order, which is not linkage order)
+    public:
+        SamProgramIterator      Begin(void);              // returns iterator to begin()
+        SamProgramConstIterator Begin(void) const;        // returns const_iterator to begin()
+        SamProgramConstIterator ConstBegin(void) const;   // returns const_iterator to begin()
+        SamProgramIterator      End(void);                // returns iterator to end()
+        SamProgramConstIterator End(void) const;          // returns const_iterator to end()
+        SamProgramConstIterator ConstEnd(void) const;     // returns const_iterator to end()
+
+    // internal methods
+    private:
+        int IndexOf(const std::string& programId) const;
+        const std::string NextIdFor(const std::string& programId) const;
+
+    // data members
+    private:
+        SamProgramContainer m_data;
+};
+
+} // namespace BamTools
+
+#endif // SAM_PROGRAMCHAIN_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,222 @@
+// ***************************************************************************
+// SamReadGroup.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM read group data fields.
+// ***************************************************************************
+
+#include <api/SamReadGroup.h>
+using namespace BamTools;
+using namespace std;
+
+/*! \struct BamTools::SamReadGroup
+    \brief Represents a SAM read group entry.
+
+    Provides direct read/write access to the SAM read group data fields.
+
+    \sa \samSpecURL
+*/
+/*! \var SamReadGroup::Description
+    \brief corresponds to \@RG DS:\<Description\>
+*/
+/*! \var SamReadGroup::FlowOrder
+    \brief corresponds to \@RG FO:\<FlowOrder\>
+*/
+/*! \var SamReadGroup::ID
+    \brief corresponds to \@RG ID:\<ID\>
+
+    Required for valid SAM header.
+*/
+/*! \var SamReadGroup::KeySequence
+    \brief corresponds to \@RG KS:\<KeySequence\>
+*/
+/*! \var SamReadGroup::Library
+    \brief corresponds to \@RG LB:\<Library\>
+*/
+/*! \var SamReadGroup::PlatformUnit
+    \brief corresponds to \@RG PU:\<PlatformUnit\>
+*/
+/*! \var SamReadGroup::PredictedInsertSize
+    \brief corresponds to \@RG PI:\<PredictedInsertSize\>
+*/
+/*! \var SamReadGroup::ProductionDate
+    \brief corresponds to \@RG DT:\<ProductionDate\>
+*/
+/*! \var SamReadGroup::Program
+    \brief corresponds to \@RG PG:\<Program\>
+*/
+/*! \var SamReadGroup::Sample
+    \brief corresponds to \@RG SM:\<Sample\>
+*/
+/*! \var SamReadGroup::SequencingCenter
+    \brief corresponds to \@RG CN:\<SequencingCenter\>
+*/
+/*! \var SamReadGroup::SequencingTechnology
+    \brief corresponds to \@RG PL:\<SequencingTechnology\>
+*/
+
+/*! \fn SamReadGroup::SamReadGroup(void)
+    \brief Default constructor; every field is initialized from an empty string.
+*/
+SamReadGroup::SamReadGroup(void) :
+    Description(""),
+    FlowOrder(""),
+    ID(""),
+    KeySequence(""),
+    Library(""),
+    PlatformUnit(""),
+    PredictedInsertSize(""),
+    ProductionDate(""),
+    Program(""),
+    Sample(""),
+    SequencingCenter(""),
+    SequencingTechnology("")
+{
+}
+
+/*! \fn SamReadGroup::SamReadGroup(const std::string& id)
+    \brief Constructs a read group whose ID is \a id; every other field is
+    initialized from an empty string.
+
+    \param id desired read group ID
+*/
+SamReadGroup::SamReadGroup(const std::string& id) :
+    Description(""),
+    FlowOrder(""),
+    ID(id),
+    KeySequence(""),
+    Library(""),
+    PlatformUnit(""),
+    PredictedInsertSize(""),
+    ProductionDate(""),
+    Program(""),
+    Sample(""),
+    SequencingCenter(""),
+    SequencingTechnology("")
+{
+}
+
+/*! \fn SamReadGroup::SamReadGroup(const SamReadGroup& other)
+    \brief Copy constructor; duplicates each of the twelve fields of \a other.
+*/
+SamReadGroup::SamReadGroup(const SamReadGroup& other) :
+    Description(other.Description),
+    FlowOrder(other.FlowOrder),
+    ID(other.ID),
+    KeySequence(other.KeySequence),
+    Library(other.Library),
+    PlatformUnit(other.PlatformUnit),
+    PredictedInsertSize(other.PredictedInsertSize),
+    ProductionDate(other.ProductionDate),
+    Program(other.Program),
+    Sample(other.Sample),
+    SequencingCenter(other.SequencingCenter),
+    SequencingTechnology(other.SequencingTechnology)
+{
+}
+
+/*! \fn SamReadGroup::~SamReadGroup(void)
+    \brief Destructor; performs no explicit cleanup.
+*/
+SamReadGroup::~SamReadGroup(void)
+{
+}
+
+/*! \fn void SamReadGroup::Clear(void)
+    \brief Empties every field of the read group, the ID included.
+*/
+void SamReadGroup::Clear(void)
+{
+    Description.clear();
+    FlowOrder.clear();
+    ID.clear();
+    KeySequence.clear();
+
+    Library.clear();
+    PlatformUnit.clear();
+    PredictedInsertSize.clear();
+    ProductionDate.clear();
+
+    Program.clear();
+    Sample.clear();
+    SequencingCenter.clear();
+    SequencingTechnology.clear();
+}
+
+/*! \fn bool SamReadGroup::HasDescription(void) const
+    \brief Returns \c true if the Description field (\@RG DS:\<Description\>) is non-empty
+*/
+bool SamReadGroup::HasDescription(void) const {
+    const bool blank = Description.empty();
+    return !blank;
+}
+
+/*! \fn bool SamReadGroup::HasFlowOrder(void) const
+    \brief Returns \c true if the FlowOrder field (\@RG FO:\<FlowOrder\>) is non-empty
+*/
+bool SamReadGroup::HasFlowOrder(void) const {
+    if ( FlowOrder.empty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamReadGroup::HasID(void) const
+    \brief Returns \c true if the ID field (\@RG ID:\<ID\>) is non-empty
+*/
+bool SamReadGroup::HasID(void) const {
+    return ( ID.empty() == false );
+}
+
+/*! \fn bool SamReadGroup::HasKeySequence(void) const
+    \brief Returns \c true if the KeySequence field (\@RG KS:\<KeySequence\>) is non-empty
+*/
+bool SamReadGroup::HasKeySequence(void) const {
+    return ( KeySequence.empty() ? false : true );
+}
+
+/*! \fn bool SamReadGroup::HasLibrary(void) const
+    \brief Returns \c true if the Library field (\@RG LB:\<Library\>) is non-empty
+*/
+bool SamReadGroup::HasLibrary(void) const {
+    const bool blank = Library.empty();
+    return !blank;
+}
+
+/*! \fn bool SamReadGroup::HasPlatformUnit(void) const
+    \brief Returns \c true if the PlatformUnit field (\@RG PU:\<PlatformUnit\>) is non-empty
+*/
+bool SamReadGroup::HasPlatformUnit(void) const {
+    if ( PlatformUnit.empty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamReadGroup::HasPredictedInsertSize(void) const
+    \brief Returns \c true if the PredictedInsertSize field (\@RG PI:\<PredictedInsertSize\>) is non-empty
+*/
+bool SamReadGroup::HasPredictedInsertSize(void) const {
+    return ( PredictedInsertSize.empty() == false );
+}
+
+/*! \fn bool SamReadGroup::HasProductionDate(void) const
+    \brief Returns \c true if the ProductionDate field (\@RG DT:\<ProductionDate\>) is non-empty
+*/
+bool SamReadGroup::HasProductionDate(void) const {
+    return ( ProductionDate.empty() ? false : true );
+}
+
+/*! \fn bool SamReadGroup::HasProgram(void) const
+    \brief Returns \c true if the Program field (\@RG PG:\<Program\>) is non-empty
+*/
+bool SamReadGroup::HasProgram(void) const {
+    const bool blank = Program.empty();
+    return !blank;
+}
+
+/*! \fn bool SamReadGroup::HasSample(void) const
+    \brief Returns \c true if the Sample field (\@RG SM:\<Sample\>) is non-empty
+*/
+bool SamReadGroup::HasSample(void) const {
+    if ( Sample.empty() )
+        return false;
+    return true;
+}
+
+/*! \fn bool SamReadGroup::HasSequencingCenter(void) const
+    \brief Returns \c true if the SequencingCenter field (\@RG CN:\<SequencingCenter\>) is non-empty
+*/
+bool SamReadGroup::HasSequencingCenter(void) const {
+    return ( SequencingCenter.empty() == false );
+}
+
+/*! \fn bool SamReadGroup::HasSequencingTechnology(void) const
+    \brief Returns \c true if the SequencingTechnology field (\@RG PL:\<SequencingTechnology\>) is non-empty
+*/
+bool SamReadGroup::HasSequencingTechnology(void) const {
+    return ( SequencingTechnology.empty() ? false : true );
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,69 @@
+// ***************************************************************************
+// SamReadGroup.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM read group data fields.
+// ***************************************************************************
+
+#ifndef SAM_READGROUP_H
+#define SAM_READGROUP_H
+
+#include "api/api_global.h"
+#include <string>
+
+namespace BamTools {
+
+// Plain data holder for one SAM @RG (read group) header entry.
+// Every tag value is kept as a public std::string member that callers may read or write directly.
+// Note that operator== (declared after this struct) compares read groups by ID only.
+struct API_EXPORT SamReadGroup {
+
+    // ctor & dtor
+    SamReadGroup(void);
+    SamReadGroup(const std::string& id);        // constructs a read group from its ID
+    SamReadGroup(const SamReadGroup& other);
+    ~SamReadGroup(void);
+
+    // query/modify entire read group
+    void Clear(void);                           // clears all data fields
+
+    // convenience query methods (one per data field below)
+    bool HasDescription(void) const;            // returns true if read group has a description
+    bool HasFlowOrder(void) const;              // returns true if read group has a flow order entry
+    bool HasID(void) const;                     // returns true if read group has a group ID
+    bool HasKeySequence(void) const;            // returns true if read group has a key sequence
+    bool HasLibrary(void) const;                // returns true if read group has a library name
+    bool HasPlatformUnit(void) const;           // returns true if read group has a platform unit ID
+    bool HasPredictedInsertSize(void) const;    // returns true if read group has a predicted insert size
+    bool HasProductionDate(void) const;         // returns true if read group has a production date
+    bool HasProgram(void) const;                // returns true if read group has a program entry
+    bool HasSample(void) const;                 // returns true if read group has a sample name
+    bool HasSequencingCenter(void) const;       // returns true if read group has a sequencing center ID
+    bool HasSequencingTechnology(void) const;   // returns true if read group has a sequencing technology ID
+
+
+    // data fields (trailing comment shows the @RG tag each field corresponds to)
+    std::string Description;                    // DS:<Description>
+    std::string FlowOrder;                      // FO:<FlowOrder>
+    std::string ID;                             // ID:<ID>              *Required for valid SAM header*
+    std::string KeySequence;                    // KS:<KeySequence>
+    std::string Library;                        // LB:<Library>
+    std::string PlatformUnit;                   // PU:<PlatformUnit>
+    std::string PredictedInsertSize;            // PI:<PredictedInsertSize>
+    std::string ProductionDate;                 // DT:<ProductionDate>
+    std::string Program;                        // PG:<Program>
+    std::string Sample;                         // SM:<Sample>
+    std::string SequencingCenter;               // CN:<SequencingCenter>
+    std::string SequencingTechnology;           // PL:<SequencingTechnology>
+};
+
+/*! \fn bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs)
+    \brief Equality test for read groups; only the ID fields are compared.
+
+    \return \c true if both read groups carry the same ID string
+*/
+API_EXPORT inline bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) {
+    // no other data field takes part in the comparison
+    return ( lhs.ID.compare(rhs.ID) == 0 );
+}
+
+} // namespace BamTools
+
+#endif // SAM_READGROUP_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,290 @@\n+// ***************************************************************************\n+// SamReadGroupDictionary.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a collection of SamReadGroup entries.\n+// ***************************************************************************\n+\n+#include <api/SamReadGroupDictionary.h>\n+using namespace BamTools;\n+\n+#include <algorithm>\n+#include <iostream>\n+using namespace std;\n+\n+/*! \\class BamTools::SamReadGroupDictionary\n+    \\brief Container of SamReadGroup entries.\n+\n+    Provides methods for operating on a collection of SamReadGroup entries.\n+*/\n+\n+/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(void)\n+    \\brief constructor\n+*/\n+SamReadGroupDictionary::SamReadGroupDictionary(void) { }\n+\n+/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n+    \\brief copy constructor\n+*/\n+SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n+    : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamReadGroupDictionary::~SamReadGroupDictionary(void)\n+    \\brief destructor\n+*/\n+SamReadGroupDictionary::~SamReadGroupDictionary(void) { }\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const SamReadGroup& readGroup)\n+    \\brief Adds a read group to the dictionary.\n+\n+    Duplicate entries are silently discarded.\n+\n+    \\param readGroup entry to be added\n+*/\n+void SamReadGroupDictionary::Add(const SamReadGroup& readGroup) {\n+\n+    // TODO: report error on attempted duplicate?\n+\n+    if ( IsEmpty() || !Contains(readGroup) )\n+        m_data.push_back(readGroup);\n+}\n+\n+/*! 
\\fn void SamReadGroupDictionary::Add(const std::string& readGroupId)\n+    \\brief Adds a read group to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param readGroupId ID of read group to be added\n+    \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::string& readGroupId) {\n+    Add( SamReadGroup(readGroupId) );\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups)\n+    \\brief Adds multiple read groups to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param readGroups entries to be added\n+    \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups) {\n+    vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n+    vector<SamReadGroup>::const_iterator rgEnd  = readGroups.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Add(*rgIter);\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds)\n+    \\brief Adds multiple read groups to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param readGroupIds IDs of read groups to be added\n+    \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds) {\n+    vector<string>::const_iterator rgIter = readGroupIds.begin();\n+    vector<string>::const_iterator rgEnd  = readGroupIds.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Add(*rgIter);\n+}\n+\n+/*! \\fn SamReadGroupIterator SamReadGroupDictionary::Begin(void)\n+    \\return an STL iterator pointing to the first read group\n+    \\sa ConstBegin(), End()\n+*/\n+SamReadGroupIterator SamReadGroupDictionary::Begin(void) {\n+    return m_data.begin();\n+}\n+\n+/*! 
\\fn SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const\n+    \\return an STL const_iterator pointing to the first read group\n+\n+    This is an overloaded function.\n+\n+    \\sa ConstBegin(), End()\n+*/\n+SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const {\n+    return m_data.begin();\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Clear(void)\n+    \\brief Clears all read group entries.\n+*/\n+void SamReadGroupDicti'..b'    return m_data.end();\n+}\n+\n+/*! \\fn int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const\n+    \\internal\n+    \\return index of read group if found.  Otherwise, returns vector::size() (invalid index).\n+*/\n+int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const {\n+    SamReadGroupConstIterator begin = ConstBegin();\n+    SamReadGroupConstIterator iter  = begin;\n+    SamReadGroupConstIterator end   = ConstEnd();\n+    for ( ; iter != end; ++iter ) {\n+        const SamReadGroup& current = (*iter);\n+        if ( current.ID == readGroupId )\n+            break;\n+    }\n+    return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamReadGroupDictionary::IsEmpty(void) const\n+    \\brief Returns \\c true if dictionary contains no read groups\n+    \\sa Size()\n+*/\n+bool SamReadGroupDictionary::IsEmpty(void) const {\n+    return m_data.empty();\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup)\n+    \\brief Removes read group from dictionary, if found (matching on ID).\n+\n+    This is an overloaded function.\n+\n+    \\param readGroup read group to remove (matches on ID)\n+*/\n+void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup) {\n+    Remove( readGroup.ID );\n+}\n+\n+/*! 
\\fn void SamReadGroupDictionary::Remove(const std::string& readGroupId)\n+    \\brief Removes read group from dictionary, if found.\n+    \\param readGroupId ID of read group to remove\n+    \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::string& readGroupId) {\n+    if ( Contains(readGroupId) )\n+        m_data.erase( m_data.begin() + IndexOf(readGroupId) );\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups)\n+    \\brief Removes multiple read groups from dictionary (matching on ID).\n+\n+    This is an overloaded function.\n+\n+    \\param readGroups read groups to remove\n+    \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups) {\n+    vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n+    vector<SamReadGroup>::const_iterator rgEnd  = readGroups.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Remove(*rgIter);\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds)\n+    \\brief Removes multiple read groups from dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param readGroupIds IDs of the read groups to remove\n+    \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds) {\n+    vector<string>::const_iterator rgIter = readGroupIds.begin();\n+    vector<string>::const_iterator rgEnd  = readGroupIds.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Remove(*rgIter);\n+}\n+\n+/*! \\fn int SamReadGroupDictionary::Size(void) const\n+    \\brief Returns number of read groups in dictionary.\n+    \\sa IsEmpty()\n+*/\n+int SamReadGroupDictionary::Size(void) const {\n+    return m_data.size();\n+}\n+\n+/*! 
\\fn SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId)\n+    \\brief Retrieves the modifiable SamReadGroup that matches \\a readGroupId.\n+\n+    NOTE - If the dictionary contains no read group matching this ID, this function inserts\n+    a new one with this ID, and returns a reference to it.\n+\n+    If you want to avoid this insertion behavior, check the result of Contains() before\n+    using this operator.\n+\n+    \\param readGroupId ID of read group to retrieve\n+    \\return a modifiable reference to the SamReadGroup associated with the ID\n+*/\n+SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId) {\n+\n+    // look up read group ID\n+    int index = IndexOf(readGroupId);\n+\n+    // if found, return read group at index\n+    if ( index != (int)m_data.size() )\n+        return m_data[index];\n+\n+    // otherwise, append new read group and return reference\n+    else {\n+        SamReadGroup rg(readGroupId);\n+        m_data.push_back(rg);\n+        return m_data.back();\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,87 @@
+// ***************************************************************************
+// SamReadGroupDictionary.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides methods for operating on a collection of SamReadGroup entries.
+// ***************************************************************************
+
+#ifndef SAM_READGROUP_DICTIONARY_H
+#define SAM_READGROUP_DICTIONARY_H
+
+#include <api/api_global.h>
+#include <api/SamReadGroup.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+typedef std::vector<SamReadGroup>             SamReadGroupContainer;
+typedef SamReadGroupContainer::iterator       SamReadGroupIterator;
+typedef SamReadGroupContainer::const_iterator SamReadGroupConstIterator;
+
+// Container of SamReadGroup entries, stored in a std::vector (SamReadGroupContainer).
+class API_EXPORT SamReadGroupDictionary {
+
+    // ctor & dtor
+    public:
+        SamReadGroupDictionary(void);
+        SamReadGroupDictionary(const SamReadGroupDictionary& other);
+        ~SamReadGroupDictionary(void);
+
+    // query/modify read group data
+    public:
+        // adds a read group (an entry already contained in the dictionary is silently discarded)
+        void Add(const SamReadGroup& readGroup);
+        void Add(const std::string& readGroupId);
+
+        // adds multiple read groups (each element goes through the single-entry Add)
+        void Add(const std::vector<SamReadGroup>& readGroups);
+        void Add(const std::vector<std::string>& readGroupIds);
+
+        // clears all read group entries
+        void Clear(void);
+
+        // returns true if dictionary contains this read group
+        bool Contains(const SamReadGroup& readGroup) const;
+        bool Contains(const std::string& readGroupId) const;
+
+        // returns true if dictionary is empty
+        bool IsEmpty(void) const;
+
+        // removes read group, if found (the SamReadGroup overload matches on ID)
+        void Remove(const SamReadGroup& readGroup);
+        void Remove(const std::string& readGroupId);
+
+        // removes multiple read groups
+        void Remove(const std::vector<SamReadGroup>& readGroups);
+        void Remove(const std::vector<std::string>& readGroupIds);
+
+        // returns number of read groups in dictionary
+        int Size(void) const;
+
+        // retrieves a modifiable reference to the SamReadGroup object associated with this ID
+        // NOTE: if no entry has this ID, a new read group with that ID is appended and returned;
+        //       check Contains() first to avoid the insertion
+        SamReadGroup& operator[](const std::string& readGroupId);
+
+    // retrieve STL-compatible iterators
+    public:
+        SamReadGroupIterator      Begin(void);              // returns iterator to begin()
+        SamReadGroupConstIterator Begin(void) const;        // returns const_iterator to begin()
+        SamReadGroupConstIterator ConstBegin(void) const;   // returns const_iterator to begin()
+        SamReadGroupIterator      End(void);                // returns iterator to end()
+        SamReadGroupConstIterator End(void) const;          // returns const_iterator to end()
+        SamReadGroupConstIterator ConstEnd(void) const;     // returns const_iterator to end()
+
+    // internal methods
+    private:
+        // returns index of the entry with this ID, or the container size (invalid index) if not found
+        int IndexOf(const std::string& readGroupId) const;
+
+    // data members
+    private:
+        SamReadGroupContainer m_data;   // the read group entries
+};
+
+} // namespace BamTools
+
+#endif // SAM_READGROUP_DICTIONARY_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,162 @@
+// ***************************************************************************
+// SamSequence.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM sequence data fields.
+// ***************************************************************************
+
+#include <api/SamSequence.h>
+#include <sstream>
+using namespace BamTools;
+using namespace std;
+
+/*! \struct BamTools::SamSequence
+    \brief Represents a SAM sequence entry.
+
+    Provides direct read/write access to the SAM sequence data fields.
+
+    \sa \samSpecURL
+*/
+/*! \var SamSequence::AssemblyID
+    \brief corresponds to \@SQ AS:\<AssemblyID\>
+*/
+/*! \var SamSequence::Checksum
+    \brief corresponds to \@SQ M5:\<Checksum\>
+*/
+/*! \var SamSequence::Length
+    \brief corresponds to \@SQ LN:\<Length\>
+
+    Required for valid SAM header.
+*/
+/*! \var SamSequence::Name
+    \brief corresponds to \@SQ SN:\<Name\>
+
+    Required for valid SAM header.
+*/
+/*! \var SamSequence::Species
+    \brief corresponds to \@SQ SP:\<Species\>
+*/
+/*! \var SamSequence::URI
+    \brief corresponds to \@SQ UR:\<URI\>
+*/
+
+/*! \fn SamSequence::SamSequence(void)
+    \brief Default constructor; every data field starts out as an empty string.
+*/
+SamSequence::SamSequence(void) :
+    AssemblyID(),
+    Checksum(),
+    Length(),
+    Name(),
+    Species(),
+    URI()
+{ }
+
+/*! \fn SamSequence::SamSequence(const std::string& name, const int& length)
+    \brief Builds a sequence entry from a \a name and a numeric \a length.
+
+    Length is stored as a string, so the integer is converted to text here.
+    All remaining fields are left empty.
+
+    \param name   sequence name (stored in Name)
+    \param length sequence length as an integer (stored in Length as text)
+*/
+SamSequence::SamSequence(const std::string& name,
+                         const int& length) :
+    AssemblyID(),
+    Checksum(),
+    Length(),
+    Name(name),
+    Species(),
+    URI()
+{
+    // format the integer through a stream to obtain its string form
+    ostringstream lengthText;
+    lengthText << length;
+    Length = lengthText.str();
+}
+
+/*! \fn SamSequence::SamSequence(const std::string& name, const std::string& length)
+    \brief Builds a sequence entry from a \a name and a textual \a length.
+
+    The length string is stored as given. All remaining fields are left empty.
+
+    \param name   sequence name (stored in Name)
+    \param length sequence length as a string (stored in Length)
+*/
+SamSequence::SamSequence(const std::string& name,
+                         const std::string& length) :
+    AssemblyID(),
+    Checksum(),
+    Length(length),
+    Name(name),
+    Species(),
+    URI()
+{ }
+
+/*! \fn SamSequence::SamSequence(const SamSequence& other)
+    \brief Copy constructor; duplicates every data field of \a other.
+*/
+SamSequence::SamSequence(const SamSequence& other) :
+    AssemblyID(other.AssemblyID),
+    Checksum(other.Checksum),
+    Length(other.Length),
+    Name(other.Name),
+    Species(other.Species),
+    URI(other.URI)
+{ }
+
+/*! \fn SamSequence::~SamSequence(void)
+    \brief Destructor; performs no work of its own.
+*/
+SamSequence::~SamSequence(void)
+{
+}
+
+/*! \fn void SamSequence::Clear(void)
+    \brief Resets the sequence entry by emptying each of its data fields.
+*/
+void SamSequence::Clear(void) {
+
+    // gather every data field so they can all be emptied the same way
+    std::string* const fields[] = { &AssemblyID, &Checksum, &Length, &Name, &Species, &URI };
+    const unsigned int fieldCount = sizeof(fields) / sizeof(fields[0]);
+
+    for ( unsigned int i = 0; i < fieldCount; ++i )
+        fields[i]->clear();
+}
+
+/*! \fn bool SamSequence::HasAssemblyID(void) const
+    \brief Reports whether the \@SQ AS:\<AssemblyID\> field is set.
+
+    \return \c true if AssemblyID is a non-empty string
+*/
+bool SamSequence::HasAssemblyID(void) const {
+    return !AssemblyID.empty();
+}
+
+/*! \fn bool SamSequence::HasChecksum(void) const
+    \brief Reports whether the \@SQ M5:\<Checksum\> field is set.
+
+    \return \c true if Checksum is a non-empty string
+*/
+bool SamSequence::HasChecksum(void) const {
+    return !Checksum.empty();
+}
+
+/*! \fn bool SamSequence::HasLength(void) const
+    \brief Reports whether the \@SQ LN:\<Length\> field is set.
+
+    \return \c true if Length is a non-empty string
+*/
+bool SamSequence::HasLength(void) const {
+    return !Length.empty();
+}
+
+/*! \fn bool SamSequence::HasName(void) const
+    \brief Reports whether the \@SQ SN:\<Name\> field is set.
+
+    \return \c true if Name is a non-empty string
+*/
+bool SamSequence::HasName(void) const {
+    return !Name.empty();
+}
+
+/*! \fn bool SamSequence::HasSpecies(void) const
+    \brief Reports whether the \@SQ SP:\<Species\> field is set.
+
+    \return \c true if Species is a non-empty string
+*/
+bool SamSequence::HasSpecies(void) const {
+    return !Species.empty();
+}
+
+/*! \fn bool SamSequence::HasURI(void) const
+    \brief Reports whether the \@SQ UR:\<URI\> field is set.
+
+    \return \c true if URI is a non-empty string
+*/
+bool SamSequence::HasURI(void) const {
+    return !URI.empty();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,61 @@
+// ***************************************************************************
+// SamSequence.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides direct read/write access to the SAM sequence data fields.
+// ***************************************************************************
+
+#ifndef SAM_SEQUENCE_H
+#define SAM_SEQUENCE_H
+
+#include <api/api_global.h>
+#include <string>
+
+namespace BamTools {
+
+// Plain data holder for one SAM @SQ (sequence) header entry.
+// Every tag value, including Length, is kept as a public std::string member.
+struct API_EXPORT SamSequence {
+
+    // ctor & dtor
+    SamSequence(void);                                                  // all fields empty
+    SamSequence(const std::string& name, const int& length);           // numeric length is converted to its string form
+    SamSequence(const std::string& name, const std::string& length);   // length string is stored as given
+    SamSequence(const SamSequence& other);
+    ~SamSequence(void);
+
+    // query/modify entire sequence
+    void Clear(void);                   // clears all contents
+
+    // convenience query methods (each returns true when the matching field is a non-empty string)
+    bool HasAssemblyID(void) const;     // returns true if sequence has an assembly ID
+    bool HasChecksum(void) const;       // returns true if sequence has an MD5 checksum
+    bool HasLength(void) const;         // returns true if sequence has a length
+    bool HasName(void) const;           // returns true if sequence has a name
+    bool HasSpecies(void) const;        // returns true if sequence has a species ID
+    bool HasURI(void) const;            // returns true if sequence has a URI
+
+    // data members (trailing comment shows the @SQ tag each field corresponds to)
+    std::string AssemblyID;             // AS:<AssemblyID>
+    std::string Checksum;               // M5:<Checksum>
+    std::string Length;                 // LN:<Length>      *Required for valid SAM header*
+    std::string Name;                   // SN:<Name>        *Required for valid SAM header*
+    std::string Species;                // SP:<Species>
+    std::string URI;                    // UR:<URI>
+};
+
+/*! \fn bool operator==(const SamSequence& lhs, const SamSequence& rhs)
+    \brief Equality test for sequences: names and lengths must match, and checksums
+           must match as well when both sequences provide one.
+
+    \return \c true if the two sequences are considered equal
+*/
+API_EXPORT inline bool operator==(const SamSequence& lhs, const SamSequence& rhs) {
+
+    // name and length must always agree
+    const bool sameNameAndLength = ( lhs.Name == rhs.Name ) && ( lhs.Length == rhs.Length );
+    if ( !sameNameAndLength )
+        return false;
+
+    // checksums only take part in the comparison when both sides carry one
+    const bool bothHaveChecksum = lhs.HasChecksum() && rhs.HasChecksum();
+    return ( !bothHaveChecksum || lhs.Checksum == rhs.Checksum );
+}
+
+} // namespace BamTools
+
+#endif // SAM_SEQUENCE_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,293 @@\n+// ***************************************************************************\n+// SamSequenceDictionary.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a collection of SamSequence entries.\n+// *************************************************************************\n+\n+#include <api/SamSequenceDictionary.h>\n+using namespace BamTools;\n+\n+#include <iostream>\n+using namespace std;\n+\n+/*! \\class BamTools::SamSequenceDictionary\n+    \\brief Container of SamSequence entries.\n+\n+    Provides methods for operating on a collection of SamSequence entries.\n+*/\n+\n+/*! \\fn SamSequenceDictionary::SamSequenceDictionary(void)\n+    \\brief constructor\n+*/\n+SamSequenceDictionary::SamSequenceDictionary(void) { }\n+\n+/*! \\fn SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n+    \\brief copy constructor\n+*/\n+SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n+    : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamSequenceDictionary::~SamSequenceDictionary(void)\n+    \\brief destructor\n+*/\n+SamSequenceDictionary::~SamSequenceDictionary(void) { }\n+\n+/*! \\fn void SamSequenceDictionary::Add(const SamSequence& sequence)\n+    \\brief Adds a sequence to the dictionary.\n+\n+    Duplicate entries are silently discarded.\n+\n+    \\param sequence entry to be added\n+*/\n+void SamSequenceDictionary::Add(const SamSequence& sequence) {\n+\n+    // TODO: report error on attempted duplicate?\n+\n+    if ( IsEmpty() || !Contains(sequence) )\n+        m_data.push_back(sequence);\n+}\n+\n+/*! 
\\fn void SamSequenceDictionary::Add(const std::string& name, const int& length)\n+    \\brief Adds a sequence to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param name name of sequence entry to be added\n+    \\param length length of sequence entry to be added\n+    \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::string& name, const int& length) {\n+    Add( SamSequence(name, length) );\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences)\n+    \\brief Adds multiple sequences to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param sequences entries to be added\n+    \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences) {\n+    vector<SamSequence>::const_iterator seqIter = sequences.begin();\n+    vector<SamSequence>::const_iterator seqEnd  = sequences.end();\n+    for ( ; seqIter!= seqEnd; ++seqIter )\n+        Add(*seqIter);\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap)\n+    \\brief Adds multiple sequences to the dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param sequenceMap map of sequence entries (name => length) to be added\n+    \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap) {\n+    map<string, int>::const_iterator seqIter = sequenceMap.begin();\n+    map<string, int>::const_iterator seqEnd  = sequenceMap.end();\n+    for ( ; seqIter != seqEnd; ++seqIter ) {\n+        const string& name = (*seqIter).first;\n+        const int& length = (*seqIter).second;\n+        Add( SamSequence(name, length) );\n+    }\n+}\n+\n+/*! \\fn SamSequenceIterator SamSequenceDictionary::Begin(void)\n+    \\return an STL iterator pointing to the first sequence\n+    \\sa ConstBegin(), End()\n+*/\n+SamSequenceIterator SamSequenceDictionary::Begin(void) {\n+    return m_data.begin();\n+}\n+\n+/*! 
\\fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const\n+    \\return an STL const_iterator pointing to the first sequence\n+\n+    This is an overloaded function.\n+\n+    \\sa ConstBegin(), End()\n+*/\n+SamSequenceConstIterator SamSequenceDictionary::Begin(void) const {\n'..b'r SamSequenceDictionary::End(void) const {\n+    return m_data.end();\n+}\n+\n+/*! \\fn int SamSequenceDictionary::IndexOf(const std::string& name) const\n+    \\internal\n+    \\return index of sequence if found (matching on name).  Otherwise, returns vector::size() (invalid index).\n+*/\n+int SamSequenceDictionary::IndexOf(const std::string& name) const {\n+    SamSequenceConstIterator begin = ConstBegin();\n+    SamSequenceConstIterator iter  = begin;\n+    SamSequenceConstIterator end   = ConstEnd();\n+    for ( ; iter != end; ++iter ) {\n+        const SamSequence& currentSeq = (*iter);\n+        if ( currentSeq.Name == name )\n+            break;\n+    }\n+    return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamSequenceDictionary::IsEmpty(void) const\n+    \\brief Returns \\c true if dictionary contains no sequences\n+    \\sa Size()\n+*/\n+bool SamSequenceDictionary::IsEmpty(void) const {\n+    return m_data.empty();\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const SamSequence& sequence)\n+    \\brief Removes sequence from dictionary, if found (matches on name).\n+\n+    This is an overloaded function.\n+\n+    \\param sequence SamSequence to remove (matching on name)\n+*/\n+void SamSequenceDictionary::Remove(const SamSequence& sequence) {\n+    Remove( sequence.Name );\n+}\n+\n+/*! 
\\fn void SamSequenceDictionary::Remove(const std::string& sequenceName)\n+    \\brief Removes sequence from dictionary, if found.\n+\n+    \\param sequenceName name of sequence to remove\n+    \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::string& sequenceName) {\n+    if ( Contains(sequenceName) )\n+        m_data.erase( m_data.begin() + IndexOf(sequenceName) );\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences)\n+    \\brief Removes multiple sequences from dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param sequences sequences to remove\n+    \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences) {\n+    vector<SamSequence>::const_iterator rgIter = sequences.begin();\n+    vector<SamSequence>::const_iterator rgEnd  = sequences.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Remove(*rgIter);\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames)\n+    \\brief Removes multiple sequences from dictionary.\n+\n+    This is an overloaded function.\n+\n+    \\param sequenceNames names of the sequences to remove\n+    \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames) {\n+    vector<string>::const_iterator rgIter = sequenceNames.begin();\n+    vector<string>::const_iterator rgEnd  = sequenceNames.end();\n+    for ( ; rgIter!= rgEnd; ++rgIter )\n+        Remove(*rgIter);\n+}\n+\n+/*! \\fn int SamSequenceDictionary::Size(void) const\n+    \\brief Returns number of sequences in dictionary.\n+    \\sa IsEmpty()\n+*/\n+int SamSequenceDictionary::Size(void) const {\n+    return m_data.size();\n+}\n+\n+/*! 
\\fn SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName)\n+    \\brief Retrieves the modifiable SamSequence that matches \\a sequenceName.\n+\n+    NOTE - If the dictionary contains no sequence matching this name, this function inserts\n+    a new one with this name (length:0), and returns a reference to it.\n+\n+    If you want to avoid this insertion behavior, check the result of Contains() before\n+    using this operator.\n+\n+    \\param sequenceName name of sequence to retrieve\n+    \\return a modifiable reference to the SamSequence associated with the name\n+*/\n+SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) {\n+\n+    // look up sequence ID\n+    int index = IndexOf(sequenceName);\n+\n+    // if found, return sequence at index\n+    if ( index != (int)m_data.size() )\n+        return m_data[index];\n+\n+    // otherwise, append new sequence and return reference\n+    else {\n+        m_data.push_back( SamSequence(sequenceName, 0) );\n+        return m_data.back();\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,89 @@
+// ***************************************************************************
+// SamSequenceDictionary.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 18 April 2011
+// ---------------------------------------------------------------------------
+// Provides methods for operating on a collection of SamSequence entries.
+// ***************************************************************************
+
+#ifndef SAM_SEQUENCE_DICTIONARY_H
+#define SAM_SEQUENCE_DICTIONARY_H
+
+#include <api/api_global.h>
+#include <api/SamSequence.h>
+#include <string>
+#include <map>
+#include <vector>
+
+namespace BamTools {
+
+typedef std::vector<SamSequence>             SamSequenceContainer;
+typedef SamSequenceContainer::iterator       SamSequenceIterator;
+typedef SamSequenceContainer::const_iterator SamSequenceConstIterator;
+
+// Container of SamSequence entries, stored in a std::vector (see m_data) and
+// looked up by sequence name.
+class API_EXPORT SamSequenceDictionary {
+
+    // ctor & dtor
+    public:
+        SamSequenceDictionary(void);
+        SamSequenceDictionary(const SamSequenceDictionary& other);
+        ~SamSequenceDictionary(void);
+
+    // query/modify sequence data
+    public:
+        // adds a sequence
+        void Add(const SamSequence& sequence);
+        void Add(const std::string& name, const int& length);
+
+        // adds multiple sequences
+        void Add(const std::vector<SamSequence>& sequences);
+        void Add(const std::map<std::string, int>& sequenceMap);
+
+        // clears all sequence entries
+        void Clear(void);
+
+        // returns true if dictionary contains this sequence
+        bool Contains(const SamSequence& sequence) const;
+        bool Contains(const std::string& sequenceName) const;
+
+        // returns true if dictionary is empty
+        bool IsEmpty(void) const;
+
+        // removes sequence, if found
+        void Remove(const SamSequence& sequence);
+        void Remove(const std::string& sequenceName);
+
+        // removes multiple sequences
+        void Remove(const std::vector<SamSequence>& sequences);
+        void Remove(const std::vector<std::string>& sequenceNames);
+
+        // returns number of sequences in dictionary
+        int Size(void) const;
+
+        // retrieves a modifiable reference to the SamSequence object associated with this name
+        // NOTE: if no sequence with this name exists, a new entry (length 0) is appended
+        //       and returned - check Contains() first to avoid the insertion
+        SamSequence& operator[](const std::string& sequenceName);
+
+    // retrieve STL-compatible iterators
+    public:
+        SamSequenceIterator      Begin(void);               // returns iterator to begin()
+        SamSequenceConstIterator Begin(void) const;         // returns const_iterator to begin()
+        SamSequenceConstIterator ConstBegin(void) const;    // returns const_iterator to begin()
+        SamSequenceIterator      End(void);                 // returns iterator to end()
+        SamSequenceConstIterator End(void) const;           // returns const_iterator to end()
+        SamSequenceConstIterator ConstEnd(void) const;      // returns const_iterator to end()
+
+    // internal methods
+    private:
+        // returns position of the named sequence in m_data
+        // (operator[] treats a result equal to m_data.size() as "not found")
+        int IndexOf(const std::string& name) const;
+
+    // data members
+    private:
+        // sequence entries (SamSequenceContainer is std::vector<SamSequence>)
+        SamSequenceContainer m_data;
+};
+
+} // namespace BamTools
+
+#endif // SAM_SEQUENCE_DICTIONARY_H
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,22 @@
+// ***************************************************************************
+// api_global.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides macros for exporting & importing BamTools API library symbols
+// ***************************************************************************
+
+#ifndef API_GLOBAL_H
+#define API_GLOBAL_H
+
+#include "shared/bamtools_global.h"
+
+// API_EXPORT expands to the 'export' macro when BAMTOOLS_API_LIBRARY is defined,
+// and to the 'import' macro otherwise. Both target macros are expected to come
+// from shared/bamtools_global.h (included above).
+#ifdef BAMTOOLS_API_LIBRARY
+#  define API_EXPORT BAMTOOLS_LIBRARY_EXPORT
+#else
+#  define API_EXPORT BAMTOOLS_LIBRARY_IMPORT
+#endif
+
+#endif // API_GLOBAL_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,133 @@
+// ***************************************************************************
+// BamHeader_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 21 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#include <api/BamAux.h>
+#include <api/BamConstants.h>
+#include <api/internal/BamHeader_p.h>
+#include <api/internal/BgzfStream_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+using namespace std;
+
+// ctor - no explicit initialization; the m_header member is default-constructed
+BamHeader::BamHeader(void) { }
+
+// dtor - nothing to release explicitly; m_header is held by value
+BamHeader::~BamHeader(void) { }
+
+// reads magic number from BGZF stream, returns true if valid
+bool BamHeader::CheckMagicNumber(BgzfStream* stream) {
+
+    // try to read magic number
+    char buffer[Constants::BAM_HEADER_MAGIC_LENGTH];
+    if ( stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH) != (int)Constants::BAM_HEADER_MAGIC_LENGTH ) {
+        fprintf(stderr, "BamHeader ERROR: could not read magic number\n");
+        return false;
+    }
+
+    // validate magic number
+    if ( strncmp(buffer, Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH) != 0 ) {
+        fprintf(stderr, "BamHeader ERROR: invalid magic number\n");
+        return false;
+    }
+
+    // all checks out
+    return true;
+}
+
+// clear SamHeader data
+// (forwards directly to SamHeader::Clear() on the wrapped m_header)
+void BamHeader::Clear(void) {
+    m_header.Clear();
+}
+
+// return true if SamHeader data is valid
+// (forwards directly to SamHeader::IsValid() on the wrapped m_header)
+bool BamHeader::IsValid(void) const {
+    return m_header.IsValid();
+}
+
+// load BAM header ('magic number' and SAM header text) from BGZF stream
+// returns true if all OK
+bool BamHeader::Load(BgzfStream* stream) {
+
+    // cannot load if invalid stream
+    if ( stream == 0 )
+        return false;
+
+    // cannot load if magic number is invalid
+    if ( !CheckMagicNumber(stream) )
+        return false;
+
+    // cannot load header if cannot read header length
+    uint32_t length(0);
+    if ( !ReadHeaderLength(stream, length) )
+        return false;
+
+    // cannot load header if cannot read header text
+    if ( !ReadHeaderText(stream, length) )
+        return false;
+
+    // otherwise, everything OK
+    return true;
+}
+
+// reads SAM header text length from BGZF stream, stores it in @length
+// returns read success/fail status
+bool BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) {
+
+    // attempt to read BAM header text length
+    char buffer[sizeof(uint32_t)];
+    if ( stream->Read(buffer, sizeof(uint32_t)) != sizeof(uint32_t) ) {
+        fprintf(stderr, "BamHeader ERROR: could not read header length\n");
+        return false;
+    }
+
+    // convert char buffer to length, return success
+    length = BamTools::UnpackUnsignedInt(buffer);
+    if ( BamTools::SystemIsBigEndian() )
+        BamTools::SwapEndian_32(length);
+    return true;
+}
+
+// reads SAM header text from BGZF stream, stores in SamHeader object
+// returns read success/fail status
+bool BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) {
+
+    // set up destination buffer
+    char* headerText = (char*)calloc(length + 1, 1);
+
+    // attempt to read header text
+    const unsigned bytesRead = stream->Read(headerText, length);
+    const bool readOk = ( bytesRead == length );
+    if ( readOk )
+        m_header.SetHeaderText( (string)((const char*)headerText) );
+    else
+        fprintf(stderr, "BamHeader ERROR: could not read header text\n");
+
+    // clean up calloc-ed temp variable (on success or fail)
+    free(headerText);
+
+    // return read success
+    return readOk;
+}
+
+// returns *copy* of SamHeader data object
+// (returned by value, so changes made by the caller do not affect m_header)
+SamHeader BamHeader::ToSamHeader(void) const {
+    return m_header;
+}
+
+// returns SAM-formatted string of header data
+// (forwards directly to SamHeader::ToString() on the wrapped m_header)
+string BamHeader::ToString(void) const {
+    return m_header.ToString();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,72 @@
+// ***************************************************************************
+// BamHeader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 26 January 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for handling BAM headers.
+// ***************************************************************************
+
+#ifndef BAMHEADER_P_H
+#define BAMHEADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/SamHeader.h>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream;
+
+// Loads a BAM file header from a BGZF stream and holds the resulting SamHeader data.
+class BamHeader {
+
+    // ctor & dtor
+    public:
+        BamHeader(void);
+        ~BamHeader(void);
+
+    // BamHeader interface
+    public:
+        // clear SamHeader data
+        void Clear(void);
+        // return true if SamHeader data is valid
+        bool IsValid(void) const;
+        // load BAM header ('magic number' and SAM header text) from BGZF stream
+        // returns true if all OK
+        bool Load(BgzfStream* stream);
+        // returns (editable) copy of SamHeader data object
+        // (a by-value copy: editing it does not modify this BamHeader)
+        SamHeader ToSamHeader(void) const;
+        // returns SAM-formatted string of header data
+        std::string ToString(void) const;
+
+    // internal methods
+    private:
+        // reads magic number from BGZF stream, returns true if valid
+        bool CheckMagicNumber(BgzfStream* stream);
+        // reads SAM header length from BGZF stream, stores it in @length
+        // returns read success/fail status
+        bool ReadHeaderLength(BgzfStream* stream, uint32_t& length);
+        // reads SAM header text from BGZF stream, stores in SamHeader object
+        // returns read success/fail status
+        bool ReadHeaderText(BgzfStream* stream, const uint32_t& length);
+
+    // data members
+    private:
+        // SAM header data, set from the header text by ReadHeaderText()
+        SamHeader m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMHEADER_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,113 @@
+// ***************************************************************************
+// BamIndexFactory_p.cpp (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#include <api/BamAux.h>
+#include <api/internal/BamIndexFactory_p.h>
+#include <api/internal/BamStandardIndex_p.h>
+#include <api/internal/BamToolsIndex_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+using namespace std;
+
+// generates index filename from BAM filename (depending on requested type)
+// if type is unknown, returns empty string
+const string BamIndexFactory::CreateIndexFilename(const string& bamFilename,
+                                                  const BamIndex::IndexType& type)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() );
+        case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() );
+        default :
+            cerr << "BamIndexFactory ERROR: unknown index type" << type << endl;
+            return string();
+    }
+}
+
+// creates a new BamIndex object, depending on extension of @indexFilename
+BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) {
+
+    // if file doesn't exist, return null index
+    if ( !BamTools::FileExists(indexFilename) )
+        return 0;
+
+    // get file extension from index filename, including dot (".EXT")
+    // if can't get file extension, return null index
+    const string extension = FileExtension(indexFilename);
+    if ( extension.empty() )
+        return 0;
+
+    // create index based on extension
+    if      ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader);
+    else if ( extension == BamToolsIndex::Extension()    ) return new BamToolsIndex(reader);
+    else
+        return 0;
+}
+
+// creates a new BamIndex, object of requested @type
+BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type,
+                                             BamReaderPrivate* reader)
+{
+    switch ( type ) {
+        case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader);
+        case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader);
+        default :
+            cerr << "BamIndexFactory ERROR: unknown index type " << type << endl;
+            return 0;
+    }
+}
+
+// retrieves file extension (including '.'), i.e. everything from the last '.' onward
+// returns empty string if @filename is 4 characters or shorter (this covers the
+// empty string too) or contains no '.' at all
+const string BamIndexFactory::FileExtension(const string& filename) {
+
+    // too short to hold both a name and an extension
+    if ( filename.length() <= 4 )
+        return string();
+
+    // locate the last dot; everything from there to the end is the extension
+    const size_t dotPosition = filename.rfind('.');
+    return ( dotPosition == string::npos ) ? string()
+                                           : filename.substr(dotPosition);
+}
+
+// returns name of existing index file that corresponds to @bamFilename
+// will defer to @preferredType if possible, if not will attempt to load any supported type
+// returns empty string if not found
+const string BamIndexFactory::FindIndexFilename(const string& bamFilename,
+                                                const BamIndex::IndexType& preferredType)
+{
+    // try to find index of preferred type first
+    // return index filename if found
+    string indexFilename = CreateIndexFilename(bamFilename, preferredType);
+    if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+        return indexFilename;
+
+    // couldn't find preferred type, try the other supported types
+    // return index filename if found
+    if ( preferredType != BamIndex::STANDARD ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD);
+        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+            return indexFilename;
+    }
+    if ( preferredType != BamIndex::BAMTOOLS ) {
+        indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS);
+        if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) )
+            return indexFilename;
+    }
+
+    // otherwise couldn't find any index matching this filename
+    return string();
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,50 @@
+// ***************************************************************************
+// BamIndexFactory_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides interface for generating BamIndex implementations
+// ***************************************************************************
+
+#ifndef BAMINDEX_FACTORY_P_H
+#define BAMINDEX_FACTORY_P_H
+
+#include <api/BamIndex.h>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+// Collection of static helpers for creating BamIndex objects and for
+// building/locating index filenames. All members are static.
+class BamIndexFactory {
+
+    // static interface methods
+    public:
+        // creates a new BamIndex object, depending on extension of @indexFilename
+        static BamIndex* CreateIndexFromFilename(const std::string& indexFilename,
+                                                 BamReaderPrivate* reader);
+        // creates a new BamIndex object, of requested @type
+        static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type,
+                                           BamReaderPrivate* reader);
+        // returns name of existing index file that corresponds to @bamFilename
+        // will defer to @preferredType if possible
+        // if @preferredType not found, will attempt to load any supported index type
+        // returns empty string if no index file (of any type) is found
+        static const std::string FindIndexFilename(const std::string& bamFilename,
+                                                   const BamIndex::IndexType& preferredType);
+
+    // internal methods
+    // NOTE(review): labelled "internal" but declared public - confirm this is intentional
+    public:
+        // generates index filename from BAM filename (depending on requested type)
+        // if type is unknown, returns empty string
+        static const std::string CreateIndexFilename(const std::string& bamFilename,
+                                                     const BamIndex::IndexType& type);
+        // retrieves file extension (including '.')
+        static const std::string FileExtension(const std::string& filename);
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMINDEX_FACTORY_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,295 @@\n+// ***************************************************************************\n+// BamMultiMerger_p.h (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides merging functionality for BamMultiReader.  At this point, supports\n+// sorting results by (refId, position) or by read name.\n+// ***************************************************************************\n+\n+#ifndef BAMMULTIMERGER_P_H\n+#define BAMMULTIMERGER_P_H\n+\n+//  -------------\n+//  W A R N I N G\n+//  -------------\n+//\n+// This file is not part of the BamTools API.  It exists purely as an\n+// implementation detail. This header file may change from version to version\n+// without notice, or even be removed.\n+//\n+// We mean it.\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamReader.h>\n+#include <map>\n+#include <queue>\n+#include <string>\n+#include <utility>\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;\n+\n+// generic MultiMerger interface\n+class IBamMultiMerger {\n+\n+    public:\n+        IBamMultiMerger(void) { }\n+        virtual ~IBamMultiMerger(void) { }\n+\n+    public:\n+        virtual void Add(const ReaderAlignment& value) =0;\n+        virtual void Clear(void) =0;\n+        virtual const ReaderAlignment& First(void) const =0;\n+        virtual bool IsEmpty(void) const =0;\n+        virtual void Remove(BamReader* reader) =0;\n+        virtual int Size(void) const =0;\n+        virtual ReaderAlignment TakeFirst(void) =0;\n+};\n+\n+// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position)\n+class PositionMultiMerger : public IBamMultiMerger {\n+\n+    public:\n+        PositionMultiMerger(void) : 
IBamMultiMerger() { }\n+        ~PositionMultiMerger(void) { }\n+\n+    public:\n+        void Add(const ReaderAlignment& value);\n+        void Clear(void);\n+        const ReaderAlignment& First(void) const;\n+        bool IsEmpty(void) const;\n+        void Remove(BamReader* reader);\n+        int Size(void) const;\n+        ReaderAlignment TakeFirst(void);\n+\n+    private:\n+        typedef std::pair<int, int>           KeyType;\n+        typedef ReaderAlignment               ValueType;\n+        typedef std::pair<KeyType, ValueType> ElementType;\n+\n+        typedef std::multimap<KeyType, ValueType> ContainerType;\n+        typedef ContainerType::iterator           DataIterator;\n+        typedef ContainerType::const_iterator     DataConstIterator;\n+\n+        ContainerType m_data;\n+};\n+\n+// IBamMultiMerger implementation - sorted on BamAlignment: Name\n+class ReadNameMultiMerger : public IBamMultiMerger {\n+\n+    public:\n+        ReadNameMultiMerger(void) : IBamMultiMerger() { }\n+        ~ReadNameMultiMerger(void) { }\n+\n+    public:\n+        void Add(const ReaderAlignment& value);\n+        void Clear(void);\n+        const ReaderAlignment& First(void) const;\n+        bool IsEmpty(void) const;\n+        void Remove(BamReader* reader);\n+        int Size(void) const;\n+        ReaderAlignment TakeFirst(void);\n+\n+    private:\n+        typedef std::string                   KeyType;\n+        typedef ReaderAlignment               ValueType;\n+        typedef std::pair<KeyType, ValueType> ElementType;\n+\n+        typedef std::multimap<KeyType, ValueType> ContainerType;\n+        typedef ContainerType::iterator           DataIterator;\n+        typedef ContainerType::const_iterator     DataConstIterator;\n+\n+        ContainerType m_data;\n+};\n+\n+// IBamMultiMerger implementation - unsorted BAM file(s)\n+class UnsortedMultiMerger : public IBamMultiMerger {\n+\n+    public:\n+        UnsortedMultiMerger(void) : IBamMultiMerger() { }\n+        
~UnsortedMultiMerger(void) { }\n+\n+    public:\n+        void Add(const ReaderAlignment& value);\n+        void Clear(void);\n+        con'..b't {\n+    return m_data.empty();\n+}\n+\n+inline void PositionMultiMerger::Remove(BamReader* reader) {\n+\n+    if ( reader == 0 ) return;\n+    const std::string filenameToRemove = reader->GetFilename();\n+\n+    // iterate over readers in cache\n+    DataIterator dataIter = m_data.begin();\n+    DataIterator dataEnd  = m_data.end();\n+    for ( ; dataIter != dataEnd; ++dataIter ) {\n+        const ValueType& entry = (*dataIter).second;\n+        const BamReader* entryReader = entry.first;\n+        if ( entryReader == 0 ) continue;\n+\n+        // remove iterator on match\n+        if ( entryReader->GetFilename() == filenameToRemove ) {\n+            m_data.erase(dataIter);\n+            return;\n+        }\n+    }\n+}\n+\n+inline int PositionMultiMerger::Size(void) const {\n+    return m_data.size();\n+}\n+\n+inline ReaderAlignment PositionMultiMerger::TakeFirst(void) {\n+    DataIterator first = m_data.begin();\n+    ReaderAlignment next = (*first).second;\n+    m_data.erase(first);\n+    return next;\n+}\n+\n+// ------------------------------------------\n+// ReadNameMultiMerger implementation\n+\n+inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {\n+    const KeyType key(value.second->Name);\n+    m_data.insert( ElementType(key, value) );\n+}\n+\n+inline void ReadNameMultiMerger::Clear(void) {\n+    m_data.clear();\n+}\n+\n+inline const ReaderAlignment& ReadNameMultiMerger::First(void) const {\n+    const ElementType& entry = (*m_data.begin());\n+    return entry.second;\n+}\n+\n+inline bool ReadNameMultiMerger::IsEmpty(void) const {\n+    return m_data.empty();\n+}\n+\n+inline void ReadNameMultiMerger::Remove(BamReader* reader) {\n+\n+    if ( reader == 0 ) return;\n+    const std::string filenameToRemove = reader->GetFilename();\n+\n+    // iterate over readers in cache\n+    DataIterator dataIter 
= m_data.begin();\n+    DataIterator dataEnd  = m_data.end();\n+    for ( ; dataIter != dataEnd; ++dataIter ) {\n+        const ValueType& entry = (*dataIter).second;\n+        const BamReader* entryReader = entry.first;\n+        if ( entryReader == 0 ) continue;\n+\n+        // remove iterator on match\n+        if ( entryReader->GetFilename() == filenameToRemove ) {\n+            m_data.erase(dataIter);\n+            return;\n+        }\n+    }\n+\n+}\n+\n+inline int ReadNameMultiMerger::Size(void) const {\n+    return m_data.size();\n+}\n+\n+inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) {\n+    DataIterator first = m_data.begin();\n+    ReaderAlignment next = (*first).second;\n+    m_data.erase(first);\n+    return next;\n+}\n+\n+// ------------------------------------------\n+// UnsortedMultiMerger implementation\n+\n+inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) {\n+    m_data.push_back(value);\n+}\n+\n+inline void UnsortedMultiMerger::Clear(void) {\n+    for (size_t i = 0; i < m_data.size(); ++i )\n+        m_data.pop_back();\n+}\n+\n+inline const ReaderAlignment& UnsortedMultiMerger::First(void) const {\n+    return m_data.front();\n+}\n+\n+inline bool UnsortedMultiMerger::IsEmpty(void) const {\n+    return m_data.empty();\n+}\n+\n+inline void UnsortedMultiMerger::Remove(BamReader* reader) {\n+\n+    if ( reader == 0 ) return;\n+    const std::string filenameToRemove = reader->GetFilename();\n+\n+    // iterate over readers in cache\n+    DataIterator dataIter = m_data.begin();\n+    DataIterator dataEnd  = m_data.end();\n+    for ( ; dataIter != dataEnd; ++dataIter ) {\n+        const BamReader* entryReader = (*dataIter).first;\n+        if ( entryReader == 0 ) continue;\n+\n+        // remove iterator on match\n+        if ( entryReader->GetFilename() == filenameToRemove ) {\n+            m_data.erase(dataIter);\n+            return;\n+        }\n+    }\n+}\n+\n+inline int UnsortedMultiMerger::Size(void) const {\n+    
return m_data.size();\n+}\n+\n+inline ReaderAlignment UnsortedMultiMerger::TakeFirst(void) {\n+    ReaderAlignment first = m_data.front();\n+    m_data.erase( m_data.begin() );\n+    return first;\n+}\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+#endif // BAMMULTIMERGER_P_H\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
b"@@ -0,0 +1,802 @@\n+// ***************************************************************************\n+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Functionality for simultaneously reading multiple BAM files\n+// *************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamMultiReader.h>\n+#include <api/internal/BamMultiMerger_p.h>\n+#include <api/internal/BamMultiReader_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <fstream>\n+#include <iostream>\n+#include <iterator>\n+#include <sstream>\n+using namespace std;\n+\n+// ctor\n+BamMultiReaderPrivate::BamMultiReaderPrivate(void)\n+    : m_alignments(0)\n+    , m_isCoreMode(false)\n+    , m_sortOrder(BamMultiReader::SortedByPosition)\n+{ }\n+\n+// dtor\n+BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {\n+\n+    // close all open BAM readers\n+    Close();\n+\n+    // clean up alignment cache\n+    delete m_alignments;\n+    m_alignments = 0;\n+}\n+\n+// close all BAM files\n+void BamMultiReaderPrivate::Close(void) {\n+    CloseFiles( Filenames() );\n+}\n+\n+// close requested BAM file\n+void BamMultiReaderPrivate::CloseFile(const string& filename) {    \n+    vector<string> filenames(1, filename);\n+    CloseFiles(filenames);\n+}\n+\n+// close requested BAM files\n+void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {\n+\n+    // iterate over filenames\n+    vector<string>::const_iterator filesIter = filenames.begin();\n+    vector<string>::const_iterator filesEnd  = filenames.end();\n+    for ( ; filesIter != filesEnd; ++filesIter ) {\n+        const string& filename = 
(*filesIter);\n+        if ( filename.empty() ) continue;\n+\n+        // iterate over readers\n+        vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n+        vector<ReaderAlignment>::iterator readerEnd  = m_readers.end();\n+        for ( ; readerIter != readerEnd; ++readerIter ) {\n+            BamReader* reader = (*readerIter).first;\n+            if ( reader == 0 ) continue;\n+\n+            // if reader matches requested filename\n+            if ( reader->GetFilename() == filename ) {\n+\n+                // remove reader/alignment from alignment cache\n+                m_alignments->Remove(reader);\n+\n+                // close & delete reader\n+                reader->Close();\n+                delete reader;\n+                reader = 0;\n+\n+                // delete reader's alignment entry\n+                BamAlignment* alignment = (*readerIter).second;\n+                delete alignment;\n+                alignment = 0;\n+\n+                // remove reader from container\n+                m_readers.erase(readerIter);\n+\n+                // on match, just go on to next filename\n+                // (no need to keep looking and iterator is invalid now anyway)\n+                break;\n+            }\n+        }\n+    }\n+\n+    // make sure alignment cache is cleared if all readers are now closed\n+    if ( m_readers.empty() && m_alignments != 0 )\n+        m_alignments->Clear();\n+}\n+\n+// creates index files for BAM files that don't have them\n+bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {\n+\n+    bool result = true;\n+\n+    // iterate over readers\n+    vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n+    vector<ReaderAlignment>::iterator readerEnd  = m_readers.end();\n+    for ( ; readerIter != readerEnd; ++readerIter ) {\n+        BamReader* reader = (*readerIter).first;\n+        if ( reader == 0 ) continue;\n+\n+        // if reader doesn't have an index, create one\n+    
    if ( !reader->HasIndex() )\n+            result &= reader->CreateIndex(type);\n+    }\n+\n+    return result;\n+}\n+\n+I"..b'= (*readerIter).second;\n+        if ( reader == 0 || alignment == 0 ) continue;\n+\n+        // save next alignment from each reader in cache\n+        SaveNextAlignment(reader, alignment);\n+    }\n+}\n+\n+// ValidateReaders checks that all the readers point to BAM files representing\n+// alignments against the same set of reference sequences, and that the\n+// sequences are identically ordered.  If these checks fail the operation of\n+// the multireader is undefined, so we force program exit.\n+void BamMultiReaderPrivate::ValidateReaders(void) const {\n+\n+    // retrieve first reader data\n+    const BamReader* firstReader = m_readers.front().first;\n+    if ( firstReader == 0 ) return;\n+    const RefVector firstReaderRefData = firstReader->GetReferenceData();\n+    const int firstReaderRefCount = firstReader->GetReferenceCount();\n+    const int firstReaderRefSize = firstReaderRefData.size();\n+\n+    // iterate over all readers\n+    vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();\n+    vector<ReaderAlignment>::const_iterator readerEnd  = m_readers.end();\n+    for ( ; readerIter != readerEnd; ++readerIter ) {\n+\n+        // get current reader data\n+        BamReader* reader = (*readerIter).first;\n+        if ( reader == 0 ) continue;\n+        const RefVector currentReaderRefData = reader->GetReferenceData();\n+        const int currentReaderRefCount = reader->GetReferenceCount();\n+        const int currentReaderRefSize  = currentReaderRefData.size();\n+\n+        // init container iterators\n+        RefVector::const_iterator firstRefIter   = firstReaderRefData.begin();\n+        RefVector::const_iterator firstRefEnd    = firstReaderRefData.end();\n+        RefVector::const_iterator currentRefIter = currentReaderRefData.begin();\n+\n+        // compare reference counts from BamReader ( & container 
size, in case of BR error)\n+        if ( (currentReaderRefCount != firstReaderRefCount) ||\n+             (firstReaderRefSize    != currentReaderRefSize) )\n+        {\n+            cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename()\n+                 << " expected " << firstReaderRefCount\n+                 << " reference sequences but only found " << currentReaderRefCount << endl;\n+            exit(1);\n+        }\n+\n+        // this will be ok; we just checked above that we have identically-sized sets of references\n+        // here we simply check if they are all, in fact, equal in content\n+        while ( firstRefIter != firstRefEnd ) {\n+            const RefData& firstRef   = (*firstRefIter);\n+            const RefData& currentRef = (*currentRefIter);\n+\n+            // compare reference name & length\n+            if ( (firstRef.RefName   != currentRef.RefName) ||\n+                 (firstRef.RefLength != currentRef.RefLength) )\n+            {\n+                cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename()\n+                     << " expected: " << endl;\n+\n+                // print first reader\'s reference data\n+                RefVector::const_iterator refIter = firstReaderRefData.begin();\n+                RefVector::const_iterator refEnd  = firstReaderRefData.end();\n+                for ( ; refIter != refEnd; ++refIter ) {\n+                    const RefData& entry = (*refIter);\n+                    cerr << entry.RefName << " " << entry.RefLength << endl;\n+                }\n+\n+                cerr << "but found: " << endl;\n+\n+                // print current reader\'s reference data\n+                refIter = currentReaderRefData.begin();\n+                refEnd  = currentReaderRefData.end();\n+                for ( ; refIter != refEnd; ++refIter ) {\n+                    const RefData& entry = (*refIter);\n+                    cerr << entry.RefName 
<< " " << entry.RefLength << endl;\n+                }\n+\n+                exit(1);\n+            }\n+\n+            // update iterators\n+            ++firstRefIter;\n+            ++currentRefIter;\n+        }\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,102 @@
+// ***************************************************************************
+// BamMultiReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 13 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// *************************************************************************
+
+#ifndef BAMMULTIREADER_P_H
+#define BAMMULTIREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/SamHeader.h>
+#include <api/BamMultiReader.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class IBamMultiMerger;
+
+class BamMultiReaderPrivate {
+    // Implementation-detail class (not part of the public BamTools API) that reads several BAM files at once, one BamReader per file.
+    // constructor / destructor
+    public:
+        BamMultiReaderPrivate(void);
+        ~BamMultiReaderPrivate(void);
+
+    // public interface
+    public:
+
+        // file operations
+        void Close(void);
+        void CloseFile(const std::string& filename);
+        void CloseFiles(const std::vector<std::string>& filenames);  // empty names are skipped; each matching reader is closed, deleted & erased from m_readers
+        const std::vector<std::string> Filenames(void) const;
+        bool Jump(int refID, int position = 0);
+        bool Open(const std::vector<std::string>& filenames);
+        bool OpenFile(const std::string& filename);
+        void PrintFilenames(void) const;
+        bool Rewind(void);
+        bool SetRegion(const BamRegion& region);
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& al);
+        bool GetNextAlignmentCore(BamAlignment& al);
+        bool HasOpenReaders(void);
+        void SetSortOrder(const BamMultiReader::SortOrder& order);
+
+        // access auxiliary data
+        SamHeader GetHeader(void) const;
+        std::string GetHeaderText(void) const;
+        int GetReferenceCount(void) const;
+        const BamTools::RefVector GetReferenceData(void) const;
+        int GetReferenceID(const std::string& refName) const;
+
+        // BAM index operations
+        bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD);  // creates an index for every reader lacking one; false if any creation failed
+        bool HasIndexes(void) const;
+        bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD);
+        bool OpenIndexes(const std::vector<std::string>& indexFilenames);
+        void SetIndexCacheMode(const BamIndex::IndexCacheMode mode);
+
+    // 'internal' methods (note: still declared public)
+    public:
+        IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const;
+        const std::string ExtractReadGroup(const std::string& headerLine) const;
+        bool HasAlignmentData(void) const;
+        bool LoadNextAlignment(BamAlignment& al);
+        BamTools::BamReader* OpenReader(const std::string& filename);
+        bool RewindReaders(void);
+        void SaveNextAlignment(BamTools::BamReader* reader, BamTools::BamAlignment* alignment);
+        const std::vector<std::string> SplitHeaderText(const std::string& headerText) const;
+        void UpdateAlignmentCache(void);
+        void ValidateReaders(void) const;  // exit(1)s unless every reader has the same reference names/lengths, in the same order, as the first reader. NOTE(review): calls m_readers.front() without an emptiness check - confirm callers guarantee at least one reader
+
+    // data members
+    public:
+        typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;  // a reader paired with its alignment object
+        std::vector<ReaderAlignment> m_readers;  // null readers are skipped by the iterating methods; CloseFiles() deletes both pointers of an entry it removes
+
+        IBamMultiMerger* m_alignments;  // alignment cache; may be null (CloseFiles() checks for 0 before calling Clear())
+        bool m_isCoreMode;
+        BamMultiReader::SortOrder m_sortOrder;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,273 @@\n+// ***************************************************************************\n+// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011(DB)\n+// ---------------------------------------------------------------------------\n+// Manages random access operations in a BAM file\n+// **************************************************************************\n+\n+#include <api/BamIndex.h>\n+#include <api/internal/BamRandomAccessController_p.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamIndexFactory_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <iostream>\n+using namespace std;\n+\n+BamRandomAccessController::BamRandomAccessController(void)\n+    : m_index(0)\n+    , m_indexCacheMode(BamIndex::LimitedIndexCaching)\n+    , m_hasAlignmentsInRegion(true)\n+{ }\n+\n+BamRandomAccessController::~BamRandomAccessController(void) {\n+    Close();\n+}\n+\n+void BamRandomAccessController::AdjustRegion(const int& referenceCount) {\n+\n+    // skip if no index available\n+    if ( m_index == 0 )\n+        return;\n+\n+    // see if any references in region have alignments\n+    m_hasAlignmentsInRegion = false;\n+    int currentId = m_region.LeftRefID;\n+    const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? 
m_region.RightRefID : referenceCount - 1 );\n+    while ( currentId <= rightBoundRefId ) {\n+        m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);\n+        if ( m_hasAlignmentsInRegion ) break;\n+        ++currentId;\n+    }\n+\n+    // if no data found on any reference in region\n+    if ( !m_hasAlignmentsInRegion )\n+        return;\n+\n+    // if left bound of desired region had no data, use first reference that had data\n+    // otherwise, leave requested region as-is\n+    if ( currentId != m_region.LeftRefID ) {\n+        m_region.LeftRefID = currentId;\n+        m_region.LeftPosition = 0;\n+    }\n+}\n+\n+// returns alignments\' "RegionState": { Before|Overlaps|After } current region\n+BamRandomAccessController::RegionState\n+BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {\n+\n+    // if region has no left bound at all\n+    if ( !m_region.isLeftBoundSpecified() )\n+        return OverlapsRegion;\n+\n+    // handle unmapped reads - return AFTER region to halt processing\n+    if ( alignment.RefID == -1 )\n+        return AfterRegion;\n+\n+    // if alignment is on any reference before left bound reference\n+    if ( alignment.RefID < m_region.LeftRefID )\n+        return BeforeRegion;\n+\n+    // if alignment is on left bound reference\n+    else if ( alignment.RefID == m_region.LeftRefID ) {\n+\n+        // if alignment starts at or after left bound position\n+        if ( alignment.Position >= m_region.LeftPosition) {\n+\n+            if ( m_region.isRightBoundSpecified() &&            // right bound is specified AND\n+                 m_region.LeftRefID == m_region.RightRefID &&   // left & right bounds on same reference AND\n+                 alignment.Position > m_region.RightPosition )  // alignment starts after right bound position\n+                return AfterRegion;\n+\n+            // otherwise, alignment overlaps region\n+            else return OverlapsRegion;\n+        }\n+\n+        // alignment 
starts before left bound position\n+        else {\n+\n+            // if alignment overlaps left bound position\n+            if ( alignment.GetEndPosition() >= m_region.LeftPosition )\n+                return OverlapsRegion;\n+            else\n+                return BeforeRegion;\n+        }\n+    }\n+\n+    // otherwise alignment is on a reference after left bound reference\n+    else {\n+\n+        // if region has a right bound\n+        if ( m_region.isRightBoundSpecified() ) {\n+\n+            // alignment is on any reference between boundaries\n+            if ( alignment.RefID < m_region.Ri'..b' "BamRandomAccessController ERROR: could not create index for BAM file: "\n+             << reader->Filename() << endl;\n+        return false;\n+    }\n+\n+    // save new index\n+    SetIndex(newIndex);\n+\n+    // set new index\'s cache mode & return success\n+    newIndex->SetCacheMode(m_indexCacheMode);\n+    return true;\n+}\n+\n+bool BamRandomAccessController::HasIndex(void) const {\n+    return ( m_index != 0 );\n+}\n+\n+bool BamRandomAccessController::HasRegion(void) const  {\n+    return ( !m_region.isNull() );\n+}\n+\n+bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {\n+    return m_index->HasAlignments(refId);\n+}\n+\n+bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,\n+                                            const BamIndex::IndexType& preferredType)\n+{\n+    // look up index filename, deferring to preferredType if possible\n+    const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);\n+\n+    // if no index file found (of any type)\n+    if ( indexFilename.empty() ) {\n+        cerr << "BamRandomAccessController WARNING: "\n+             << "could not find index file for BAM: "\n+             << reader->Filename() << endl;\n+        return false;\n+    }\n+\n+    // otherwise open & use index file that was found\n+    return OpenIndex(indexFilename, 
reader);\n+}\n+\n+bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {\n+\n+    // attempt create new index of type based on filename\n+    BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);\n+    if ( index == 0 ) {\n+        cerr << "BamRandomAccessController ERROR: could not create index for file: " << indexFilename << endl;\n+        return false;\n+    }\n+\n+    // set cache mode\n+    index->SetCacheMode(m_indexCacheMode);\n+\n+    // attempt to load data from index file\n+    if ( !index->Load(indexFilename) ) {\n+        cerr << "BamRandomAccessController ERROR: could not load index data from file: " << indexFilename << endl;\n+        return false;\n+    }\n+\n+    // save new index & return success\n+    SetIndex(index);\n+    return true;\n+}\n+\n+bool BamRandomAccessController::RegionHasAlignments(void) const {\n+    return m_hasAlignmentsInRegion;\n+}\n+\n+void BamRandomAccessController::SetIndex(BamIndex* index) {\n+    if ( m_index )\n+        ClearIndex();\n+    m_index = index;\n+}\n+\n+void BamRandomAccessController::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+    m_indexCacheMode = mode;\n+    if ( m_index )\n+        m_index->SetCacheMode(mode);\n+}\n+\n+bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,\n+                                          const BamRegion& region,\n+                                          const int& referenceCount)\n+{\n+    // store region\n+    m_region = region;\n+\n+    // cannot jump when no index is available\n+    if ( !HasIndex() )\n+        return false;\n+\n+    // adjust region as necessary to reflect where data actually begins\n+    AdjustRegion(referenceCount);\n+\n+    // if no data present, return true\n+    //   * Not an error, but future attempts to access alignments in this region will not return data\n+    //     Returning true is useful in a BamMultiReader setting where some BAM files 
may\n+    //     lack alignments in regions where other BAMs do have data.\n+    if ( !m_hasAlignmentsInRegion )\n+        return true;\n+\n+    // return success/failure of jump to specified region,\n+    //\n+    //  * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag\n+    //    This covers \'corner case\' where a region is requested that lies beyond the last\n+    //    alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]\n+    //    will not return data. BamMultiReader will still be able to successfully pull alignments\n+    //    from a region from multiple files even if one or more have no data.\n+    return m_index->Jump(m_region, &m_hasAlignmentsInRegion);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,94 @@
+// ***************************************************************************
+// BamRandomAccessController_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 24 February 2011(DB)
+// ---------------------------------------------------------------------------
+// Manages random access operations in a BAM file
+// ***************************************************************************
+
+#ifndef BAMRACONTROLLER_P_H
+#define BAMRACONTROLLER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAux.h>
+#include <api/BamIndex.h>
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+class BamReaderPrivate;
+
+class BamRandomAccessController {
+    // Manages random access in a BAM file: holds the index (if any) and the currently requested region.
+    // enums
+    public: enum RegionState { BeforeRegion = 0  // where an alignment lies relative to the current region (see AlignmentState)
+                             , OverlapsRegion
+                             , AfterRegion
+                             };
+
+    // ctor & dtor
+    public:
+        BamRandomAccessController(void);   // starts with no index, LimitedIndexCaching, and "region has alignments" == true
+        ~BamRandomAccessController(void);  // calls Close()
+
+    // general interface
+    public:
+        void Close(void);
+
+    // index operations
+    public:
+        // the controller holds at most one index at a time (m_index)
+        void ClearIndex(void);
+        bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type);  // on success the new index is stored via SetIndex() and given the current cache mode
+        bool HasIndex(void) const;  // true when m_index is non-null
+        bool IndexHasAlignmentsForReference(const int& refId);  // NOTE(review): dereferences m_index without a null check - call only when HasIndex() is true
+        bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType);  // finds an index file for reader->Filename(); warns on stderr & returns false if none, else defers to OpenIndex()
+        bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader);  // creates an index from the filename and loads it; NOTE(review): the visible implementation does not delete the new index when Load() fails
+        void SetIndex(BamIndex* index);  // calls ClearIndex() first if an index is already held, then stores 'index'
+        void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);  // remembers 'mode' and forwards it to the current index, if any
+
+    // region operations
+    public:
+        void ClearRegion(void);
+        bool HasRegion(void) const;  // true when m_region is not null
+        RegionState AlignmentState(const BamAlignment& alignment) const;  // OverlapsRegion if the region has no left bound; AfterRegion for unmapped reads (RefID == -1)
+        bool RegionHasAlignments(void) const;  // returns m_hasAlignmentsInRegion
+        bool SetRegion(BamReaderPrivate* reader,   // NOTE(review): 'reader' is not used by the visible implementation
+                       const BamRegion& region,    // stored even when the call then fails for lack of an index
+                       const int& referenceCount); // returns false without an index, true if the region holds no data, else the result of the index Jump()
+
+    // 'internal' methods (note: still declared public)
+    public:
+        // adjusts requested region if necessary (depending on where data actually begins);
+        // no-op without an index; otherwise moves the left bound to the first reference that has alignments (position 0)
+        void AdjustRegion(const int& referenceCount);
+
+    // data members
+    private:
+
+        // index data
+        BamIndex* m_index;  // owns index, not a copy - responsible for deleting
+        BamIndex::IndexCacheMode m_indexCacheMode;  // applied to every index created, opened or set through this controller's CreateIndex/OpenIndex/SetIndexCacheMode
+
+        // region data
+        BamRegion m_region;
+        bool m_hasAlignmentsInRegion;  // updated by AdjustRegion() and, via pointer, by the index Jump() in SetRegion()
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMRACONTROLLER_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,381 @@\n+// ***************************************************************************\n+// BamReader_p.cpp (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 10 May 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the basic functionality for reading BAM files\n+// ***************************************************************************\n+\n+#include <api/BamConstants.h>\n+#include <api/BamReader.h>\n+#include <api/internal/BamHeader_p.h>\n+#include <api/internal/BamRandomAccessController_p.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamStandardIndex_p.h>\n+#include <api/internal/BamToolsIndex_p.h>\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <vector>\n+using namespace std;\n+\n+// constructor\n+BamReaderPrivate::BamReaderPrivate(BamReader* parent)\n+    : m_alignmentsBeginOffset(0)\n+    , m_parent(parent)\n+{\n+    m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// destructor\n+BamReaderPrivate::~BamReaderPrivate(void) {\n+    Close();\n+}\n+\n+// closes the BAM file\n+void BamReaderPrivate::Close(void) {\n+\n+    // clear header & reference data\n+    m_references.clear();\n+    m_header.Clear();\n+\n+    // close internal\n+    m_randomAccessController.Close();\n+    m_stream.Close();\n+\n+    // clear filename\n+    m_filename.clear();\n+}\n+\n+// creates an index file of requested type on current BAM file\n+bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {\n+    if ( !IsOpen() ) return false;\n+    return m_randomAccessController.CreateIndex(this, type);\n+}\n+\n+// return path & filename of current BAM file\n+const string 
BamReaderPrivate::Filename(void) const {\n+    return m_filename;\n+}\n+\n+// return header data as std::string\n+string BamReaderPrivate::GetHeaderText(void) const {\n+    return m_header.ToString();\n+}\n+\n+// return header data as SamHeader object\n+SamHeader BamReaderPrivate::GetSamHeader(void) const {\n+    return m_header.ToSamHeader();\n+}\n+\n+// get next alignment (with character data fully parsed)\n+bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {\n+\n+    // if valid alignment found\n+    if ( GetNextAlignmentCore(alignment) ) {\n+\n+        // store alignment\'s "source" filename\n+        alignment.Filename = m_filename;\n+\n+        // return success/failure of parsing char data\n+        return alignment.BuildCharData();\n+    }\n+\n+    // no valid alignment found\n+    return false;\n+}\n+\n+// retrieves next available alignment core data (returns success/fail)\n+// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)\n+//    these can be accessed, if necessary, from the supportData\n+// useful for operations requiring ONLY positional or other alignment-related information\n+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {\n+\n+    // skip if region is set but has no alignments\n+    if ( m_randomAccessController.HasRegion() &&\n+         !m_randomAccessController.RegionHasAlignments() )\n+    {\n+        return false;\n+    }\n+\n+    // if can\'t read next alignment\n+    if ( !LoadNextAlignment(alignment) )\n+        return false;\n+\n+    // check alignment\'s region-overlap state\n+    BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);\n+\n+    // if alignment starts after region, no need to keep reading\n+    if ( state == BamRandomAccessController::AfterRegion )\n+        return false;\n+\n+    // read until overlap is found\n+    while ( state != BamRandomAccessController::OverlapsRegion ) {\n+\n+        // if 
can\'t read next alignment\n+        if ( !LoadNextAlignment(alignment) )\n+            return false;\n+\n+        // check alignment\'s region-overlap sta'..b'n readCharDataOK;\n+}\n+\n+// loads reference data from BAM file\n+bool BamReaderPrivate::LoadReferenceData(void) {\n+\n+    // get number of reference sequences\n+    char buffer[sizeof(uint32_t)];\n+    m_stream.Read(buffer, sizeof(uint32_t));\n+    uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);\n+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);\n+    m_references.reserve((int)numberRefSeqs);\n+\n+    // iterate over all references in header\n+    for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {\n+\n+        // get length of reference name\n+        m_stream.Read(buffer, sizeof(uint32_t));\n+        uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);\n+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);\n+        char* refName = (char*)calloc(refNameLength, 1);\n+\n+        // get reference name and reference sequence length\n+        m_stream.Read(refName, refNameLength);\n+        m_stream.Read(buffer, sizeof(int32_t));\n+        int32_t refLength = BamTools::UnpackSignedInt(buffer);\n+        if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);\n+\n+        // store data for reference\n+        RefData aReference;\n+        aReference.RefName   = (string)((const char*)refName);\n+        aReference.RefLength = refLength;\n+        m_references.push_back(aReference);\n+\n+        // clean up calloc-ed temp variable\n+        free(refName);\n+    }\n+\n+    // return success\n+    return true;\n+}\n+\n+bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {\n+    return m_randomAccessController.LocateIndex(this, preferredType);\n+}\n+\n+// opens BAM file (and index)\n+bool BamReaderPrivate::Open(const string& filename) {\n+\n+    // close current BAM file if open\n+    if ( m_stream.IsOpen )\n+        Close();\n+\n+  
  // attempt to open BgzfStream for reading\n+    if ( !m_stream.Open(filename, "rb") ) {\n+        cerr << "BamReader ERROR: Could not open BGZF stream for " << filename << endl;\n+        return false;\n+    }\n+\n+    // attempt to load header data\n+    if ( !LoadHeaderData() ) {\n+        cerr << "BamReader ERROR: Could not load header data for " << filename << endl;\n+        Close();\n+        return false;\n+    }\n+\n+    // attempt to load reference data\n+    if ( !LoadReferenceData() ) {\n+        cerr << "BamReader ERROR: Could not load reference data for " << filename << endl;\n+        Close();\n+        return false;\n+    }\n+\n+    // if all OK, store filename & offset of first alignment\n+    m_filename = filename;\n+    m_alignmentsBeginOffset = m_stream.Tell();\n+\n+    // return success\n+    return true;\n+}\n+\n+bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {\n+    return m_randomAccessController.OpenIndex(indexFilename, this);\n+}\n+\n+// returns BAM file pointer to beginning of alignment data\n+bool BamReaderPrivate::Rewind(void) {\n+\n+    // attempt rewind to first alignment\n+    if ( !m_stream.Seek(m_alignmentsBeginOffset) )\n+        return false;\n+\n+    // verify that we can read first alignment\n+    BamAlignment al;\n+    if ( !LoadNextAlignment(al) )\n+        return false;\n+\n+    // reset region\n+    m_randomAccessController.ClearRegion();\n+\n+    // rewind back to beginning of first alignment\n+    // return success/fail of seek\n+    return m_stream.Seek(m_alignmentsBeginOffset);\n+}\n+\n+bool BamReaderPrivate::Seek(const int64_t& position) {\n+    return m_stream.Seek(position);\n+}\n+\n+void BamReaderPrivate::SetIndex(BamIndex* index) {\n+    m_randomAccessController.SetIndex(index);\n+}\n+\n+// change the index caching behavior\n+void BamReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+    m_randomAccessController.SetIndexCacheMode(mode);\n+}\n+\n+// sets current region & 
attempts to jump to it\n+// returns success/failure\n+bool BamReaderPrivate::SetRegion(const BamRegion& region) {\n+    return m_randomAccessController.SetRegion(this, region, m_references.size());\n+}\n+\n+int64_t BamReaderPrivate::Tell(void) const {\n+    return m_stream.Tell();\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,113 @@
+// ***************************************************************************
+// BamReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_P_H
+#define BAMREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAlignment.h>
+#include <api/BamIndex.h>
+#include <api/BamReader.h>
+#include <api/SamHeader.h>
+#include <api/internal/BamHeader_p.h>
+#include <api/internal/BamRandomAccessController_p.h>
+#include <api/internal/BgzfStream_p.h>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BamReaderPrivate {
+    // Implementation-detail class providing the basic functionality for reading a single BAM file.
+    // ctor & dtor
+    public:
+        BamReaderPrivate(BamReader* parent);  // stores 'parent', zeroes the alignments offset and records system endianness
+        ~BamReaderPrivate(void);              // calls Close()
+
+    // BamReader interface
+    public:
+
+        // file operations
+        void Close(void);  // clears reference & header data, closes the random-access controller and the stream, clears the filename
+        const std::string Filename(void) const;  // returns m_filename
+        bool IsOpen(void) const;
+        bool Open(const std::string& filename);  // closes any open file first; loads header & reference data; on failure prints to stderr and returns false
+        bool Rewind(void);  // seeks to the first alignment, verifies one can be loaded, clears the region, then seeks back
+        bool SetRegion(const BamRegion& region);  // forwards to the random-access controller, passing the reference count
+
+        // access alignment data
+        bool GetNextAlignment(BamAlignment& alignment);      // core data plus source filename and parsed character data
+        bool GetNextAlignmentCore(BamAlignment& alignment);  // core data only; false at once if a region is set but holds no alignments
+
+        // access auxiliary data
+        std::string GetHeaderText(void) const;  // m_header.ToString()
+        SamHeader GetSamHeader(void) const;     // m_header.ToSamHeader()
+        int GetReferenceCount(void) const;
+        const RefVector& GetReferenceData(void) const;
+        int GetReferenceID(const std::string& refName) const;
+
+        // index operations (LocateIndex, OpenIndex, SetIndex and SetIndexCacheMode simply forward to m_randomAccessController)
+        bool CreateIndex(const BamIndex::IndexType& type);  // returns false if no file is open
+        bool HasIndex(void) const;
+        bool LocateIndex(const BamIndex::IndexType& preferredType);
+        bool OpenIndex(const std::string& indexFilename);
+        void SetIndex(BamIndex* index);
+        void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode);
+
+    // internal methods, but available as a BamReaderPrivate 'interface'
+    //
+    // these methods should only be used by BamTools::Internal classes
+    // (currently only used by the BamIndex subclasses)
+    public:
+        // retrieves header text from BAM file
+        bool LoadHeaderData(void);
+        // retrieves BAM alignment under file pointer
+        // (does no overlap checking or character data parsing)
+        bool LoadNextAlignment(BamAlignment& alignment);
+        // builds reference data structure from BAM file
+        // NOTE(review): the visible implementation always returns true; its stream reads are not checked
+        bool LoadReferenceData(void);
+        // seek reader to file position
+        bool Seek(const int64_t& position);
+        // return reader's file position
+        int64_t Tell(void) const;
+
+    // data members
+    public:
+
+        // general BAM file data
+        int64_t     m_alignmentsBeginOffset;  // stream position recorded by Open() right after the reference data
+        std::string m_filename;               // set by Open() on success, cleared by Close()
+        RefVector   m_references;             // filled by LoadReferenceData(): one name/length entry per reference
+
+        // system data
+        bool m_isBigEndian;  // when true, 32-bit values read from the file are byte-swapped
+
+        // parent BamReader
+        BamReader* m_parent;
+
+        // BamReaderPrivate components
+        BamHeader m_header;
+        BamRandomAccessController m_randomAccessController;
+        BgzfStream m_stream;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMREADER_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,974 @@\n+// ***************************************************************************\n+// BamStandardIndex.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 16 June 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the standardized BAM index format (".bai")\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamStandardIndex_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <algorithm>\n+#include <iostream>\n+using namespace std;\n+\n+// static BamStandardIndex constants\n+const int BamStandardIndex::MAX_BIN               = 37450;  // =(8^6-1)/7+1\n+const int BamStandardIndex::BAM_LIDX_SHIFT        = 14;\n+const string BamStandardIndex::BAI_EXTENSION      = ".bai";\n+const char* const BamStandardIndex::BAI_MAGIC     = "BAI\\1";\n+const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;\n+const int BamStandardIndex::SIZEOF_BINCORE        = sizeof(uint32_t) + sizeof(int32_t);\n+const int BamStandardIndex::SIZEOF_LINEAROFFSET   = sizeof(uint64_t);\n+\n+// ctor\n+BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)\n+    : BamIndex(reader)\n+    , m_indexStream(0)\n+    , m_cacheMode(BamIndex::LimitedIndexCaching)\n+    , m_buffer(0)\n+    , m_bufferLength(0)\n+{\n+     m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// dtor\n+BamStandardIndex::~BamStandardIndex(void) {\n+    CloseFile();\n+}\n+\n+bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {\n+\n+    // retrieve references from reader\n+    const RefVector& 
references = m_reader->GetReferenceData();\n+\n+    // make sure left-bound position is valid\n+    if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )\n+        return false;\n+\n+    // set region \'begin\'\n+    begin = (unsigned int)region.LeftPosition;\n+\n+    // if right bound specified AND left&right bounds are on same reference\n+    // OK to use right bound position as region \'end\'\n+    if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )\n+        end = (unsigned int)region.RightPosition;\n+\n+    // otherwise, set region \'end\' to last reference base\n+    else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;\n+\n+    // return success\n+    return true;\n+}\n+\n+void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,\n+                                              const uint32_t& end,\n+                                              set<uint16_t>& candidateBins)\n+{\n+    // initialize list, bin \'0\' is always a valid bin\n+    candidateBins.insert(0);\n+\n+    // get rest of bins that contain this region\n+    unsigned int k;\n+    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { candidateBins.insert(k); }\n+    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { candidateBins.insert(k); }\n+    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { candidateBins.insert(k); }\n+    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { candidateBins.insert(k); }\n+    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }\n+}\n+\n+bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n+                                                 const uint64_t& minOffset,\n+                                                 set<uint16_t>& candidateBins,\n+                                                 vector<int64_t>& offsets)\n+{\n+    // attempt seek to first bin\n+    if ( 
!Seek(refSummary.FirstBinFilePosition, SEEK_SET) )\n+        return false;\n+\n+    // iterate over reference bins\n+    uint32_t bi'..b"ctor& chunks) {\n+\n+    // make sure chunks are merged (simplified) before writing & saving summary\n+    MergeAlignmentChunks(chunks);\n+\n+    size_t elementsWritten = 0;\n+\n+    // write chunks\n+    int32_t chunkCount = chunks.size();\n+    if ( m_isBigEndian ) SwapEndian_32(chunkCount);\n+    elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);\n+\n+    // iterate over chunks\n+    bool chunksOk = true;\n+    BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();\n+    BaiAlignmentChunkVector::const_iterator chunkEnd  = chunks.end();\n+    for ( ; chunkIter != chunkEnd; ++chunkIter )\n+        chunksOk &= WriteAlignmentChunk( (*chunkIter) );\n+\n+    // return success/failure of write\n+    return ( (elementsWritten == 1) && chunksOk );\n+}\n+\n+bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {\n+\n+    size_t elementsWritten = 0;\n+\n+    // write BAM bin ID\n+    uint32_t binKey = binId;\n+    if ( m_isBigEndian ) SwapEndian_32(binKey);\n+    elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);\n+\n+    // write bin's alignment chunks\n+    bool chunksOk = WriteAlignmentChunks(chunks);\n+\n+    // return success/failure of write\n+    return ( (elementsWritten == 1) && chunksOk );\n+}\n+\n+bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {\n+\n+    size_t elementsWritten = 0;\n+\n+    // write number of bins\n+    int32_t binCount = bins.size();\n+    if ( m_isBigEndian ) SwapEndian_32(binCount);\n+    elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);\n+\n+    // save summary for reference's bins\n+    SaveBinsSummary(refId, bins.size());\n+\n+    // iterate over bins\n+    bool binsOk = true;\n+    BaiBinMap::iterator binIter = bins.begin();\n+    BaiBinMap::iterator binEnd  = 
bins.end();\n+    for ( ; binIter != binEnd; ++binIter )\n+        binsOk &= WriteBin( (*binIter).first, (*binIter).second );\n+\n+    // return success/failure of write\n+    return ( (elementsWritten == 1) && binsOk );\n+}\n+\n+bool BamStandardIndex::WriteHeader(void) {\n+\n+    size_t elementsWritten = 0;\n+\n+    // write magic number\n+    elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);\n+\n+    // write number of reference sequences\n+    int32_t numReferences = m_indexFileSummary.size();\n+    if ( m_isBigEndian ) SwapEndian_32(numReferences);\n+    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n+\n+    // return success/failure of write\n+    return (elementsWritten == 5);\n+}\n+\n+bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {\n+\n+    // make sure linear offsets are sorted before writing & saving summary\n+    SortLinearOffsets(linearOffsets);\n+\n+    size_t elementsWritten = 0;\n+\n+    // write number of linear offsets\n+    int32_t offsetCount = linearOffsets.size();\n+    if ( m_isBigEndian ) SwapEndian_32(offsetCount);\n+    elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);\n+\n+    // save summary for reference's linear offsets\n+    SaveLinearOffsetsSummary(refId, linearOffsets.size());\n+\n+    // iterate over linear offsets\n+    BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();\n+    BaiLinearOffsetVector::const_iterator offsetEnd  = linearOffsets.end();\n+    for ( ; offsetIter != offsetEnd; ++offsetIter ) {\n+\n+        // write linear offset\n+        uint64_t linearOffset = (*offsetIter);\n+        if ( m_isBigEndian ) SwapEndian_64(linearOffset);\n+        elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);\n+    }\n+\n+    // return success/failure of write\n+    return ( elementsWritten == (size_t)(linearOffsets.size() + 1) 
);\n+}\n+\n+bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {\n+    bool refOk = true;\n+    refOk &= WriteBins(refEntry.ID, refEntry.Bins);\n+    refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);\n+    return refOk;\n+}\n"
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,237 @@\n+// ***************************************************************************\n+// BamStandardIndex.h (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the standardized BAM index format (".bai")\n+// ***************************************************************************\n+\n+#ifndef BAM_STANDARD_INDEX_FORMAT_H\n+#define BAM_STANDARD_INDEX_FORMAT_H\n+\n+//  -------------\n+//  W A R N I N G\n+//  -------------\n+//\n+// This file is not part of the BamTools API.  It exists purely as an\n+// implementation detail.  This header file may change from version to\n+// version without notice, or even be removed.\n+//\n+// We mean it.\n+\n+#include <api/BamAux.h>\n+#include <api/BamIndex.h>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <vector>\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+// -----------------------------------------------------------------------------\n+// BamStandardIndex data structures\n+\n+// defines start and end of a contiguous run of alignments\n+struct BaiAlignmentChunk {\n+\n+    // data members\n+    uint64_t Start;\n+    uint64_t Stop;\n+\n+    // constructor\n+    BaiAlignmentChunk(const uint64_t& start = 0,\n+                      const uint64_t& stop = 0)\n+        : Start(start)\n+        , Stop(stop)\n+    { }\n+};\n+\n+// comparison operator (for sorting)\n+inline\n+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {\n+    return lhs.Start < rhs.Start;\n+}\n+\n+// convenience typedef for a list of all alignment \'chunks\' in a BAI bin\n+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;\n+\n+// convenience typedef for a map of all BAI bins in a reference (ID => 
chunks)\n+typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;\n+\n+// convenience typedef for a list of all \'linear offsets\' in a reference\n+typedef std::vector<uint64_t> BaiLinearOffsetVector;\n+\n+// contains all fields necessary for building, loading, & writing\n+// full BAI index data for a single reference\n+struct BaiReferenceEntry {\n+\n+    // data members\n+    int32_t ID;\n+    BaiBinMap Bins;\n+    BaiLinearOffsetVector LinearOffsets;\n+\n+    // ctor\n+    BaiReferenceEntry(const int32_t& id = -1)\n+        : ID(id)\n+    { }\n+};\n+\n+// provides (persistent) summary of BaiReferenceEntry\'s index data\n+struct BaiReferenceSummary {\n+\n+    // data members\n+    int NumBins;\n+    int NumLinearOffsets;\n+    uint64_t FirstBinFilePosition;\n+    uint64_t FirstLinearOffsetFilePosition;\n+\n+    // ctor\n+    BaiReferenceSummary(void)\n+        : NumBins(0)\n+        , NumLinearOffsets(0)\n+        , FirstBinFilePosition(0)\n+        , FirstLinearOffsetFilePosition(0)\n+    { }\n+};\n+\n+// convenience typedef for describing a full BAI index file summary\n+typedef std::vector<BaiReferenceSummary> BaiFileSummary;\n+\n+// end BamStandardIndex data structures\n+// -----------------------------------------------------------------------------\n+\n+class BamStandardIndex : public BamIndex {\n+\n+    // ctor & dtor\n+    public:\n+        BamStandardIndex(Internal::BamReaderPrivate* reader);\n+        ~BamStandardIndex(void);\n+\n+    // BamIndex implementation\n+    public:\n+        // builds index from associated BAM file & writes out to index file\n+        bool Create(void);\n+        // returns whether reference has alignments or no\n+        bool HasAlignments(const int& referenceID) const;\n+        // attempts to use index data to jump to @region, returns success/fail\n+        // a "successful" jump indicates no error, but not whether this region has data\n+        //   * thus, the method sets a flag to indicate whether there are 
alignments\n+        //     available after the jump position\n+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegio'..b'ds\n+    private:\n+        bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);\n+        void CalculateCandidateBins(const uint32_t& begin,\n+                                    const uint32_t& end,\n+                                    std::set<uint16_t>& candidateBins);\n+        bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n+                                       const uint64_t& minOffset,\n+                                       std::set<uint16_t>& candidateBins,\n+                                       std::vector<int64_t>& offsets);\n+        uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);\n+        bool GetOffsets(const BamRegion& region, std::vector<int64_t>& offsets);\n+        uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);\n+\n+    // internal BAI summary (create/load) methods\n+    private:\n+        void ReserveForSummary(const int& numReferences);\n+        void SaveBinsSummary(const int& refId, const int& numBins);\n+        void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);\n+        bool SkipBins(const int& numBins);\n+        bool SkipLinearOffsets(const int& numLinearOffsets);\n+        bool SummarizeBins(BaiReferenceSummary& refSummary);\n+        bool SummarizeIndexFile(void);\n+        bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);\n+        bool SummarizeReference(BaiReferenceSummary& refSummary);\n+\n+    // internal BAI full index input methods\n+    private:\n+        bool ReadBinID(uint32_t& binId);\n+        bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);\n+        bool ReadIntoBuffer(const unsigned int& bytesRequested);\n+        bool ReadLinearOffset(uint64_t& linearOffset);\n+        bool 
ReadNumAlignmentChunks(int& numAlignmentChunks);\n+        bool ReadNumBins(int& numBins);\n+        bool ReadNumLinearOffsets(int& numLinearOffsets);\n+        bool ReadNumReferences(int& numReferences);\n+\n+    // internal BAI full index output methods\n+    private:\n+        void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);\n+        void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);\n+        bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);\n+        bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);\n+        bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);\n+        bool WriteBins(const int& refId, BaiBinMap& bins);\n+        bool WriteHeader(void);\n+        bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);\n+        bool WriteReferenceEntry(BaiReferenceEntry& refEntry);\n+\n+    // data members\n+    private:\n+        FILE* m_indexStream;\n+        bool  m_isBigEndian;\n+        BamIndex::IndexCacheMode m_cacheMode;\n+        BaiFileSummary m_indexFileSummary;\n+\n+        // our input buffer\n+        char* m_buffer;\n+        unsigned int m_bufferLength;\n+\n+    // static methods\n+    private:\n+        // checks if the buffer is large enough to accomodate the requested size\n+        static void CheckBufferSize(char*& buffer,\n+                                    unsigned int& bufferLength,\n+                                    const unsigned int& requestedBytes);\n+        // checks if the buffer is large enough to accomodate the requested size\n+        static void CheckBufferSize(unsigned char*& buffer,\n+                                    unsigned int& bufferLength,\n+                                    const unsigned int& requestedBytes);\n+    // static constants\n+    private:\n+        static const int MAX_BIN;\n+        static const int BAM_LIDX_SHIFT;\n+        static const std::string BAI_EXTENSION;\n+        static const char* const BAI_MAGIC;\n+ 
       static const int SIZEOF_ALIGNMENTCHUNK;\n+        static const int SIZEOF_BINCORE;\n+        static const int SIZEOF_LINEAROFFSET;\n+};\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+#endif // BAM_STANDARD_INDEX_FORMAT_H\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,642 @@\n+// ***************************************************************************\n+// BamToolsIndex.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 27 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the BamTools index format (".bti")\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamToolsIndex_p.h>\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <map>\n+using namespace std;\n+\n+// static BamToolsIndex constants\n+const int BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;\n+const string BamToolsIndex::BTI_EXTENSION     = ".bti";\n+const char* const BamToolsIndex::BTI_MAGIC    = "BTI\\1";\n+const int BamToolsIndex::SIZEOF_BLOCK         = sizeof(int32_t)*2 + sizeof(int64_t);\n+\n+// ctor\n+BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)\n+    : BamIndex(reader)\n+    , m_indexStream(0)\n+    , m_cacheMode(BamIndex::LimitedIndexCaching)\n+    , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)\n+    , m_inputVersion(0)\n+    , m_outputVersion(BTI_1_2) // latest version - used for writing new index files\n+{\n+    m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// dtor\n+BamToolsIndex::~BamToolsIndex(void) {\n+    CloseFile();\n+}\n+\n+bool BamToolsIndex::CheckMagicNumber(void) {\n+\n+    // check \'magic number\' to see if file is BTI index\n+    char magic[4];\n+    size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);\n+    if ( elementsRead != 
4 ) {\n+        cerr << "BamToolsIndex ERROR: could not read format \'magic\' number" << endl;\n+        return false;\n+    }\n+\n+    if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 ) {\n+        cerr << "BamToolsIndex ERROR: invalid format" << endl;\n+        return false;\n+    }\n+\n+    // otherwise ok\n+    return true;\n+}\n+\n+// check index file version, return true if OK\n+bool BamToolsIndex::CheckVersion(void) {\n+\n+    // read version from file\n+    size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);\n+    if ( elementsRead != 1 ) return false;\n+    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);\n+\n+    // if version is negative, or zero\n+    if ( m_inputVersion <= 0 ) {\n+        cerr << "BamToolsIndex ERROR: could not load index file: invalid version."\n+             << endl;\n+        return false;\n+    }\n+\n+    // if version is newer than can be supported by this version of bamtools\n+    else if ( m_inputVersion > m_outputVersion ) {\n+        cerr << "BamToolsIndex ERROR: could not load index file. This version of BamTools does not recognize new index file version"\n+             << endl\n+             << "Please update BamTools to a more recent version to support this index file."\n+             << endl;\n+        return false;\n+    }\n+\n+    // ------------------------------------------------------------------\n+    // check for deprecated, unsupported versions\n+    // (typically whose format did not accomodate a particular bug fix)\n+\n+    else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) {\n+        cerr << "BamToolsIndex ERROR: could not load index file. 
This version of the index contains a bug related to accessing data near reference ends."\n+             << endl << endl\n+             << "Please run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date, fixed BTI file."\n+             << endl << endl;\n+        return false;\n+    }\n+\n+    else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) {\n+        cerr << "BamToolsIndex ERROR: could not load index file. '..b"efSummary.FirstBlockFilePosition << endl;\n+        return false;\n+    }\n+\n+    // read & store block entries\n+    bool readOk = true;\n+    BtiBlock block;\n+    for ( int i = 0; i < refSummary.NumBlocks; ++i ) {\n+        readOk &= ReadBlock(block);\n+        blocks.push_back(block);\n+    }\n+    return readOk;\n+}\n+\n+bool BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {\n+\n+    // return false if refId not valid index in file summary structure\n+    if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )\n+        return false;\n+\n+    // use index summary to assist reading the reference's BTI blocks\n+    const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);\n+    return ReadBlocks(refSummary, refEntry.Blocks);\n+}\n+\n+bool BamToolsIndex::Seek(const int64_t& position, const int& origin) {\n+    return ( fseek64(m_indexStream, position, origin) == 0 );\n+}\n+\n+// change the index caching behavior\n+void BamToolsIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {\n+    m_cacheMode = mode;\n+    // do nothing else here ? 
cache mode will be ignored from now on, most likely\n+}\n+\n+bool BamToolsIndex::SkipBlocks(const int& numBlocks) {\n+    return Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );\n+}\n+\n+int64_t BamToolsIndex::Tell(void) const {\n+    return ftell64(m_indexStream);\n+}\n+\n+bool BamToolsIndex::WriteBlock(const BtiBlock& block) {\n+\n+    // copy entry data\n+    int32_t maxEndPosition = block.MaxEndPosition;\n+    int64_t startOffset    = block.StartOffset;\n+    int32_t startPosition  = block.StartPosition;\n+\n+    // swap endian-ness if necessary\n+    if ( m_isBigEndian ) {\n+        SwapEndian_32(maxEndPosition);\n+        SwapEndian_64(startOffset);\n+        SwapEndian_32(startPosition);\n+    }\n+\n+    // write the reference index entry\n+    size_t elementsWritten = 0;\n+    elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);\n+    elementsWritten += fwrite(&startOffset,    sizeof(startOffset),    1, m_indexStream);\n+    elementsWritten += fwrite(&startPosition,  sizeof(startPosition),  1, m_indexStream);\n+    return ( elementsWritten == 3 );\n+}\n+\n+bool BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {\n+    bool writtenOk = true;\n+    BtiBlockVector::const_iterator blockIter = blocks.begin();\n+    BtiBlockVector::const_iterator blockEnd  = blocks.end();\n+    for ( ; blockIter != blockEnd; ++blockIter )\n+        writtenOk &= WriteBlock(*blockIter);\n+    return writtenOk;\n+}\n+\n+bool BamToolsIndex::WriteHeader(void) {\n+\n+    size_t elementsWritten = 0;\n+\n+    // write BTI index format 'magic number'\n+    elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, m_indexStream);\n+\n+    // write BTI index format version\n+    int32_t currentVersion = (int32_t)m_outputVersion;\n+    if ( m_isBigEndian ) SwapEndian_32(currentVersion);\n+    elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, m_indexStream);\n+\n+    // write block size\n+    int32_t blockSize = 
m_blockSize;\n+    if ( m_isBigEndian ) SwapEndian_32(blockSize);\n+    elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);\n+\n+    // write number of references\n+    int32_t numReferences = m_indexFileSummary.size();\n+    if ( m_isBigEndian ) SwapEndian_32(numReferences);\n+    elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n+\n+    // return success/failure of write\n+    return ( elementsWritten == 7 );\n+}\n+\n+bool BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {\n+\n+    size_t elementsWritten = 0;\n+\n+    // write number of blocks this reference\n+    uint32_t numBlocks = refEntry.Blocks.size();\n+    if ( m_isBigEndian ) SwapEndian_32(numBlocks);\n+    elementsWritten += fwrite(&numBlocks, sizeof(numBlocks), 1, m_indexStream);\n+\n+    // write actual block entries\n+    const bool blocksOk = WriteBlocks(refEntry.Blocks);\n+\n+    // return success/fail\n+    return ( elementsWritten == 1) && blocksOk;\n+}\n"
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,188 @@
+// ***************************************************************************
+// BamToolsIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#ifndef BAMTOOLS_INDEX_FORMAT_H
+#define BAMTOOLS_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAux.h>
+#include <api/BamIndex.h>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// contains data for one 'block' record in a BTI index; BamToolsIndex::WriteBlock() serializes the fields in declaration order
+struct BtiBlock {
+
+    // data members (2 x int32_t + 1 x int64_t, the same terms that define BamToolsIndex::SIZEOF_BLOCK)
+    int32_t MaxEndPosition;  // presumably the largest alignment end position covered by this block - TODO confirm
+    int64_t StartOffset;     // presumably the BAM file offset at which this block starts - TODO confirm
+    int32_t StartPosition;   // presumably the reference position of the block's first alignment - TODO confirm
+
+    // ctor - every field defaults to 0
+    BtiBlock(const int32_t& maxEndPosition = 0,
+             const int64_t& startOffset    = 0,
+             const int32_t& startPosition  = 0)
+        : MaxEndPosition(maxEndPosition)
+        , StartOffset(startOffset)
+        , StartPosition(startPosition)
+    { }
+};
+
+// convenience typedef for describing a list of BTI blocks on a reference
+typedef std::vector<BtiBlock> BtiBlockVector;
+
+// contains all fields necessary for building, loading, & writing
+// full BTI index data for a single reference
+struct BtiReferenceEntry {
+
+    // data members
+    int32_t ID;             // used as an index into the file summary; ReadReferenceEntry() rejects values < 0 or >= its size
+    BtiBlockVector Blocks;  // this reference's BTI blocks (filled by ReadBlocks(), written by WriteBlocks())
+
+    // ctor - the default id of -1 is outside the range ReadReferenceEntry() accepts; Blocks starts empty
+    BtiReferenceEntry(const int& id = -1)
+        : ID(id)
+    { }
+};
+
+// provides (persistent) summary of BtiReferenceEntry's index data
+struct BtiReferenceSummary {
+
+    // data members
+    int NumBlocks;                    // number of blocks ReadBlocks() reads for this reference
+    uint64_t FirstBlockFilePosition;  // presumably the index-file position of the reference's first block - TODO confirm
+
+    // ctor - both fields start at 0
+    BtiReferenceSummary(void)
+        : NumBlocks(0)
+        , FirstBlockFilePosition(0)
+    { }
+};
+
+// convenience typedef for describing a full BTI index file summary (one BtiReferenceSummary per reference, looked up by reference ID)
+typedef std::vector<BtiReferenceSummary> BtiFileSummary;
+
+class BamToolsIndex : public BamIndex {
+
+    // keep a list of any supported versions here
+    // (might be useful later to handle any 'legacy' versions if the format changes)
+    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+    //
+    // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
+    //
+    // if ( indexVersion >= BTI_1_2 )
+    //   do something new
+    // else
+    //   do the old thing
+    enum Version { BTI_1_0 = 1  // refused on load by CheckVersion() (bug related to accessing data near reference ends)
+                 , BTI_1_1      // CheckVersion() has a dedicated error branch for this version as well
+                 , BTI_1_2      // latest version; the ctor selects it for writing new index files
+                 };
+
+    // ctor & dtor
+    public:
+        BamToolsIndex(Internal::BamReaderPrivate* reader);
+        ~BamToolsIndex(void);  // calls CloseFile()
+
+    // BamIndex implementation
+    public:
+        // builds index from associated BAM file & writes out to index file
+        bool Create(void);
+        // returns whether reference has alignments or not
+        bool HasAlignments(const int& referenceID) const;
+        // attempts to use index data to jump to @region, returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments
+        //     available after the jump position
+        bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+        // loads existing data from file into memory
+        bool Load(const std::string& filename);
+        // change the index caching behavior (only stores @mode in m_cacheMode)
+        void SetCacheMode(const BamIndex::IndexCacheMode& mode);
+    public:
+        // returns format's file extension
+        static const std::string Extension(void);
+
+    // internal file ops
+    private:
+        bool CheckMagicNumber(void);  // reads 4 bytes and compares them against BTI_MAGIC
+        bool CheckVersion(void);      // reads m_inputVersion; false if <= 0, newer than m_outputVersion, or deprecated
+        void CloseFile(void);
+        bool IsFileOpen(void) const;
+        bool OpenFile(const std::string& filename, const char* mode);
+        bool Seek(const int64_t& position, const int& origin);  // fseek64 on m_indexStream; true when it returns 0
+        int64_t Tell(void) const;                               // ftell64 on m_indexStream
+
+    // internal BTI index building methods
+    private:
+        void ClearReferenceEntry(BtiReferenceEntry& refEntry);
+
+    // internal random-access methods
+    private:
+        bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+
+    // internal BTI summary data methods
+    private:
+        void InitializeFileSummary(const int& numReferences);
+        bool LoadFileSummary(void);
+        bool LoadHeader(void);
+        bool LoadNumBlocks(int& numBlocks);
+        bool LoadNumReferences(int& numReferences);
+        bool LoadReferenceSummary(BtiReferenceSummary& refSummary);
+        bool SkipBlocks(const int& numBlocks);  // seeks numBlocks*SIZEOF_BLOCK bytes forward from the current position
+
+    // internal BTI full index input methods
+    private:
+        bool ReadBlock(BtiBlock& block);
+        bool ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks);  // appends refSummary.NumBlocks blocks
+        bool ReadReferenceEntry(BtiReferenceEntry& refEntry);  // false when refEntry.ID is not a valid file-summary index
+
+    // internal BTI full index output methods
+    private:
+        bool WriteBlock(const BtiBlock& block);          // writes MaxEndPosition, StartOffset, StartPosition (byte-swapped if big-endian)
+        bool WriteBlocks(const BtiBlockVector& blocks);  // writes every block; false if any single write failed
+        bool WriteHeader(void);                          // writes magic number, version, block size, number of references
+        bool WriteReferenceEntry(const BtiReferenceEntry& refEntry);  // writes the block count, then the blocks
+
+    // data members
+    private:
+        FILE* m_indexStream;                    // index file stream used by the read/write/seek helpers; the ctor sets it to 0
+        bool  m_isBigEndian;                    // set from SystemIsBigEndian(); when true, values are byte-swapped around file I/O
+        BamIndex::IndexCacheMode m_cacheMode;   // the ctor sets LimitedIndexCaching; changed by SetCacheMode()
+        BtiFileSummary m_indexFileSummary;      // per-reference summaries, looked up by reference ID
+        int m_blockSize;                        // the ctor sets DEFAULT_BLOCK_LENGTH; written to the header by WriteHeader()
+        int32_t m_inputVersion; // version read from an index file by CheckVersion(); Version is serialized as int
+        Version m_outputVersion;                // version written by WriteHeader(); the ctor sets BTI_1_2
+
+    // static constants (defined in BamToolsIndex_p.cpp)
+    private:
+        static const int DEFAULT_BLOCK_LENGTH;   // 1000
+        static const std::string BTI_EXTENSION;  // ".bti"
+        static const char* const BTI_MAGIC;      // "BTI\1"
+        static const int SIZEOF_BLOCK;           // sizeof(int32_t)*2 + sizeof(int64_t)
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMTOOLS_INDEX_FORMAT_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,425 @@\n+// ***************************************************************************\n+// BamWriter_p.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 16 June 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the basic functionality for producing BAM files\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamConstants.h>\n+#include <api/internal/BamWriter_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+using namespace std;\n+\n+// ctor\n+BamWriterPrivate::BamWriterPrivate(void)\n+    : m_isBigEndian( BamTools::SystemIsBigEndian() )\n+{ }\n+\n+// dtor\n+BamWriterPrivate::~BamWriterPrivate(void) {\n+    m_stream.Close();\n+}\n+\n+// calculates minimum bin for a BAM alignment interval\n+unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {\n+    --end;\n+    if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);\n+    if ( (begin >> 17) == (end >> 17) ) return  585 + (begin >> 17);\n+    if ( (begin >> 20) == (end >> 20) ) return   73 + (begin >> 20);\n+    if ( (begin >> 23) == (end >> 23) ) return    9 + (begin >> 23);\n+    if ( (begin >> 26) == (end >> 26) ) return    1 + (begin >> 26);\n+    return 0;\n+}\n+\n+// closes the alignment archive\n+void BamWriterPrivate::Close(void) {\n+    m_stream.Close();\n+}\n+\n+// creates a cigar string from the supplied alignment\n+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {\n+\n+    // initialize\n+    const unsigned int numCigarOperations = cigarOperations.size();\n+    packedCigar.resize(numCigarOperations * 
Constants::BAM_SIZEOF_INT);\n+\n+    // pack the cigar data into the string\n+    unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();\n+\n+    // iterate over cigar operations\n+    vector<CigarOp>::const_iterator coIter = cigarOperations.begin();\n+    vector<CigarOp>::const_iterator coEnd  = cigarOperations.end();\n+    for ( ; coIter != coEnd; ++coIter ) {\n+\n+        // store op in packedCigar\n+        unsigned int cigarOp;\n+        switch ( coIter->Type ) {\n+            case (Constants::BAM_CIGAR_MATCH_CHAR)    : cigarOp = Constants::BAM_CIGAR_MATCH;    break;\n+            case (Constants::BAM_CIGAR_INS_CHAR)      : cigarOp = Constants::BAM_CIGAR_INS;      break;\n+            case (Constants::BAM_CIGAR_DEL_CHAR)      : cigarOp = Constants::BAM_CIGAR_DEL;      break;\n+            case (Constants::BAM_CIGAR_REFSKIP_CHAR)  : cigarOp = Constants::BAM_CIGAR_REFSKIP;  break;\n+            case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;\n+            case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;\n+            case (Constants::BAM_CIGAR_PAD_CHAR)      : cigarOp = Constants::BAM_CIGAR_PAD;      break;\n+            case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;\n+            case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;\n+            default:\n+              fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\\n", coIter->Type);\n+              exit(1);\n+        }\n+\n+        *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;\n+        pPackedCigar++;\n+    }\n+}\n+\n+// encodes the supplied query sequence into 4-bit notation\n+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {\n+\n+    // prepare the encoded query string\n+    const unsigned int queryLen = query.size();\n+    const unsigned int 
encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);\n+    encodedQuery.resize(encodedQueryLen);\n+    char* pEncode'..b'  ++i;\n+                                    break;\n+                                case (Constants::BAM_TAG_TYPE_INT16)  :\n+                                case (Constants::BAM_TAG_TYPE_UINT16) :\n+                                    BamTools::SwapEndian_16p(&tagData[i]);\n+                                    i += sizeof(uint16_t);\n+                                    break;\n+                                case (Constants::BAM_TAG_TYPE_FLOAT)  :\n+                                case (Constants::BAM_TAG_TYPE_INT32)  :\n+                                case (Constants::BAM_TAG_TYPE_UINT32) :\n+                                    BamTools::SwapEndian_32p(&tagData[i]);\n+                                    i += sizeof(uint32_t);\n+                                    break;\n+                                default:\n+                                    // error case\n+                                    fprintf(stderr,\n+                                            "BamWriter ERROR: unknown binary array type encountered: [%c]\\n",\n+                                            arrayType);\n+                                    exit(1);\n+                            }\n+                        }\n+\n+                        break;\n+                    }\n+\n+                    default :\n+                        fprintf(stderr, "BamWriter ERROR: invalid tag value type\\n"); // shouldn\'t get here\n+                        free(tagData);\n+                        exit(1);\n+                }\n+            }\n+            m_stream.Write(tagData, tagDataLength);\n+            free(tagData);\n+        }\n+        else\n+            m_stream.Write(al.TagData.data(), tagDataLength);\n+    }\n+}\n+\n+void BamWriterPrivate::SetWriteCompressed(bool ok) {\n+\n+    // warn if BAM file is already open\n+    // modifying compression is not allowed in 
this case\n+    if ( IsOpen() ) {\n+        cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "\n+             << "Ignoring request." << endl;\n+        return;\n+    }\n+\n+    // set BgzfStream compression mode\n+    m_stream.SetWriteCompressed(ok);\n+}\n+\n+void BamWriterPrivate::WriteMagicNumber(void) {\n+    // write BAM file \'magic number\'\n+    m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);\n+}\n+\n+void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {\n+\n+    // write the number of reference sequences\n+    uint32_t numReferenceSequences = referenceSequences.size();\n+    if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);\n+    m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);\n+\n+    // foreach reference sequence\n+    RefVector::const_iterator rsIter = referenceSequences.begin();\n+    RefVector::const_iterator rsEnd  = referenceSequences.end();\n+    for ( ; rsIter != rsEnd; ++rsIter ) {\n+\n+        // write the reference sequence name length\n+        uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;\n+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);\n+        m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);\n+\n+        // write the reference sequence name\n+        m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);\n+\n+        // write the reference sequence length\n+        int32_t referenceLength = rsIter->RefLength;\n+        if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);\n+        m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);\n+    }\n+}\n+\n+void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {\n+\n+    // write the SAM header  text length\n+    uint32_t samHeaderLen = samHeaderText.size();\n+    if ( m_isBigEndian ) 
BamTools::SwapEndian_32(samHeaderLen);\n+    m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);\n+\n+    // write the SAM header text\n+    if ( samHeaderLen > 0 )\n+        m_stream.Write(samHeaderText.data(), samHeaderLen);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,67 @@
+// ***************************************************************************
+// BamWriter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 24 February 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_P_H
+#define BAMWRITER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAux.h>
+#include <api/internal/BgzfStream_p.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+class BamWriterPrivate {
+
+    // ctor & dtor
+    public:
+        BamWriterPrivate(void);
+        ~BamWriterPrivate(void);
+
+    // interface methods
+    public:
+        void Close(void);
+        bool IsOpen(void) const;
+        bool Open(const std::string& filename,
+                  const std::string& samHeaderText,
+                  const BamTools::RefVector& referenceSequences);
+        void SaveAlignment(const BamAlignment& al);
+        void SetWriteCompressed(bool ok);
+
+    // 'internal' methods
+    public:
+        unsigned int CalculateMinimumBin(const int begin, int end) const;
+        void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
+        void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+        void WriteMagicNumber(void);
+        void WriteReferences(const BamTools::RefVector& referenceSequences);
+        void WriteSamHeaderText(const std::string& samHeaderText);
+
+    // data members
+    private:
+        BgzfStream m_stream;
+        bool m_isBigEndian;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMWRITER_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,439 @@\n+// ***************************************************************************\n+// BgzfStream_p.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011(DB)\n+// ---------------------------------------------------------------------------\n+// Based on BGZF routines developed at the Broad Institute.\n+// Provides the basic functionality for reading & writing BGZF files\n+// Replaces the old BGZF.* files to avoid clashing with other toolkits\n+// ***************************************************************************\n+\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstring>\n+#include <algorithm>\n+using namespace std;\n+\n+// constructor\n+BgzfStream::BgzfStream(void)\n+    : UncompressedBlockSize(Constants::BGZF_DEFAULT_BLOCK_SIZE)\n+    , CompressedBlockSize(Constants::BGZF_MAX_BLOCK_SIZE)\n+    , BlockLength(0)\n+    , BlockOffset(0)\n+    , BlockAddress(0)\n+    , IsOpen(false)\n+    , IsWriteOnly(false)\n+    , IsWriteCompressed(true)\n+    , Stream(NULL)\n+    , UncompressedBlock(NULL)\n+    , CompressedBlock(NULL)\n+{\n+    try {\n+        CompressedBlock   = new char[CompressedBlockSize];\n+        UncompressedBlock = new char[UncompressedBlockSize];\n+    } catch( std::bad_alloc& ba ) {\n+        fprintf(stderr, "BgzfStream ERROR: unable to allocate memory\\n");\n+        exit(1);\n+    }\n+}\n+\n+// destructor\n+BgzfStream::~BgzfStream(void) {\n+    if( CompressedBlock   ) delete[] CompressedBlock;\n+    if( UncompressedBlock ) delete[] UncompressedBlock;\n+}\n+\n+// closes BGZF file\n+void BgzfStream::Close(void) {\n+\n+    // skip if file not open\n+    if ( !IsOpen ) return;\n+\n+    // if writing to file, flush the current BGZF block,\n+    // then write an empty block (as EOF marker)\n+    
if ( IsWriteOnly ) {\n+        FlushBlock();\n+        int blockLength = DeflateBlock();\n+        fwrite(CompressedBlock, 1, blockLength, Stream);\n+    }\n+\n+    // flush and close stream\n+    fflush(Stream);\n+    fclose(Stream);\n+\n+    // reset flags\n+    IsWriteCompressed = true;\n+    IsOpen = false;\n+}\n+\n+// compresses the current block\n+int BgzfStream::DeflateBlock(void) {\n+\n+    // initialize the gzip header\n+    char* buffer = CompressedBlock;\n+    memset(buffer, 0, 18);\n+    buffer[0]  = Constants::GZIP_ID1;\n+    buffer[1]  = (char)Constants::GZIP_ID2;\n+    buffer[2]  = Constants::CM_DEFLATE;\n+    buffer[3]  = Constants::FLG_FEXTRA;\n+    buffer[9]  = (char)Constants::OS_UNKNOWN;\n+    buffer[10] = Constants::BGZF_XLEN;\n+    buffer[12] = Constants::BGZF_ID1;\n+    buffer[13] = Constants::BGZF_ID2;\n+    buffer[14] = Constants::BGZF_LEN;\n+\n+    // set compression level\n+    const int compressionLevel = ( IsWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );\n+\n+    // loop to retry for blocks that do not compress enough\n+    int inputLength = BlockOffset;\n+    int compressedLength = 0;\n+    unsigned int bufferSize = CompressedBlockSize;\n+\n+    while ( true ) {\n+\n+        // initialize zstream values\n+        z_stream zs;\n+        zs.zalloc    = NULL;\n+        zs.zfree     = NULL;\n+        zs.next_in   = (Bytef*)UncompressedBlock;\n+        zs.avail_in  = inputLength;\n+        zs.next_out  = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];\n+        zs.avail_out = bufferSize - Constants::BGZF_BLOCK_HEADER_LENGTH - Constants::BGZF_BLOCK_FOOTER_LENGTH;\n+\n+        // initialize the zlib compression algorithm\n+        if ( deflateInit2(&zs,\n+                          compressionLevel,\n+                          Z_DEFLATED,\n+                          Constants::GZIP_WINDOW_BITS,\n+                          Constants::Z_DEFAULT_MEM_LEVEL,\n+                          Z_DEFAULT_STRATEGY) != Z_OK )\n+        {\n+            
fprintf(stderr, "BgzfStream ERROR: zlib deflate initialization failed\\n");\n+            exit(1);\n+        }\n+\n+        /'..b'Read += copyLength;\n+    }\n+\n+    // update block data\n+    if ( BlockOffset == BlockLength ) {\n+        BlockAddress = ftell64(Stream);\n+        BlockOffset  = 0;\n+        BlockLength  = 0;\n+    }\n+\n+    return numBytesRead;\n+}\n+\n+// reads a BGZF block\n+bool BgzfStream::ReadBlock(void) {\n+\n+    char header[Constants::BGZF_BLOCK_HEADER_LENGTH];\n+    int64_t blockAddress = ftell64(Stream);\n+\n+    // read block header from file\n+    int count = fread(header, 1, sizeof(header), Stream);\n+\n+    // if block header empty\n+    if ( count == 0 ) {\n+        BlockLength = 0;\n+        return true;\n+    }\n+\n+    // if block header invalid size\n+    if ( count != sizeof(header) ) {\n+        fprintf(stderr, "BgzfStream ERROR: read block failed - could not read block header\\n");\n+        return false;\n+    }\n+\n+    // validate block header contents\n+    if ( !BgzfStream::CheckBlockHeader(header) ) {\n+        fprintf(stderr, "BgzfStream ERROR: read block failed - invalid block header\\n");\n+        return false;\n+    }\n+\n+    // copy header contents to compressed buffer\n+    int blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;\n+    char* compressedBlock = CompressedBlock;\n+    memcpy(compressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);\n+    int remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;\n+\n+    // read remainder of block\n+    count = fread(&compressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Stream);\n+    if ( count != remaining ) {\n+        fprintf(stderr, "BgzfStream ERROR: read block failed - could not read data from block\\n");\n+        return false;\n+    }\n+\n+    // decompress block data\n+    count = InflateBlock(blockLength);\n+    if ( count < 0 ) {\n+        fprintf(stderr, "BgzfStream ERROR: read block failed - could not decompress 
block data\\n");\n+        return false;\n+    }\n+\n+    // update block data\n+    if ( BlockLength != 0 )\n+        BlockOffset = 0;\n+    BlockAddress = blockAddress;\n+    BlockLength  = count;\n+\n+    // return success\n+    return true;\n+}\n+\n+// seek to position in BGZF file\n+bool BgzfStream::Seek(const int64_t& position) {\n+\n+    // skip if not open\n+    if ( !IsOpen ) return false;\n+\n+    // determine adjusted offset & address\n+    int     blockOffset  = (position & 0xFFFF);\n+    int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\n+\n+    // attempt seek in file\n+    if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {\n+        fprintf(stderr, "BgzfStream ERROR: unable to seek in file\\n");\n+        return false;\n+    }\n+\n+    // update block data & return success\n+    BlockLength  = 0;\n+    BlockAddress = blockAddress;\n+    BlockOffset  = blockOffset;\n+    return true;\n+}\n+\n+void BgzfStream::SetWriteCompressed(bool ok) {\n+    IsWriteCompressed = ok;\n+}\n+\n+// get file position in BGZF file\n+int64_t BgzfStream::Tell(void) const {\n+    if ( !IsOpen )\n+        return 0;\n+    return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\n+}\n+\n+// writes the supplied data into the BGZF buffer\n+unsigned int BgzfStream::Write(const char* data, const unsigned int dataLen) {\n+\n+    // skip if file not open for writing\n+    if ( !IsOpen || !IsWriteOnly ) return false;\n+\n+    // write blocks as needed til all data is written\n+    unsigned int numBytesWritten = 0;\n+    const char* input = data;\n+    unsigned int blockLength = UncompressedBlockSize;\n+    while ( numBytesWritten < dataLen ) {\n+\n+        // copy data contents to uncompressed output buffer\n+        unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\n+        char* buffer = UncompressedBlock;\n+        memcpy(buffer + BlockOffset, input, copyLength);\n+\n+        // update counter\n+        BlockOffset     += 
copyLength;\n+        input           += copyLength;\n+        numBytesWritten += copyLength;\n+\n+        // flush (& compress) output buffer when full\n+        if ( BlockOffset == blockLength ) FlushBlock();\n+    }\n+\n+    // return result\n+    return numBytesWritten;\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,109 @@
+// ***************************************************************************
+// BgzfStream_p.h (c) 2011 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Based on BGZF routines developed at the Broad Institute.
+// Provides the basic functionality for reading & writing BGZF files
+// Replaces the old BGZF.* files to avoid clashing with other toolkits
+// ***************************************************************************
+
+#ifndef BGZFSTREAM_P_H
+#define BGZFSTREAM_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/BamAux.h>
+#include <api/BamConstants.h>
+#include "zlib.h"
+#include <cstdio>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class BgzfStream {
+
+    // constructor & destructor
+    public:
+        BgzfStream(void);
+        ~BgzfStream(void);
+
+    // main interface methods
+    public:
+        // closes BGZF file
+        void Close(void);
+        // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
+        bool Open(const std::string& filename, const char* mode);
+        // reads BGZF data into a byte buffer
+        int Read(char* data, const unsigned int dataLength);
+        // seek to position in BGZF file
+        bool Seek(const int64_t& position);
+        // enable/disable compressed output
+        void SetWriteCompressed(bool ok);
+        // get file position in BGZF file
+        int64_t Tell(void) const;
+        // writes the supplied data into the BGZF buffer
+        unsigned int Write(const char* data, const unsigned int dataLen);
+
+    // internal methods
+    private:
+        // compresses the current block
+        int DeflateBlock(void);
+        // flushes the data in the BGZF block
+        void FlushBlock(void);
+        // de-compresses the current block
+        int InflateBlock(const int& blockLength);
+        // reads a BGZF block
+        bool ReadBlock(void);
+
+    // static 'utility' methods
+    public:
+        // checks BGZF block header
+        static inline bool CheckBlockHeader(char* header);
+
+    // data members
+    public:
+        unsigned int UncompressedBlockSize;
+        unsigned int CompressedBlockSize;
+        unsigned int BlockLength;
+        unsigned int BlockOffset;
+        uint64_t BlockAddress;
+        bool IsOpen;
+        bool IsWriteOnly;
+        bool IsWriteCompressed;
+        FILE* Stream;
+        char* UncompressedBlock;
+        char* CompressedBlock;
+};
+
+// -------------------------------------------------------------
+// static 'utility' method implementations
+
+// checks BGZF block header
+inline
+bool BgzfStream::CheckBlockHeader(char* header) {
+    return (header[0] == Constants::GZIP_ID1 &&
+            header[1] == (char)Constants::GZIP_ID2 &&
+            header[2] == Z_DEFLATED &&
+            (header[3] & Constants::FLG_FEXTRA) != 0 &&
+            BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN &&
+            header[12] == Constants::BGZF_ID1 &&
+            header[13] == Constants::BGZF_ID2 &&
+            BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN );
+}
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BGZFSTREAM_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,231 @@\n+// ***************************************************************************\n+// SamFormatParser.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 19 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides functionality for parsing SAM header text into SamHeader object\n+// ***************************************************************************\n+\n+#include <api/SamConstants.h>\n+#include <api/SamHeader.h>\n+#include <api/internal/SamFormatParser_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <iostream>\n+#include <sstream>\n+#include <vector>\n+using namespace std;\n+\n+SamFormatParser::SamFormatParser(SamHeader& header)\n+    : m_header(header)\n+{ }\n+\n+SamFormatParser::~SamFormatParser(void) { }\n+\n+void SamFormatParser::Parse(const string& headerText) {\n+\n+    // clear header\'s prior contents\n+    m_header.Clear();\n+\n+    // empty header is OK, but skip processing\n+    if ( headerText.empty() )\n+        return;\n+\n+    // other wise parse SAM lines\n+    istringstream headerStream(headerText);\n+    string headerLine("");\n+    while ( getline(headerStream, headerLine) )\n+         ParseSamLine(headerLine);\n+}\n+\n+void SamFormatParser::ParseSamLine(const string& line) {\n+\n+    // skip if line is not long enough to contain true values\n+    if (line.length() < 5 ) return;\n+\n+    // determine token at beginning of line\n+    const string firstToken = line.substr(0,3);\n+    string restOfLine = line.substr(4);\n+    if      ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);\n+    else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);\n+    else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);\n+    
else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);\n+    else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);\n+    else\n+        cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;\n+}\n+\n+void SamFormatParser::ParseHDLine(const string& line) {\n+\n+    // split HD lines into tokens\n+    vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+    // iterate over tokens\n+    vector<string>::const_iterator tokenIter = tokens.begin();\n+    vector<string>::const_iterator tokenEnd  = tokens.end();\n+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+        // get tag/value\n+        const string tokenTag = (*tokenIter).substr(0,2);\n+        const string tokenValue = (*tokenIter).substr(3);\n+\n+        // set header contents\n+        if      ( tokenTag == Constants::SAM_HD_VERSION_TAG    ) m_header.Version    = tokenValue;\n+        else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG  ) m_header.SortOrder  = tokenValue;\n+        else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;\n+        else\n+            cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;\n+    }\n+\n+    // if @HD line exists, VN must be provided\n+    if ( !m_header.HasVersion() )\n+        cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;\n+}\n+\n+void SamFormatParser::ParseSQLine(const string& line) {\n+\n+    SamSequence seq;\n+\n+    // split SQ line into tokens\n+    vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+    // iterate over tokens\n+    vector<string>::const_iterator tokenIter = tokens.begin();\n+    vector<string>::const_iterator tokenEnd  = tokens.end();\n+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+        // get tag/value\n+        const string tokenTag = (*tokenIter).substr(0,2);\n+        const string tokenValue = (*tokenIter).substr(3);\n+\n+        // set sequence contents\n+  
      if      ( tokenTag == Constants::SAM_SQ_NAME_TAG       ) seq.Name = tokenValue;\n+        else if ( toke'..b'r tokenIter = tokens.begin();\n+    vector<string>::const_iterator tokenEnd  = tokens.end();\n+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+        // get token tag/value\n+        const string tokenTag = (*tokenIter).substr(0,2);\n+        const string tokenValue = (*tokenIter).substr(3);\n+\n+        // set read group contents\n+        if      ( tokenTag == Constants::SAM_RG_ID_TAG                  ) rg.ID = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG         ) rg.Description = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG           ) rg.FlowOrder = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG         ) rg.KeySequence = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG             ) rg.Library = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG        ) rg.PlatformUnit = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG      ) rg.ProductionDate = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG             ) rg.Program = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG              ) rg.Sample = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG           ) rg.SequencingCenter = tokenValue;\n+        else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG       ) rg.SequencingTechnology = tokenValue;\n+        else\n+            cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;\n+    }\n+\n+    bool isMissingRequiredFields = false;\n+\n+    // if @RG line exists, ID must be provided\n+    if ( !rg.HasID() ) {\n+        isMissingRequiredFields = true;\n+   
     cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;\n+    }\n+\n+    // store SAM read group entry\n+    if ( !isMissingRequiredFields )\n+        m_header.ReadGroups.Add(rg);\n+}\n+\n+void SamFormatParser::ParsePGLine(const string& line) {\n+\n+    SamProgram pg;\n+\n+    // split string into tokens\n+    vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+    // iterate over tokens\n+    vector<string>::const_iterator tokenIter = tokens.begin();\n+    vector<string>::const_iterator tokenEnd  = tokens.end();\n+    for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+        // get token tag/value\n+        const string tokenTag = (*tokenIter).substr(0,2);\n+        const string tokenValue = (*tokenIter).substr(3);\n+\n+        // set program record contents\n+        if      ( tokenTag == Constants::SAM_PG_ID_TAG              ) pg.ID = tokenValue;\n+        else if ( tokenTag == Constants::SAM_PG_NAME_TAG            ) pg.Name = tokenValue;\n+        else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG     ) pg.CommandLine = tokenValue;\n+        else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;\n+        else if ( tokenTag == Constants::SAM_PG_VERSION_TAG         ) pg.Version = tokenValue;\n+        else\n+            cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;\n+    }\n+\n+    bool isMissingRequiredFields = false;\n+\n+    // if @PG line exists, ID must be provided\n+    if ( !pg.HasID() ) {\n+        isMissingRequiredFields = true;\n+        cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;\n+    }\n+\n+    // store SAM program record\n+    if ( !isMissingRequiredFields )\n+        m_header.Programs.Add(pg);\n+}\n+\n+void SamFormatParser::ParseCOLine(const string& line) {\n+    // simply add line to comments list\n+    m_header.Comments.push_back(line);\n+}\n+\n+const vector<string> SamFormatParser::Split(const string& line, const char 
delim) {\n+    vector<string> tokens;\n+    stringstream lineStream(line);\n+    string token;\n+    while ( getline(lineStream, token, delim) )\n+        tokens.push_back(token);\n+    return tokens;\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,62 @@
+// ***************************************************************************
+// SamFormatParser_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for parsing SAM header text into SamHeader object
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PARSER_H
+#define SAM_FORMAT_PARSER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+class SamFormatParser {
+
+    // ctor & dtor
+    public:
+        SamFormatParser(BamTools::SamHeader& header);
+        ~SamFormatParser(void);
+
+    // parse text & populate header data
+    public:
+        void Parse(const std::string& headerText);
+
+    // internal methods
+    private:
+        void ParseSamLine(const std::string& line);
+        void ParseHDLine(const std::string& line);
+        void ParseSQLine(const std::string& line);
+        void ParseRGLine(const std::string& line);
+        void ParsePGLine(const std::string& line);
+        void ParseCOLine(const std::string& line);
+        const std::vector<std::string> Split(const std::string& line, const char delim);
+
+    // data members
+    private:
+        SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PARSER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,211 @@
+// ***************************************************************************
+// SamFormatPrinter.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 April 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#include <api/SamConstants.h>
+#include <api/SamHeader.h>
+#include <api/internal/SamFormatPrinter_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+// binds the printer to the header it will format (kept as a const reference)
+SamFormatPrinter::SamFormatPrinter(const SamHeader& header)
+    : m_header(header)
+{
+}
+
+// nothing to clean up
+SamFormatPrinter::~SamFormatPrinter(void)
+{
+}
+
+// builds a single header field: SAM_TAB, @tag, SAM_COLON, @value (in that order)
+const string SamFormatPrinter::FormatTag(const string &tag, const string &value) const {
+    string field;
+    field += Constants::SAM_TAB;
+    field += tag;
+    field += Constants::SAM_COLON;
+    field += value;
+    return field;
+}
+
+// returns the complete header text, built section by section
+const string SamFormatPrinter::ToString(void) const {
+
+    // start from an empty buffer
+    stringstream headerText;
+
+    // sections are written in this fixed order: HD, SQ, RG, PG, CO
+    PrintHD(headerText);
+    PrintSQ(headerText);
+    PrintRG(headerText);
+    PrintPG(headerText);
+    PrintCO(headerText);
+
+    // hand back everything that was written
+    return headerText.str();
+}
+
+void SamFormatPrinter::PrintHD(std::stringstream& out) const {
+
+    // if header has @HD data
+    if ( m_header.HasVersion() ) {
+
+        // @HD VN:<Version>
+        out << Constants::SAM_HD_BEGIN_TOKEN
+            << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version);
+
+        // SO:<SortOrder>
+        if ( m_header.HasSortOrder() )
+            out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder);
+
+        // GO:<GroupOrder>
+        if ( m_header.HasGroupOrder() )
+            out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder);
+
+        // newline
+        out << endl;
+    }
+}
+
+void SamFormatPrinter::PrintSQ(std::stringstream& out) const {
+
+    // iterate over sequence entries
+    SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin();
+    SamSequenceConstIterator seqEnd  = m_header.Sequences.ConstEnd();
+    for ( ; seqIter != seqEnd; ++seqIter ) {
+        const SamSequence& seq = (*seqIter);
+
+        // @SQ SN:<Name> LN:<Length>
+        out << Constants::SAM_SQ_BEGIN_TOKEN
+            << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name)
+            << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length);
+
+        // AS:<AssemblyID>
+        if ( seq.HasAssemblyID() )
+            out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID);
+
+        // M5:<Checksum>
+        if ( seq.HasChecksum() )
+            out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum);
+
+        // SP:<Species>
+        if ( seq.HasSpecies() )
+            out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species);
+
+        // UR:<URI>
+        if ( seq.HasURI() )
+            out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI);
+
+        // newline
+        out << endl;
+    }
+}
+
+void SamFormatPrinter::PrintRG(std::stringstream& out) const {
+
+    // iterate over read group entries
+    SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin();
+    SamReadGroupConstIterator rgEnd  = m_header.ReadGroups.ConstEnd();
+    for ( ; rgIter != rgEnd; ++rgIter ) {
+        const SamReadGroup& rg = (*rgIter);
+
+        // @RG ID:<ID>
+        out << Constants::SAM_RG_BEGIN_TOKEN
+            << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID);
+
+        // CN:<SequencingCenter>
+        if ( rg.HasSequencingCenter() )
+            out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter);
+
+        // DS:<Description>
+        if ( rg.HasDescription() )
+            out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description);
+
+        // DT:<ProductionDate>
+        if ( rg.HasProductionDate() )
+            out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate);
+
+        // FO:<FlowOrder>
+        if ( rg.HasFlowOrder() )
+            out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder);
+
+        // KS:<KeySequence>
+        if ( rg.HasKeySequence() )
+            out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence);
+
+        // LB:<Library>
+        if ( rg.HasLibrary() )
+            out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library);
+
+        // PG:<Program>
+        if ( rg.HasProgram() )
+            out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program);
+
+        // PI:<PredictedInsertSize>
+        if ( rg.HasPredictedInsertSize() )
+            out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize);
+
+        // PL:<SequencingTechnology>
+        if ( rg.HasSequencingTechnology() )
+            out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology);
+
+        // PU:<PlatformUnit>
+        if ( rg.HasPlatformUnit() )
+            out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit);
+
+        // SM:<Sample>
+        if ( rg.HasSample() )
+            out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample);
+
+        // newline
+        out << endl;
+    }
+}
+
+void SamFormatPrinter::PrintPG(std::stringstream& out) const {
+
+    // iterate over program record entries
+    SamProgramConstIterator pgIter = m_header.Programs.ConstBegin();
+    SamProgramConstIterator pgEnd  = m_header.Programs.ConstEnd();
+    for ( ; pgIter != pgEnd; ++pgIter ) {
+        const SamProgram& pg = (*pgIter);
+
+        // @PG ID:<ID>
+        out << Constants::SAM_PG_BEGIN_TOKEN
+            << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID);
+
+        // PN:<Name>
+        if ( pg.HasName() )
+            out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name);
+
+        // CL:<CommandLine>
+        if ( pg.HasCommandLine() )
+            out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine);
+
+        // PP:<PreviousProgramID>
+        if ( pg.HasPreviousProgramID() )
+            out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID);
+
+        // VN:<Version>
+        if ( pg.HasVersion() )
+            out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version);
+
+        // newline
+        out << endl;
+    }
+}
+
+void SamFormatPrinter::PrintCO(std::stringstream& out) const {
+
+    // iterate over comments
+    vector<string>::const_iterator commentIter = m_header.Comments.begin();
+    vector<string>::const_iterator commentEnd  = m_header.Comments.end();
+    for ( ; commentIter != commentEnd; ++commentIter ) {
+
+        // @CO <Comment>
+        out << Constants::SAM_CO_BEGIN_TOKEN
+            << Constants::SAM_TAB
+            << (*commentIter)
+            << endl;
+    }
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,61 @@
+// ***************************************************************************
+// SamFormatPrinter.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 23 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for printing formatted SAM header to string
+// ***************************************************************************
+
+#ifndef SAM_FORMAT_PRINTER_H
+#define SAM_FORMAT_PRINTER_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+
+class SamHeader;
+
+namespace Internal {
+
+class SamFormatPrinter {
+
+    // ctor & dtor
+    public:
+        SamFormatPrinter(const BamTools::SamHeader& header);
+        ~SamFormatPrinter(void);
+
+    // generates SAM-formatted string from header data
+    public:
+        const std::string ToString(void) const;
+
+    // internal methods
+    private:
+        const std::string FormatTag(const std::string& tag, const std::string& value) const;
+        void PrintHD(std::stringstream& out) const;
+        void PrintSQ(std::stringstream& out) const;
+        void PrintRG(std::stringstream& out) const;
+        void PrintPG(std::stringstream& out) const;
+        void PrintCO(std::stringstream& out) const;
+
+    // data members
+    private:
+        const SamHeader& m_header;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_FORMAT_PRINTER_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp Thu Nov 03 10:25:04 2011 -0400
b
b'@@ -0,0 +1,511 @@\n+// ***************************************************************************\n+// SamHeaderValidator.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides functionality for validating SamHeader data\n+// ***************************************************************************\n+\n+#include <api/SamConstants.h>\n+#include <api/SamHeader.h>\n+#include <api/internal/SamHeaderValidator_p.h>\n+#include <api/internal/SamHeaderVersion_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cctype>\n+#include <iostream>\n+#include <set>\n+#include <sstream>\n+using namespace std;\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {\n+\n+    // can omit checking chars if lengths not equal\n+    const int lhsLength = lhs.length();\n+    const int rhsLength = rhs.length();\n+    if ( lhsLength != rhsLength )\n+        return false;\n+\n+    // do *basic* toupper checks on each string char\'s\n+    for ( int i = 0; i < lhsLength; ++i ) {\n+        if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )\n+            return false;\n+    }\n+\n+    // otherwise OK\n+    return true;\n+}\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+// ------------------------------------------------------------------------\n+// Allow validation rules to vary, as needed, between SAM header versions\n+//\n+// use SAM_VERSION_X_Y to tag important changes\n+//\n+// Together, they will allow for comparisons like:\n+// if ( m_version < SAM_VERSION_2_0 ) {\n+//     // use some older rule\n+// else\n+//     // use rule introduced with version 2.0\n+\n+static const SamHeaderVersion SAM_VERSION_1_0 = 
SamHeaderVersion(1,0);\n+static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);\n+static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);\n+static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);\n+static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);\n+\n+// TODO: This functionality is currently unused.\n+//       Make validation "version-aware."\n+//\n+// ------------------------------------------------------------------------\n+\n+const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";\n+const string SamHeaderValidator::WARN_PREFIX  = "WARNING: ";\n+const string SamHeaderValidator::NEWLINE      = "\\n";\n+\n+SamHeaderValidator::SamHeaderValidator(const SamHeader& header)\n+    : m_header(header)\n+{ }\n+\n+SamHeaderValidator::~SamHeaderValidator(void) { }\n+\n+bool SamHeaderValidator::Validate(bool verbose) {\n+\n+    // validate header components\n+    bool isValid = true;\n+    isValid &= ValidateMetadata();\n+    isValid &= ValidateSequenceDictionary();\n+    isValid &= ValidateReadGroupDictionary();\n+    isValid &= ValidateProgramChain();\n+\n+    // report errors if desired\n+    if ( verbose ) {\n+        PrintErrorMessages();\n+        PrintWarningMessages();\n+    }\n+\n+    // return validation status\n+    return isValid;\n+}\n+\n+bool SamHeaderValidator::ValidateMetadata(void) {\n+    bool isValid = true;\n+    isValid &= ValidateVersion();\n+    isValid &= ValidateSortOrder();\n+    isValid &= ValidateGroupOrder();\n+    return isValid;\n+}\n+\n+bool SamHeaderValidator::ValidateVersion(void) {\n+\n+    const string& version = m_header.Version;\n+\n+    // warn if version not present\n+    if ( version.empty() ) {\n+        AddWarning("Version (VN) missing. 
Not required, but strongly recommended");\n+        return true;\n+    }\n+\n+    // invalid if version does not contain a period\n+    const size_t periodFound = version.find(Constants::SAM_PERIOD);\n+    if ( periodFound == string::npos ) {\n+        AddError("Invalid version (VN) format: " + version);\n+        return false;\n+    }\n+\n+    // invalid if '..b'APILLARY)  ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS)    ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA)   ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454)      ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO)     ||\n+         caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)\n+       )\n+    {\n+        return true;\n+    }\n+\n+    // otherwise\n+    AddError("Invalid read group sequencing platform (PL): " + technology);\n+    return false;\n+}\n+\n+bool SamHeaderValidator::ValidateProgramChain(void) {\n+    bool isValid = true;\n+    isValid &= ContainsUniqueProgramIds();\n+    isValid &= ValidatePreviousProgramIds();\n+    return isValid;\n+}\n+\n+bool SamHeaderValidator::ContainsUniqueProgramIds(void) {\n+\n+    bool isValid = true;\n+    set<string> programIds;\n+    set<string>::iterator pgIdIter;\n+\n+    // iterate over program records\n+    const SamProgramChain& programs = m_header.Programs;\n+    SamProgramConstIterator pgIter = programs.ConstBegin();\n+    SamProgramConstIterator pgEnd  = programs.ConstEnd();\n+    for ( ; pgIter != pgEnd; ++pgIter ) {\n+        const SamProgram& pg = (*pgIter);\n+\n+        // lookup program ID\n+        const string& pgId = pg.ID;\n+        pgIdIter = programIds.find(pgId);\n+\n+        // error if found (duplicate entry)\n+        if ( pgIdIter != programIds.end() ) {\n+      
      AddError("Program ID (ID): " + pgId + " is not unique");\n+            isValid = false;\n+        }\n+\n+        // otherwise ok, store ID\n+        programIds.insert(pgId);\n+    }\n+\n+    // return validation state\n+    return isValid;\n+}\n+\n+bool SamHeaderValidator::ValidatePreviousProgramIds(void) {\n+\n+    bool isValid = true;\n+\n+    // iterate over program records\n+    const SamProgramChain& programs = m_header.Programs;\n+    SamProgramConstIterator pgIter = programs.ConstBegin();\n+    SamProgramConstIterator pgEnd  = programs.ConstEnd();\n+    for ( ; pgIter != pgEnd; ++pgIter ) {\n+        const SamProgram& pg = (*pgIter);\n+\n+        // ignore record for validation if PreviousProgramID is empty\n+        const string& ppId = pg.PreviousProgramID;\n+        if ( ppId.empty() )\n+            continue;\n+\n+        // see if program "chain" contains an entry for ppId\n+        if ( !programs.Contains(ppId) ) {\n+            AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");\n+            isValid = false;\n+        }\n+    }\n+\n+    // return validation state\n+    return isValid;\n+}\n+void SamHeaderValidator::AddError(const string& message) {\n+    m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);\n+}\n+\n+void SamHeaderValidator::AddWarning(const string& message) {\n+    m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);\n+}\n+\n+void SamHeaderValidator::PrintErrorMessages(void) {\n+\n+    // skip if no error messages\n+    if ( m_errorMessages.empty() ) return;\n+\n+    // print error header line\n+    cerr << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;\n+\n+    // print each error message\n+    vector<string>::const_iterator errorIter = m_errorMessages.begin();\n+    vector<string>::const_iterator errorEnd  = m_errorMessages.end();\n+    for ( ; errorIter != errorEnd; ++errorIter )\n+        cerr << (*errorIter);\n+}\n+\n+void SamHeaderValidator::PrintWarningMessages(void) 
{\n+\n+    // skip if no warning messages\n+    if ( m_warningMessages.empty() ) return;\n+\n+    // print warning header line\n+    cerr << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;\n+\n+    // print each warning message\n+    vector<string>::const_iterator warnIter = m_warningMessages.begin();\n+    vector<string>::const_iterator warnEnd  = m_warningMessages.end();\n+    for ( ; warnIter != warnEnd; ++warnIter )\n+        cerr << (*warnIter);\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,102 @@
+// ***************************************************************************
+// SamHeaderValidator.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 13 January 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for validating SamHeader data
+// ***************************************************************************
+
+#ifndef SAM_HEADER_VALIDATOR_P_H
+#define SAM_HEADER_VALIDATOR_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class SamHeader;
+class SamReadGroup;
+class SamSequence;
+
+namespace Internal {
+
+class SamHeaderValidator {
+
+    // ctor & dtor
+    public:
+        SamHeaderValidator(const SamHeader& header);
+        ~SamHeaderValidator(void);
+
+    // SamHeaderValidator interface
+    public:
+        // validates SamHeader data, returns true/false accordingly
+        // prints error & warning messages to stderr when @verbose is true
+        bool Validate(bool verbose = false);
+
+    // internal methods
+    private:
+
+        // validate header metadata
+        bool ValidateMetadata(void);
+        bool ValidateVersion(void);
+        bool ContainsOnlyDigits(const std::string& s);
+        bool ValidateSortOrder(void);
+        bool ValidateGroupOrder(void);
+
+        // validate sequence dictionary
+        bool ValidateSequenceDictionary(void);
+        bool ContainsUniqueSequenceNames(void);
+        bool CheckNameFormat(const std::string& name);
+        bool ValidateSequence(const SamSequence& seq);
+        bool CheckLengthInRange(const std::string& length);
+
+        // validate read group dictionary
+        bool ValidateReadGroupDictionary(void);
+        bool ContainsUniqueIDsAndPlatformUnits(void);
+        bool ValidateReadGroup(const SamReadGroup& rg);
+        bool CheckReadGroupID(const std::string& id);
+        bool CheckSequencingTechnology(const std::string& technology);
+
+        // validate program data
+        bool ValidateProgramChain(void);
+        bool ContainsUniqueProgramIds(void);
+        bool ValidatePreviousProgramIds(void);
+
+        // error reporting
+        void AddError(const std::string& message);
+        void AddWarning(const std::string& message);
+        void PrintErrorMessages(void);
+        void PrintWarningMessages(void);
+
+    // data members
+    private:
+
+        // SamHeader being validated
+        const SamHeader& m_header;
+
+        // error reporting helpers
+        static const std::string ERROR_PREFIX;
+        static const std::string WARN_PREFIX;
+        static const std::string NEWLINE;
+
+        // error reporting messages
+        std::vector<std::string> m_errorMessages;
+        std::vector<std::string> m_warningMessages;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // SAM_HEADER_VALIDATOR_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,135 @@
+// ***************************************************************************
+// SamHeaderVersion.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 24 February 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides functionality for comparing SAM header versions
+// *************************************************************************
+
+#ifndef SAM_HEADERVERSION_P_H
+#define SAM_HEADERVERSION_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <api/SamConstants.h>
+#include <sstream>
+#include <string>
+
+namespace BamTools {
+namespace Internal {
+
+class SamHeaderVersion {
+
+    // ctors & dtor
+    public:
+        SamHeaderVersion(void)
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        { }
+
+        explicit SamHeaderVersion(const std::string& version)
+            : m_majorVersion(0)
+            , m_minorVersion(0)
+        {
+            SetVersion(version);
+        }
+
+        SamHeaderVersion(const unsigned int& major, const unsigned int& minor)
+            : m_majorVersion(major)
+            , m_minorVersion(minor)
+        { }
+
+        ~SamHeaderVersion(void) {
+            m_majorVersion = 0;
+            m_minorVersion = 0;
+        }
+    
+    // acess data
+    public:
+        unsigned int MajorVersion(void) const { return m_majorVersion; }
+        unsigned int MinorVersion(void) const { return m_minorVersion; }
+
+        void SetVersion(const std::string& version);
+        std::string ToString(void) const;
+
+    // data members
+    private:
+        unsigned int m_majorVersion;
+        unsigned int m_minorVersion;
+};
+
+inline
+void SamHeaderVersion::SetVersion(const std::string& version) {
+
+    // do nothing if version is empty
+    if ( !version.empty() ) {
+
+        std::stringstream versionStream("");
+
+        // do nothing if period not found
+        const size_t periodFound = version.find(Constants::SAM_PERIOD);
+        if ( periodFound != std::string::npos ) {
+
+            // store major version if non-empty and contains only digits
+            const std::string& majorVersion = version.substr(0, periodFound);
+            versionStream.str(majorVersion);
+            if ( !majorVersion.empty() ) {
+                const size_t nonDigitFound = majorVersion.find_first_not_of(Constants::SAM_DIGITS);
+                if ( nonDigitFound == std::string::npos )
+                    versionStream >> m_majorVersion;
+            }
+
+            // store minor version if non-empty and contains only digits
+            const std::string& minorVersion = version.substr(periodFound + 1);
+            versionStream.str(minorVersion);
+            if ( !minorVersion.empty() ) {
+                const size_t nonDigitFound = minorVersion.find_first_not_of(Constants::SAM_DIGITS);
+                if ( nonDigitFound == std::string::npos )
+                    versionStream >> m_minorVersion;
+            }
+        }
+    }
+}
+
+// -----------------------------------------------------
+// printing
+
+// formats the version as <major><SAM_PERIOD><minor>
+inline std::string SamHeaderVersion::ToString(void) const {
+    std::ostringstream formatted;
+    formatted << m_majorVersion;
+    formatted << Constants::SAM_PERIOD;
+    formatted << m_minorVersion;
+    return formatted.str();
+}
+
+// -----------------------------------------------------
+// comparison operators
+
+// equal only when both the major and the minor numbers match
+inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return false;
+    return lhs.MinorVersion() == rhs.MinorVersion();
+}
+
+// orders by major number first; the minor number only breaks ties
+inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    if ( lhs.MajorVersion() != rhs.MajorVersion() )
+        return lhs.MajorVersion() < rhs.MajorVersion();
+    return lhs.MinorVersion() < rhs.MinorVersion();
+}
+
+// remaining comparisons, all expressed through operator<
+inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return rhs < lhs;
+}
+
+inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return !(rhs < lhs);
+}
+
+inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) {
+    return !(lhs < rhs);
+}
+
+} // namespace Internal 
+} // namespace BamTools
+
+#endif // SAM_HEADERVERSION_P_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,79 @@
+// ***************************************************************************
+// bamtools_global.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 3 March 2011 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic definitions for exporting & importing library symbols.
+// Also provides some platform-specific rules for definitions.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_GLOBAL_H
+#define BAMTOOLS_GLOBAL_H
+
+/*! \brief Library export macro
+    \internal
+
+    Windows is detected through either WIN32 (set by many build systems) or
+    _WIN32 (predefined by the compiler), so the check does not depend on the
+    build system supplying WIN32.
+*/
+#ifndef BAMTOOLS_LIBRARY_EXPORT
+#  if defined(WIN32) || defined(_WIN32)
+#    define BAMTOOLS_LIBRARY_EXPORT __declspec(dllexport)
+#  else
+#    define BAMTOOLS_LIBRARY_EXPORT __attribute__((visibility("default")))
+#  endif
+#endif // BAMTOOLS_LIBRARY_EXPORT
+
+/*! \brief Library import macro
+    \internal
+
+    Uses the same Windows check as BAMTOOLS_LIBRARY_EXPORT so the two macros
+    always agree; expands to nothing on other platforms.
+*/
+#ifndef BAMTOOLS_LIBRARY_IMPORT
+#  if defined(WIN32) || defined(_WIN32)
+#    define BAMTOOLS_LIBRARY_IMPORT __declspec(dllimport)
+#  else
+#    define BAMTOOLS_LIBRARY_IMPORT
+#  endif
+#endif // BAMTOOLS_LIBRARY_IMPORT
+
+/*! \brief Platform-specific 64-bit file tell/seek wrappers
+    \internal
+
+    Windows is detected through either WIN32 (set by many build systems) or
+    _WIN32 (predefined by the compiler); every other platform falls back to
+    ftello()/fseeko().
+*/
+#ifndef BAMTOOLS_LFS
+#define BAMTOOLS_LFS
+    #if defined(WIN32) || defined(_WIN32)
+        #define ftell64(a)     _ftelli64(a)
+        #define fseek64(a,b,c) _fseeki64(a,b,c)
+    #else
+        #define ftell64(a)     ftello(a)
+        #define fseek64(a,b,c) fseeko(a,b,c)
+    #endif
+#endif // BAMTOOLS_LFS
+
+/*! \def ftell64(a)
+    \brief Platform-independent tell() operation.
+    \internal
+*/
+/*! \def fseek64(a,b,c)
+    \brief Platform-independent seek() operation.
+    \internal
+*/
+
+/*! \brief Platform-specific type definitions
+    \internal
+*/
+#ifndef BAMTOOLS_TYPES
+#define BAMTOOLS_TYPES
+    #ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+    #else
+        #include <stdint.h>
+    #endif
+#endif // BAMTOOLS_TYPES
+
+#endif // BAMTOOLS_GLOBAL_H
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,325 @@\n+// ***************************************************************************\n+// FastaIndex.cpp (c) 2010 Erik Garrison <erik.garrison@bc.edu>\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 9 February 2010 (EG)\n+// ---------------------------------------------------------------------------\n+\n+#include "Fasta.h"\n+\n+FastaIndexEntry::FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len)\n+    : name(name)\n+    , length(length)\n+    , offset(offset)\n+    , line_blen(line_blen)\n+    , line_len(line_len)\n+{}\n+\n+FastaIndexEntry::FastaIndexEntry(void) // empty constructor\n+{ clear(); }\n+\n+FastaIndexEntry::~FastaIndexEntry(void)\n+{}\n+\n+void FastaIndexEntry::clear(void)\n+{\n+    name = "";\n+    length = NULL;\n+    offset = -1;  // no real offset will ever be below 0, so this allows us to\n+                  // check if we have already recorded a real offset\n+    line_blen = NULL;\n+    line_len  = NULL;\n+}\n+\n+ostream& operator<<(ostream& output, const FastaIndexEntry& e) {\n+    // just write the first component of the name, for compliance with other tools\n+    output << split(e.name, \' \').at(0) << "\\t" << e.length << "\\t" << e.offset << "\\t" <<\n+        e.line_blen << "\\t" << e.line_len;\n+    return output;  // for multiple << operators.\n+}\n+\n+FastaIndex::FastaIndex(void) \n+{}\n+\n+void FastaIndex::readIndexFile(string fname) {\n+    string line;\n+    long long linenum = 0;\n+    indexFile.open(fname.c_str(), ifstream::in);\n+    if (indexFile.is_open()) {\n+        while (getline (indexFile, line)) {\n+            ++linenum;\n+            // the fai format defined in samtools is tab-delimited, every line being:\n+            // fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len\n+            vector<string> fields = 
split(line, \'\\t\');\n+            if (fields.size() == 5) {  // if we don\'t get enough fields then there is a problem with the file\n+                // note that fields[0] is the sequence name\n+                char* end;\n+                string name = split(fields[0], " \\t").at(0);  // key by first token of name\n+                sequenceNames.push_back(name);\n+                this->insert(make_pair(name, FastaIndexEntry(fields[0], atoi(fields[1].c_str()),\n+                                                    strtoll(fields[2].c_str(), &end, 10),\n+                                                    atoi(fields[3].c_str()),\n+                                                    atoi(fields[4].c_str()))));\n+            } else {\n+                cerr << "Warning: malformed fasta index file " << fname << \n+                    "does not have enough fields @ line " << linenum << endl;\n+                cerr << line << endl;\n+                exit(1);\n+            }\n+        }\n+    } else {\n+        cerr << "could not open index file " << fname << endl;\n+        exit(1);\n+    }\n+}\n+\n+// for consistency this should be a class method\n+bool fastaIndexEntryCompare ( FastaIndexEntry a, FastaIndexEntry b) { return (a.offset<b.offset); }\n+\n+ostream& operator<<(ostream& output, FastaIndex& fastaIndex) {\n+    vector<FastaIndexEntry> sortedIndex;\n+    for(vector<string>::const_iterator it = fastaIndex.sequenceNames.begin(); it != fastaIndex.sequenceNames.end(); ++it)\n+    {\n+        sortedIndex.push_back(fastaIndex[*it]);\n+    }\n+    sort(sortedIndex.begin(), sortedIndex.end(), fastaIndexEntryCompare);\n+    for( vector<FastaIndexEntry>::iterator fit = sortedIndex.begin(); fit != sortedIndex.end(); ++fit) {\n+        output << *fit << endl;\n+    }\n+    return output;\n+}\n+\n+void FastaIndex::indexReference(string refname) {\n+    // overview:\n+    //  for line in the reference fasta file\n+    //  track byte offset from the start of the file\n+    
//  if line is a fasta header, take the name and dump the last sequnece to the index\n+    //  if line is a sequen'..b'    exit(1);\n+    } else {\n+        return e->second;\n+    }\n+}\n+\n+string FastaIndex::indexFileExtension() { return ".fai"; }\n+\n+void FastaReference::open(string reffilename, bool usemmap) {\n+    filename = reffilename;\n+    if (!(file = fopen(filename.c_str(), "r"))) {\n+        cerr << "could not open " << filename << endl;\n+        exit(1);\n+    }\n+    index = new FastaIndex();\n+    struct stat stFileInfo; \n+    string indexFileName = filename + index->indexFileExtension(); \n+    // if we can find an index file, use it\n+    if(stat(indexFileName.c_str(), &stFileInfo) == 0) { \n+        index->readIndexFile(indexFileName);\n+    } else { // otherwise, read the reference and generate the index file in the cwd\n+        cerr << "index file " << indexFileName << " not found, generating..." << endl;\n+        index->indexReference(filename);\n+        index->writeIndexFile(indexFileName);\n+    }\n+    if (usemmap) {\n+        usingmmap = true;\n+        int fd = fileno(file);\n+        struct stat sb;\n+        if (fstat(fd, &sb) == -1)\n+            cerr << "could not stat file" << filename << endl;\n+        filesize = sb.st_size;\n+        // map the whole file\n+        filemm = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);\n+    }\n+}\n+\n+FastaReference::~FastaReference(void) {\n+    fclose(file);\n+    if (usingmmap) {\n+        munmap(filemm, filesize);\n+    }\n+    delete index;\n+}\n+\n+string FastaReference::getSequence(string seqname) {\n+    FastaIndexEntry entry = index->entry(seqname);\n+    int newlines_in_sequence = entry.length / entry.line_blen;\n+    int seqlen = newlines_in_sequence  + entry.length;\n+    char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n+    if (usingmmap) {\n+        memcpy(seq, (char*) filemm + entry.offset, seqlen);\n+    } else {\n+        fseek64(file, entry.offset, 
SEEK_SET);\n+        fread(seq, sizeof(char), seqlen, file);\n+    }\n+    seq[seqlen] = \'\\0\';\n+    char* pbegin = seq;\n+    char* pend = seq + (seqlen/sizeof(char));\n+    pend = remove(pbegin, pend, \'\\n\');\n+    pend = remove(pbegin, pend, \'\\0\');\n+    string s = seq;\n+    free(seq);\n+    s.resize((pend - pbegin)/sizeof(char));\n+    return s;\n+}\n+\n+// TODO cleanup; odd function.  use a map\n+string FastaReference::sequenceNameStartingWith(string seqnameStart) {\n+    try {\n+        return (*index)[seqnameStart].name;\n+    } catch (exception& e) {\n+        cerr << e.what() << ": unable to find index entry for " << seqnameStart << endl;\n+        exit(1);\n+    }\n+}\n+\n+string FastaReference::getSubSequence(string seqname, int start, int length) {\n+    FastaIndexEntry entry = index->entry(seqname);\n+    if (start < 0 || length < 1) {\n+        cerr << "Error: cannot construct subsequence with negative offset or length < 1" << endl;\n+        exit(1);\n+    }\n+    // we have to handle newlines\n+    // approach: count newlines before start\n+    //           count newlines by end of read\n+    //             subtracting newlines before start find count of embedded newlines\n+    int newlines_before = start > 0 ? 
(start - 1) / entry.line_blen : 0;\n+    int newlines_by_end = (start + length - 1) / entry.line_blen;\n+    int newlines_inside = newlines_by_end - newlines_before;\n+    int seqlen = length + newlines_inside;\n+    char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n+    if (usingmmap) {\n+        memcpy(seq, (char*) filemm + entry.offset + newlines_before + start, seqlen);\n+    } else {\n+        fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET);\n+        fread(seq, sizeof(char), (off_t) seqlen, file);\n+    }\n+    seq[seqlen] = \'\\0\';\n+    char* pbegin = seq;\n+    char* pend = seq + (seqlen/sizeof(char));\n+    pend = remove(pbegin, pend, \'\\n\');\n+    pend = remove(pbegin, pend, \'\\0\');\n+    string s = seq;\n+    free(seq);\n+    s.resize((pend - pbegin)/sizeof(char));\n+    return s;\n+}\n+\n+long unsigned int FastaReference::sequenceLength(string seqname) {\n+    FastaIndexEntry entry = index->entry(seqname);\n+    return entry.length;\n+}\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,78 @@
+// ***************************************************************************
+// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu>
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 February 2010 (EG)
+// ---------------------------------------------------------------------------
+
+#ifndef _FASTA_H
+#define _FASTA_H
+
+#include <map>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <stdint.h>
+#include <stdio.h>
+#include <algorithm>
+#include "LargeFileSupport.h"
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include "split.h"
+#include <stdlib.h>
+#include <ctype.h>
+#include <unistd.h>
+
+using namespace std;
+
+// One record of a samtools-style .fai index: the location of a single FASTA
+// sequence inside the reference file and the layout of its lines.
+class FastaIndexEntry {
+    // Writes the entry as one tab-separated .fai line; only the first
+    // space-delimited token of the name is emitted.
+    friend ostream& operator<<(ostream& output, const FastaIndexEntry& e);
+    public:
+        // Build an entry directly from the five .fai columns.
+        FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len);
+        // Build an empty entry; equivalent to calling clear() on a new object.
+        FastaIndexEntry(void);
+        ~FastaIndexEntry(void);
+        string name;  // sequence name (full header text as stored in the index)
+        int length;  // length of sequence
+        long long offset;  // byte offset of the sequence's first base from start of file
+        int line_blen;  // sequence characters per line (line length without the newline)
+        int line_len;  // line length including newline
+        // Reset all fields; offset is set to -1 so that an entry with no
+        // recorded offset can be told apart from a real one.
+        void clear(void);
+};
+
+// In-memory .fai index: a map from sequence name to its FastaIndexEntry.
+// readIndexFile() keys each entry by the first whitespace-delimited token of
+// the name column.
+class FastaIndex : public map<string, FastaIndexEntry> {
+    // Writes every entry listed in sequenceNames, sorted by file offset, one per line.
+    friend ostream& operator<<(ostream& output, FastaIndex& i);
+    public:
+        FastaIndex(void);
+        ~FastaIndex(void);
+        // Keys in the order they were appended while reading/building the index.
+        vector<string> sequenceNames;
+        // Build the index by scanning the FASTA file refName line by line.
+        void indexReference(string refName);
+        // Load a tab-delimited, five-column .fai file; prints a message and
+        // exits the process if the file cannot be opened or a line does not
+        // have exactly five fields.
+        void readIndexFile(string fname);
+        // NOTE(review): presumably writes the index to fname in .fai format;
+        // the definition is not visible here - confirm in Fasta.cpp.
+        void writeIndexFile(string fname);
+        ifstream indexFile;  // stream used by readIndexFile()
+        // Return a copy of the entry stored under key.
+        // NOTE(review): appears to exit the process when key is absent - confirm.
+        FastaIndexEntry entry(string key);
+        // NOTE(review): presumably stores a completed entry into the map and
+        // sequenceNames during indexReference() - confirm in Fasta.cpp.
+        void flushEntryToIndex(FastaIndexEntry& entry);
+        // Suffix appended to the FASTA file name to locate its index (".fai").
+        string indexFileExtension(void);
+};
+
+class FastaReference {
+    public:
+        void open(string reffilename, bool usemmap = false);
+        bool usingmmap;
+        string filename;
+        FastaReference(void) : usingmmap(false) { }
+        ~FastaReference(void);
+        FILE* file;
+        void* filemm;
+        size_t filesize;
+        FastaIndex* index;
+        vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart);
+        string getSequence(string seqname);
+        // potentially useful for performance, investigate
+        // void getSequence(string seqname, string& sequence);
+        string getSubSequence(string seqname, int start, int length);
+        string sequenceNameStartingWith(string seqnameStart);
+        long unsigned int sequenceLength(string seqname);
+};
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,13 @@
+#pragma once
+
+// 64-bit file offset helpers: ftell64()/fseek64() and the matching off_type.
+//
+// _FILE_OFFSET_BITS only takes effect when it is defined before the first
+// system header of the translation unit, so include this header ahead of
+// <stdio.h> and friends (or pass -D_FILE_OFFSET_BITS=64 on the command line).
+// The guard avoids a redefinition warning when the build already sets it.
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+// _WIN32 is the macro the compiler itself defines; WIN32 is kept for builds
+// that set it by hand.
+#if defined(_WIN32) || defined(WIN32)
+#define ftell64(a)     _ftelli64(a)
+#define fseek64(a,b,c) _fseeki64(a,b,c)
+// _ftelli64/_fseeki64 work in __int64; __int64_t is not a Windows type.
+typedef __int64 off_type;
+#else
+#include <sys/types.h>  // off_t
+#define ftell64(a)     ftello(a)
+#define fseek64(a,b,c) fseeko(a,b,c)
+typedef off_t off_type;
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,26 @@
+# Builds the Fasta utility objects (Fasta.o, split.o) into the shared obj dir.
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = 
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= Fasta.cpp split.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+all: $(BUILT_OBJECTS)
+
+# clean never produces a file named "clean", so it must be phony as well.
+.PHONY: all clean
+
+# Static pattern rule: each object depends only on its own source file, so
+# touching Fasta.cpp no longer forces split.o to be rebuilt (and vice versa).
+$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
+	@echo "  * compiling" $<
+	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS)
+
+# Removes everything in the shared obj and bin directories, not just the
+# objects built here.
+clean:
+	@echo "Cleaning up."
+	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,33 @@
+#include "split.h"
+
+// Append every delim-separated field of s to elems, in order, and return
+// elems. Empty fields between adjacent delimiters are kept; a trailing
+// delimiter does not yield a final empty field (std::getline semantics).
+std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
+    std::stringstream fields(s);
+    for (std::string field; std::getline(fields, field, delim); ) {
+        elems.push_back(field);
+    }
+    return elems;
+}
+
+// Convenience form: split s on delim into a freshly created vector.
+std::vector<std::string> split(const std::string &s, char delim) {
+    std::vector<std::string> fields;
+    split(s, delim, fields);
+    return fields;
+}
+
+std::vector<std::string> &split(const std::string &s, const std::string& delims, std::vector<std::string> &elems) {
+    char* tok;
+    char cchars [s.size()+1];
+    char* cstr = &cchars[0];
+    strcpy(cstr, s.c_str());
+    tok = strtok(cstr, delims.c_str());
+    while (tok != NULL) {
+        elems.push_back(tok);
+        tok = strtok(NULL, delims.c_str());
+    }
+    return elems;
+}
+
+// Convenience form: tokenize s on any character of delims into a freshly
+// created vector.
+std::vector<std::string> split(const std::string &s, const std::string& delims) {
+    std::vector<std::string> tokens;
+    split(s, delims, tokens);
+    return tokens;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/split.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/split.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,20 @@
+#ifndef __SPLIT_H
+#define __SPLIT_H
+
+// functions to split a string by a specific delimiter
+#include <string>
+#include <vector>
+#include <sstream>
+#include <string.h>
+
+// thanks to Evan Teran, http://stackoverflow.com/questions/236129/how-to-split-a-string/236803#236803
+
+// split a string on a single delimiter character (delim)
+// Empty fields between adjacent delimiters are kept ("a,,b" gives "a", "", "b");
+// a trailing delimiter does not add a final empty field.
+// The three-argument form appends to elems and returns a reference to it;
+// the two-argument form returns a new vector.
+std::vector<std::string>& split(const std::string &s, char delim, std::vector<std::string> &elems);
+std::vector<std::string>  split(const std::string &s, char delim);
+
+// split a string on any character found in the string of delimiters (delims)
+// Runs of delimiters count as one separator and leading/trailing delimiters
+// are skipped, so this family never produces empty tokens.
+// The three-argument form appends to elems and returns a reference to it;
+// the two-argument form returns a new vector.
+std::vector<std::string>& split(const std::string &s, const std::string& delims, std::vector<std::string> &elems);
+std::vector<std::string>  split(const std::string &s, const std::string& delims);
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFile/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+# Builds bedFile.o into the shared obj dir. The first rule in the file
+# ($(BUILT_OBJECTS)) is the default goal; there is no separate "all" target.
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ -I$(UTILITIES_DIR)/stringUtilities/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= bedFile.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# Objects owned by sibling utility directories.
+_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile each local source into $(OBJ_DIR); $(*F) is the file part of the
+# target stem, i.e. the source base name.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# NOTE(review): -C expects a directory argument, but here it is followed by
+# -W and the -I flags from $(INCLUDES). Nothing in this file lists
+# $(EXT_OBJECTS) as a prerequisite, so this recipe may never run - confirm
+# the intent before relying on it.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C -W $(INCLUDES)
+
+# Removes everything in the shared obj and bin directories, not just the
+# objects built here.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,720 @@\n+/*****************************************************************************\n+  bedFile.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "bedFile.h"\n+\n+\n+/************************************************\n+Helper functions\n+*************************************************/\n+void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks) {\n+\n+    if (bed.otherFields.size() < 6) {\n+        cerr << "Input error: Cannot split into blocks. Found interval with fewer than 12 columns on line " << lineNum << "." << endl;\n+        exit(1);\n+    }\n+\n+    int blockCount = atoi(bed.otherFields[3].c_str());\n+    if ( blockCount <= 0 ) {\n+        cerr << "Input error: found interval having <= 0 blocks on line " << lineNum << "." << endl;\n+        exit(1);\n+    }\n+    else if ( blockCount == 1 ) {\n+        //take a short-cut for single blocks\n+        bedBlocks.push_back(bed);\n+    }\n+    else {\n+        // get the comma-delimited strings for the BED12 block starts and block ends.\n+        string blockSizes(bed.otherFields[4]);\n+        string blockStarts(bed.otherFields[5]);\n+\n+        vector<int> sizes;\n+        vector<int> starts;\n+        Tokenize(blockSizes, sizes, ",");\n+        Tokenize(blockStarts, starts, ",");\n+\n+        if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {\n+            cerr << "Input error: found interval with block-counts not matching starts/sizes on line " << lineNum << "." 
<< endl;\n+            exit(1);\n+        }\n+\n+        // add each BED block to the bedBlocks vector\n+        for (UINT i = 0; i < (UINT) blockCount; ++i) {\n+            CHRPOS blockStart = bed.start + starts[i];\n+            CHRPOS blockEnd   = bed.start + starts[i] + sizes[i];\n+            BED currBedBlock(bed.chrom, blockStart, blockEnd, bed.name, bed.score, bed.strand, bed.otherFields);\n+            bedBlocks.push_back(currBedBlock);\n+        }\n+    }\n+}\n+\n+\n+/***********************************************\n+Sorting comparison functions\n+************************************************/\n+bool sortByChrom(BED const &a, BED const &b) {\n+    if (a.chrom < b.chrom) return true;\n+    else return false;\n+};\n+\n+bool sortByStart(const BED &a, const BED &b) {\n+    if (a.start < b.start) return true;\n+    else return false;\n+};\n+\n+bool sortBySizeAsc(const BED &a, const BED &b) {\n+\n+    CHRPOS aLen = a.end - a.start;\n+    CHRPOS bLen = b.end - b.start;\n+\n+    if (aLen < bLen) return true;\n+    else return false;\n+};\n+\n+bool sortBySizeDesc(const BED &a, const BED &b) {\n+\n+    CHRPOS aLen = a.end - a.start;\n+    CHRPOS bLen = b.end - b.start;\n+\n+    if (aLen > bLen) return true;\n+    else return false;\n+};\n+\n+bool sortByScoreAsc(const BED &a, const BED &b) {\n+    if (a.score < b.score) return true;\n+    else return false;\n+};\n+\n+bool sortByScoreDesc(const BED &a, const BED &b) {\n+    if (a.score > b.score) return true;\n+    else return false;\n+};\n+\n+bool byChromThenStart(BED const &a, BED const &b) {\n+\n+    if (a.chrom < b.chrom) return true;\n+    else if (a.chrom > b.chrom) return false;\n+\n+    if (a.start < b.start) return true;\n+    else if (a.start >= b.start) return false;\n+\n+    return false;\n+};\n+\n+\n+/*******************************************\n+Class methods\n+*******************************************/\n+\n+// Constructor\n+BedFile::BedFile(string &bedFile)\n+: bedFile(bedFile),\n+  
_isGff(false),\n+  _isVcf(false),\n+  _typeIsKnown(false),\n+  _merged_start(-1),\n+  _merged_end(-1),\n+  _merged_chrom(""),\n+  _prev_start(-1),\n+  _prev_chrom("")\n+{}\n+\n+// Destructor\n+BedFile::~BedFile(void) {\n+}\n+\n+\n+void BedFile::Open(void) {\n+    \n+    _bedFields.reserve(12);\n+    \n+    if (bedFile == "stdin" || bedFile == "-") {\n+        _bedStream'..b'  else {\n+                        // correct for the fact that we artificially expanded the zeroLength feature\n+                        bedItr->depthMapList[index][a.start+2].starts++;\n+                        bedItr->depthMapList[index][a.end-1].ends++;                        \n+                    }\n+\n+                    if (a.start < bedItr->minOverlapStarts[index]) {\n+                        bedItr->minOverlapStarts[index] = a.start;\n+                    }\n+                }\n+            }\n+        }\n+        startBin >>= _binNextShift;\n+        endBin >>= _binNextShift;\n+    }\n+}\n+\n+void BedFile::setZeroBased(bool zeroBased) { this->isZeroBased = zeroBased; }\n+\n+void BedFile::setGff (bool gff) { this->_isGff = gff; }\n+\n+\n+void BedFile::setVcf (bool vcf) { this->_isVcf = vcf; }\n+\n+\n+void BedFile::setFileType (FileType type) {\n+    _fileType    = type;\n+    _typeIsKnown = true;\n+}\n+\n+\n+void BedFile::setBedType (int colNums) {\n+    bedType = colNums;\n+}\n+\n+\n+void BedFile::loadBedFileIntoMap() {\n+\n+    BED bedEntry, nullBed;\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+\n+    Open();\n+    while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            BIN bin = getBin(bedEntry.start, bedEntry.end);\n+            bedMap[bedEntry.chrom][bin].push_back(bedEntry);\n+            bedEntry = nullBed;\n+        }\n+    }\n+    Close();\n+}\n+\n+\n+void BedFile::loadBedCovFileIntoMap() {\n+\n+    BED bedEntry, nullBed;\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+\n+    Open();\n+    
while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            BIN bin = getBin(bedEntry.start, bedEntry.end);\n+\n+            BEDCOV bedCov;\n+            bedCov.chrom        = bedEntry.chrom;\n+            bedCov.start        = bedEntry.start;\n+            bedCov.end          = bedEntry.end;\n+            bedCov.name         = bedEntry.name;\n+            bedCov.score        = bedEntry.score;\n+            bedCov.strand       = bedEntry.strand;\n+            bedCov.otherFields  = bedEntry.otherFields;\n+            bedCov.zeroLength   = bedEntry.zeroLength;\n+            bedCov.count = 0;\n+            bedCov.minOverlapStart = INT_MAX;\n+\n+            bedCovMap[bedEntry.chrom][bin].push_back(bedCov);\n+            bedEntry = nullBed;\n+        }\n+    }\n+    Close();\n+}\n+\n+void BedFile::loadBedCovListFileIntoMap() {\n+\n+    BED bedEntry, nullBed;\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+\n+    Open();\n+    while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            BIN bin = getBin(bedEntry.start, bedEntry.end);\n+\n+            BEDCOVLIST bedCovList;\n+            bedCovList.chrom        = bedEntry.chrom;\n+            bedCovList.start        = bedEntry.start;\n+            bedCovList.end          = bedEntry.end;\n+            bedCovList.name         = bedEntry.name;\n+            bedCovList.score        = bedEntry.score;\n+            bedCovList.strand       = bedEntry.strand;\n+            bedCovList.otherFields  = bedEntry.otherFields;\n+            bedCovList.zeroLength   = bedEntry.zeroLength;\n+\n+            bedCovListMap[bedEntry.chrom][bin].push_back(bedCovList);\n+            bedEntry = nullBed;\n+        }\n+    }\n+    Close();\n+}\n+\n+\n+void BedFile::loadBedFileIntoMapNoBin() {\n+\n+    BED bedEntry, nullBed;\n+    int lineNum = 0;\n+    BedLineStatus bedStatus;\n+\n+    Open();\n+    while ((bedStatus 
= this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+        if (bedStatus == BED_VALID) {\n+            bedMapNoBin[bedEntry.chrom].push_back(bedEntry);\n+            bedEntry = nullBed;\n+        }\n+    }\n+    Close();\n+\n+    // sort the BED entries for each chromosome\n+    // in ascending order of start position\n+    for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) {\n+        sort(m->second.begin(), m->second.end(), sortByStart);\n+    }\n+}\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,1167 @@\n+/*****************************************************************************\n+  bedFile.h\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#ifndef BEDFILE_H\n+#define BEDFILE_H\n+\n+// "local" includes\n+#include "gzstream.h"\n+#include "lineFileUtilities.h"\n+#include "fileType.h"\n+\n+// standard includes\n+#include <vector>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <iostream>\n+#include <fstream>\n+#include <sstream>\n+#include <cstring>\n+#include <algorithm>\n+#include <limits.h>\n+#include <stdint.h>\n+#include <cstdio>\n+//#include <tr1/unordered_map>  // Experimental.\n+using namespace std;\n+\n+\n+//*************************************************\n+// Data type tydedef\n+//*************************************************\n+typedef uint32_t CHRPOS;\n+typedef uint16_t BINLEVEL;\n+typedef uint32_t BIN;\n+typedef uint16_t USHORT;\n+typedef uint32_t UINT;\n+\n+//*************************************************\n+// Genome binning constants\n+//*************************************************\n+\n+const BIN      _numBins   = 37450;\n+const BINLEVEL _binLevels = 7;\n+\n+// bins range in size from 16kb to 512Mb\n+// Bin  0          spans 512Mbp,   # Level 1\n+// Bins 1-8        span 64Mbp,     # Level 2\n+// Bins 9-72       span 8Mbp,      # Level 3\n+// Bins 73-584     span 1Mbp       # Level 4\n+// Bins 585-4680   span 128Kbp     # Level 5\n+// Bins 4681-37449 span 16Kbp      # Level 6\n+const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+\n+const USHORT _binFirstShift = 14;       /* How much 
to shift to get to finest bin. */\n+const USHORT _binNextShift  = 3;        /* How much to shift to get to next larger bin. */\n+\n+\n+//*************************************************\n+// Common data structures\n+//*************************************************\n+\n+struct DEPTH {\n+    UINT starts;\n+    UINT ends;\n+};\n+\n+\n+/*\n+    Structure for regular BED records\n+*/\n+struct BED {\n+\n+    // Regular BED fields\n+    string chrom;\n+    CHRPOS start;\n+    CHRPOS end;\n+    string name;\n+    string score;\n+    string strand;\n+\n+    // Add\'l fields for BED12 and/or custom BED annotations\n+    vector<string> otherFields;\n+\n+    // experimental fields for the FJOIN approach.\n+    bool   zeroLength;\n+    bool   added;\n+    bool   finished;\n+    // list of hits from another file.\n+    vector<BED> overlaps;\n+\n+public:\n+    // constructors\n+\n+    // Null\n+    BED()\n+    : chrom(""),\n+      start(0),\n+      end(0),\n+      name(""),\n+      score(""),\n+      strand(""),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED3\n+    BED(string chrom, CHRPOS start, CHRPOS end)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(""),\n+      score(""),\n+      strand(""),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED4\n+    BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(""),\n+      score(""),\n+      strand(strand),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED6\n+    BED(string chrom, CHRPOS start, CHRPOS end, string name,\n+        string score, string strand)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(name),\n+      
score(score),\n+      strand(strand),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // B'..b'           vector<string>::const_iterator othEnd = bed.otherFields.end();\n+                for ( ; othIt != othEnd; ++othIt) {\n+                    printf("\\t%s", othIt->c_str());\n+                }\n+                printf("\\n");\n+            }\n+        }\n+        // VCF\n+        else if (_isGff == false && _isVcf == true) {\n+            printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n+\n+            vector<string>::const_iterator othIt = bed.otherFields.begin();\n+            vector<string>::const_iterator othEnd = bed.otherFields.end();\n+            for ( ; othIt != othEnd; ++othIt) {\n+                printf("%s\\t", othIt->c_str());\n+            }\n+            printf("\\n");\n+        }\n+        // GFF\n+        else if (_isGff == true) {\n+            // "GFF-9"\n+            if (this->bedType == 8) {\n+                printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+                                                             bed.name.c_str(), start+1, end,\n+                                                             bed.score.c_str(), bed.strand.c_str(),\n+                                                             bed.otherFields[1].c_str());\n+            }\n+            // "GFF-8"\n+            else if (this->bedType == 9) {\n+                printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+                                                             bed.name.c_str(), start+1, end,\n+                                                             bed.score.c_str(), bed.strand.c_str(),\n+                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n+            }\n+        }\n+    }\n+\n+\n+   
 /*\n+        reportNullBedTab\n+    */\n+    void reportNullBedTab() {\n+\n+        if (_isGff == false && _isVcf == false) {\n+            if (this->bedType == 3) {\n+                printf (".\\t-1\\t-1\\t");\n+            }\n+            else if (this->bedType == 4) {\n+                printf (".\\t-1\\t-1\\t.\\t");\n+            }\n+            else if (this->bedType == 5) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t");\n+            }\n+            else if (this->bedType == 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+            }\n+            else if (this->bedType > 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+                for (unsigned int i = 6; i < this->bedType; ++i) {\n+                    printf(".\\t");\n+                }\n+            }\n+        }\n+        else if (_isGff == true && _isVcf == false) {\n+            if (this->bedType == 8) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n+            }\n+            else if (this->bedType == 9) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n+            }\n+        }\n+    }\n+\n+\n+    /*\n+        reportNullBedTab\n+    */\n+    void reportNullBedNewLine() {\n+\n+        if (_isGff == false && _isVcf == false) {\n+            if (this->bedType == 3) {\n+                printf (".\\t-1\\t-1\\n");\n+            }\n+            else if (this->bedType == 4) {\n+                printf (".\\t-1\\t-1\\t.\\n");\n+            }\n+            else if (this->bedType == 5) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\n");\n+            }\n+            else if (this->bedType == 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n+            }\n+            else if (this->bedType > 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n+                for (unsigned int i = 6; i < this->bedType; ++i) {\n+                    printf("\\t.");\n+                }\n+                
printf("\\n");\n+            }\n+        }\n+        else if (_isGff == true && _isVcf == false) {\n+            if (this->bedType == 8) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n+            }\n+            else if (this->bedType == 9) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n+            }\n+        }\n+    }\n+\n+\n+};\n+\n+#endif /* BEDFILE_H */\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,1144 @@\n+/*****************************************************************************\n+  bedFile.h\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#ifndef BEDFILE_H\n+#define BEDFILE_H\n+\n+// "local" includes\n+#include "gzstream.h"\n+#include "lineFileUtilities.h"\n+#include "fileType.h"\n+\n+// standard includes\n+#include <vector>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <iostream>\n+#include <fstream>\n+#include <sstream>\n+#include <cstring>\n+#include <algorithm>\n+#include <limits.h>\n+#include <stdint.h>\n+#include <cstdio>\n+//#include <tr1/unordered_map>  // Experimental.\n+using namespace std;\n+\n+\n+//*************************************************\n+// Data type tydedef\n+//*************************************************\n+typedef uint32_t CHRPOS;\n+typedef uint16_t BINLEVEL;\n+typedef uint32_t BIN;\n+typedef uint16_t USHORT;\n+typedef uint32_t UINT;\n+\n+//*************************************************\n+// Genome binning constants\n+//*************************************************\n+\n+const BIN      _numBins   = 37450;\n+const BINLEVEL _binLevels = 7;\n+\n+// bins range in size from 16kb to 512Mb\n+// Bin  0          spans 512Mbp,   # Level 1\n+// Bins 1-8        span 64Mbp,     # Level 2\n+// Bins 9-72       span 8Mbp,      # Level 3\n+// Bins 73-584     span 1Mbp       # Level 4\n+// Bins 585-4680   span 128Kbp     # Level 5\n+// Bins 4681-37449 span 16Kbp      # Level 6\n+const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+\n+const USHORT _binFirstShift = 14;       /* How much 
to shift to get to finest bin. */\n+const USHORT _binNextShift  = 3;        /* How much to shift to get to next larger bin. */\n+\n+\n+//*************************************************\n+// Common data structures\n+//*************************************************\n+\n+struct DEPTH {\n+    UINT starts;\n+    UINT ends;\n+};\n+\n+\n+/*\n+    Structure for regular BED records\n+*/\n+struct BED {\n+\n+    // Regular BED fields\n+    string chrom;\n+    CHRPOS start;\n+    CHRPOS end;\n+    string name;\n+    string score;\n+    string strand;\n+\n+    // Add\'l fields for BED12 and/or custom BED annotations\n+    vector<string> otherFields;\n+\n+    // experimental fields for the FJOIN approach.\n+    bool   zeroLength;\n+    bool   added;\n+    bool   finished;\n+    // list of hits from another file.\n+    vector<BED> overlaps;\n+\n+public:\n+    // constructors\n+\n+    // Null\n+    BED()\n+    : chrom(""),\n+      start(0),\n+      end(0),\n+      name(""),\n+      score(""),\n+      strand(""),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED3\n+    BED(string chrom, CHRPOS start, CHRPOS end)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(""),\n+      score(""),\n+      strand(""),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED4\n+    BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(""),\n+      score(""),\n+      strand(strand),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // BED6\n+    BED(string chrom, CHRPOS start, CHRPOS end, string name,\n+        string score, string strand)\n+    : chrom(chrom),\n+      start(start),\n+      end(end),\n+      name(name),\n+      
score(score),\n+      strand(strand),\n+      otherFields(),\n+      zeroLength(false),\n+      added(false),\n+      finished(false),\n+      overlaps()\n+    {}\n+\n+    // B'..b'           vector<string>::const_iterator othEnd = bed.otherFields.end();\n+                for ( ; othIt != othEnd; ++othIt) {\n+                    printf("\\t%s", othIt->c_str());\n+                }\n+                printf("\\n");\n+            }\n+        }\n+        // VCF\n+        else if (_isGff == false && _isVcf == true) {\n+            printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n+\n+            vector<string>::const_iterator othIt = bed.otherFields.begin();\n+            vector<string>::const_iterator othEnd = bed.otherFields.end();\n+            for ( ; othIt != othEnd; ++othIt) {\n+                printf("%s\\t", othIt->c_str());\n+            }\n+            printf("\\n");\n+        }\n+        // GFF\n+        else if (_isGff == true) {\n+            // "GFF-9"\n+            if (this->bedType == 8) {\n+                printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+                                                             bed.name.c_str(), start+1, end,\n+                                                             bed.score.c_str(), bed.strand.c_str(),\n+                                                             bed.otherFields[1].c_str());\n+            }\n+            // "GFF-8"\n+            else if (this->bedType == 9) {\n+                printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+                                                             bed.name.c_str(), start+1, end,\n+                                                             bed.score.c_str(), bed.strand.c_str(),\n+                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n+            }\n+        }\n+    }\n+\n+\n+   
 /*\n+        reportNullBedTab\n+    */\n+    void reportNullBedTab() {\n+\n+        if (_isGff == false && _isVcf == false) {\n+            if (this->bedType == 3) {\n+                printf (".\\t-1\\t-1\\t");\n+            }\n+            else if (this->bedType == 4) {\n+                printf (".\\t-1\\t-1\\t.\\t");\n+            }\n+            else if (this->bedType == 5) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t");\n+            }\n+            else if (this->bedType == 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+            }\n+            else if (this->bedType > 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+                for (unsigned int i = 6; i < this->bedType; ++i) {\n+                    printf(".\\t");\n+                }\n+            }\n+        }\n+        else if (_isGff == true && _isVcf == false) {\n+            if (this->bedType == 8) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n+            }\n+            else if (this->bedType == 9) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n+            }\n+        }\n+    }\n+\n+\n+    /*\n+        reportNullBedTab\n+    */\n+    void reportNullBedNewLine() {\n+\n+        if (_isGff == false && _isVcf == false) {\n+            if (this->bedType == 3) {\n+                printf (".\\t-1\\t-1\\n");\n+            }\n+            else if (this->bedType == 4) {\n+                printf (".\\t-1\\t-1\\t.\\n");\n+            }\n+            else if (this->bedType == 5) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\n");\n+            }\n+            else if (this->bedType == 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n+            }\n+            else if (this->bedType > 6) {\n+                printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n+                for (unsigned int i = 6; i < this->bedType; ++i) {\n+                    printf("\\t.");\n+                }\n+                
printf("\\n");\n+            }\n+        }\n+        else if (_isGff == true && _isVcf == false) {\n+            if (this->bedType == 8) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n+            }\n+            else if (this->bedType == 9) {\n+                printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n+            }\n+        }\n+    }\n+\n+\n+};\n+\n+#endif /* BEDFILE_H */\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+# Build rules for the bedFilePE utility: compiles bedFilePE.cpp into $(OBJ_DIR).
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= bedFilePE.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=lineFileUtilities.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile the local source.  The object file ($@) is written into $(OBJ_DIR);
+# $(*F) is the file part of the target's stem, i.e. the source base name.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# NOTE(review): -C expects a directory, but $(INCLUDES) expands to a list of
+# -I flags.  Confirm whether this rule is ever exercised, and which utility
+# directory it is meant to recurse into.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
+# Removes every file in the shared obj/ and bin/ directories, not only the
+# object produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,530 @@\n+//\n+//  bedFilePE.cpp\n+//  BEDTools\n+//\n+//  Created by Aaron Quinlan Spring 2009.\n+//  Copyright 2009 Aaron Quinlan. All rights reserved.\n+//\n+//  Summary:  Contains common functions for finding BED overlaps.\n+//\n+//  Acknowledgments: Much of the code herein is taken from Jim Kent\'s\n+//                   BED processing code.  I am grateful for his elegant\n+//                   genome binning algorithm and therefore use it extensively.\n+\n+\n+#include "bedFilePE.h"\n+\n+\n+// Constructor\n+BedFilePE::BedFilePE(string &bedFile) {\n+    this->bedFile = bedFile;\n+}\n+\n+// Destructor\n+BedFilePE::~BedFilePE(void) {\n+}\n+\n+void BedFilePE::Open(void) {\n+    if (bedFile == "stdin" || bedFile == "-") {\n+        _bedStream = &cin;\n+    }\n+    else {\n+        _bedStream = new ifstream(bedFile.c_str(), ios::in);\n+\n+        if (isGzipFile(_bedStream) == true) {\n+            delete _bedStream;\n+            _bedStream = new igzstream(bedFile.c_str(), ios::in);\n+        }\n+        // can we open the file?\n+        if ( !(_bedStream->good()) ) {\n+            cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" 
<< endl;\n+            exit (1);\n+        }\n+    }\n+}\n+\n+\n+\n+// Close the BEDPE file\n+void BedFilePE::Close(void) {\n+    if (bedFile != "stdin" && bedFile != "-") delete _bedStream;\n+}\n+\n+\n+BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) {\n+\n+    // make sure there are still lines to process.\n+    // if so, tokenize, validate and return the BEDPE entry.\n+    if (_bedStream->good()) {\n+        string bedPELine;\n+        vector<string> bedPEFields;\n+        bedPEFields.reserve(10);\n+\n+        // parse the bedStream pointer\n+        getline(*_bedStream, bedPELine);\n+        lineNum++;\n+\n+        // split into a string vector.\n+        Tokenize(bedPELine,bedPEFields);\n+\n+        // load the BEDPE struct as long as it\'s a valid BEDPE entry.\n+        return parseLine(bedpe, bedPEFields, lineNum);\n+    }\n+    // default if file is closed or EOF\n+    return BED_INVALID;\n+}\n+\n+\n+/*\n+    reportBedPETab\n+\n+    Writes the _original_ BED entry for A.\n+    Works for BEDPE only.\n+*/\n+void BedFilePE::reportBedPETab(const BEDPE &a) {\n+\n+    if (this->bedType == 6) {\n+        printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t", a.chrom1.c_str(), a.start1, a.end1,\n+                                            a.chrom2.c_str(), a.start2, a.end2);\n+    }\n+    else if (this->bedType == 7) {\n+        printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+                                            a.chrom2.c_str(), a.start2, a.end2,\n+                                            a.name.c_str());\n+    }\n+    else if (this->bedType == 8) {\n+        printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+                                            a.chrom2.c_str(), a.start2, a.end2,\n+                                            a.name.c_str(), a.score.c_str());\n+    }\n+    else if (this->bedType == 10) {\n+        
printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+                                            a.chrom2.c_str(), a.start2, a.end2,\n+                                            a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());\n+    }\n+    else if (this->bedType > 10) {\n+        printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", a.chrom1.c_str(), a.start1, a.end1,\n+                                            a.chrom2.c_str(), a.start2, a.end2,\n+                                            a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());\n+\n+        vector<string>::const_iterator othIt = a.otherFields.begin();\n+        vector<string>::const_iterator othEnd = a.otherFields.end();\n+        for ( ; othIt != othEnd; ++othIt) {\n+            printf("\\t%s", othIt->c_str());\n+        }\n+        printf("\\t");\n+    }\n+}\n+\n+\n+\n+/*\n+    reportBedPENewLine\n+\n+    Writes the _original_ BED entry for A.\n+   '..b'      float size    = end - start;\n+\n+                if ( (overlap / size) >= overlapFraction ) {\n+\n+                    // skip the hit if not on the same strand (and we care)\n+                    if ((forceStrand == false) && (enforceDiffNames == false)) {\n+                        hits.push_back(*bedItr);    // it\'s a hit, add it.\n+                    }\n+                    else if ((forceStrand == true) && (enforceDiffNames == false)) {\n+                        if (strand == bedItr->bed.strand)\n+                            hits.push_back(*bedItr);    // it\'s a hit, add it.\n+                    }\n+                    else if ((forceStrand == true) && (enforceDiffNames == true)) {\n+                        if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name))\n+                            hits.push_back(*bedItr);    // it\'s a hit, add it.\n+                    }\n+                    else if ((forceStrand == false) && 
(enforceDiffNames == true)) {\n+                        if (name != bedItr->bed.name)\n+                            hits.push_back(*bedItr);    // it\'s a hit, add it.\n+                    }\n+                }\n+\n+            }\n+        }\n+        startBin >>= _binNextShift;\n+        endBin >>= _binNextShift;\n+    }\n+}\n+\n+\n+void BedFilePE::loadBedPEFileIntoMap() {\n+\n+    int lineNum = 0;\n+    int bin1, bin2;\n+    BedLineStatus bedStatus;\n+    BEDPE bedpeEntry, nullBedPE;\n+\n+    Open();\n+    bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n+    while (bedStatus != BED_INVALID) {\n+\n+        if (bedStatus == BED_VALID) {\n+            MATE *bedEntry1 = new MATE();\n+            MATE *bedEntry2 = new MATE();\n+            // separate the BEDPE entry into separate\n+            // BED entries\n+            splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2);\n+\n+            // load end1 into a UCSC bin map\n+            bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end);\n+            this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1);\n+\n+            // load end2 into a UCSC bin map\n+            bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end);\n+            this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2);\n+\n+            bedpeEntry = nullBedPE;\n+        }\n+        bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n+    }\n+    Close();\n+}\n+\n+\n+void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) {\n+\n+    /*\n+       Split the BEDPE entry into separate BED entries\n+\n+       NOTE: I am using a trick here where I store\n+       the lineNum of the BEDPE from the original file\n+       in the "count" column.  This allows me to later\n+       resolve whether the hits found on both ends of BEDPE A\n+       came from the same entry in BEDPE B.  
Tracking by "name"\n+       alone with fail when there are multiple mappings for a given\n+       read-pair.\n+    */\n+\n+    bedEntry1->bed.chrom           = bedpeEntry.chrom1;\n+    bedEntry1->bed.start           = bedpeEntry.start1;\n+    bedEntry1->bed.end             = bedpeEntry.end1;\n+    bedEntry1->bed.name            = bedpeEntry.name;\n+    bedEntry1->bed.score           = bedpeEntry.score;        // only store the score in end1 to save memory\n+    bedEntry1->bed.strand          = bedpeEntry.strand1;\n+    bedEntry1->bed.otherFields     = bedpeEntry.otherFields;  // only store the otherFields in end1 to save memory\n+    bedEntry1->lineNum             = lineNum;\n+    bedEntry1->mate                = bedEntry2;               // keep a pointer to end2\n+\n+    bedEntry2->bed.chrom           = bedpeEntry.chrom2;\n+    bedEntry2->bed.start           = bedpeEntry.start2;\n+    bedEntry2->bed.end             = bedpeEntry.end2;\n+    bedEntry2->bed.name            = bedpeEntry.name;\n+    bedEntry2->bed.strand          = bedpeEntry.strand2;\n+    bedEntry2->lineNum             = lineNum;\n+    bedEntry2->mate                = bedEntry1;               // keep a pointer to end1\n+}\n+\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,93 @@
+#ifndef BEDFILEPE_H
+#define BEDFILEPE_H
+
+#include <vector>
+#include <map>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <cstring>
+#include <algorithm>
+#include "bedFile.h"
+#include "lineFileUtilities.h"
+
+using namespace std;
+
+
+/*
+    Structure for paired-end (BEDPE) records.
+
+    Each record describes two intervals ("end 1" and "end 2") that share a
+    name and score.  Columns beyond the ones named here are kept verbatim
+    in otherFields.
+*/
+struct BEDPE {
+
+    // UCSC BED fields for end 1
+    string chrom1;
+    CHRPOS start1;
+    CHRPOS end1;
+
+    // UCSC BED fields for end 2
+    string chrom2;
+    CHRPOS start2;
+    CHRPOS end2;
+
+    // fields shared by both ends
+    string name;
+    string score;
+
+    // strand of end 1 and of end 2
+    string strand1;
+    string strand2;
+
+    // any additional columns, in file order
+    vector<string> otherFields;
+
+    // Null constructor.
+    // Gives every coordinate a defined value (0) so that a default-constructed
+    // record, such as the "null" entry used to reset a working record between
+    // lines, can be copied without reading indeterminate values.
+    BEDPE()
+    : chrom1(""),
+      start1(0),
+      end1(0),
+      chrom2(""),
+      start2(0),
+      end2(0),
+      name(""),
+      score(""),
+      strand1(""),
+      strand2(""),
+      otherFields()
+    {}
+};
+
+
+
+
+//************************************************
+// BedFile Class methods and elements
+//************************************************
+class BedFilePE {
+
+public:
+
+    // Constructor: stores the given file name in bedFile.  No stream is
+    // opened until Open() is called.
+    BedFilePE(string &);
+
+    // Destructor
+    ~BedFilePE(void);
+
+    // Open a BEDPE file for reading (creates an istream pointer).
+    // A file name of "stdin" or "-" reads from standard input.  Otherwise the
+    // file is opened and, when isGzipFile() reports true, reopened through
+    // igzstream.  Exits with status 1 if the stream cannot be read.
+    void Open(void);
+
+    // Close an opened BEDPE file.
+    // Deletes the stream unless the input is standard input.
+    void Close(void);
+
+    // Get the next BEDPE entry in an opened BEDPE file.
+    // Reads one line, increments lineNum, tokenizes the line and returns the
+    // status produced by parseLine().  Returns BED_INVALID when the stream is
+    // no longer good (closed or at end of file).
+    BedLineStatus GetNextBedPE (BEDPE &bedpe, int &lineNum);
+
+
+    // Methods
+
+    // Prints the original fields of a, followed by a trailing tab.  The set of
+    // columns written is selected by bedType (6, 7, 8, 10 or more than 10).
+    void reportBedPETab(const BEDPE &a);
+    // NOTE(review): presumably the newline-terminated counterpart of
+    // reportBedPETab -- confirm against the definition.
+    void reportBedPENewLine(const BEDPE &a);
+    // Reads the whole file and, for every valid entry, stores end 1 in
+    // bedMapEnd1 and end 2 in bedMapEnd2, keyed by chromosome and bin.
+    void loadBedPEFileIntoMap();
+    // Fills bedEntry1 / bedEntry2 from the two ends of a.  Score and
+    // otherFields are copied to end 1 only, lineNum is recorded on both, and
+    // each entry's mate pointer is set to the other entry.
+    void splitBedPEIntoBeds(const BEDPE &a, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2);
+
+
+    // Appends to hits the stored entries that overlap [start, end) by at least
+    // overlapFraction of the interval, subject to the strand and name filters.
+    // NOTE(review): bEnd presumably selects which end's map is searched --
+    // confirm against the definition.
+    void FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand,
+        vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames);
+
+
+    // the name of the file with which this instance is associated
+    string bedFile;
+    // selects the output layout in the report methods
+    // NOTE(review): presumably the column count of the input -- confirm where it is set.
+    unsigned int bedType;
+
+    // per-chromosome, per-bin storage for end 1 and end 2 of each entry
+    masterMateMap bedMapEnd1;
+    masterMateMap bedMapEnd2;
+
+private:
+    // input stream created by Open(); points at cin for standard input
+    istream *_bedStream;
+
+    // methods
+    // (definitions are not visible here; GetNextBedPE returns parseLine's result)
+    BedLineStatus parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum);
+    bool parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum);
+};
+
+#endif /* BEDFILEPE_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,31 @@
+# Build rules for the bedGraphFile utility: compiles bedGraphFile.cpp into $(OBJ_DIR).
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES =  -I$(UTILITIES_DIR)/lineFileUtilities/ \
+            -I$(UTILITIES_DIR)/gzstream/ \
+            -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= bedGraphFile.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile the local source.  The object file ($@) is written into $(OBJ_DIR);
+# $(*F) is the file part of the target's stem, i.e. the source base name.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# NOTE(review): -C expects a directory, but here it is followed by -W and the
+# -I flags from $(INCLUDES).  Confirm whether this rule is ever exercised, and
+# which utility directory it is meant to recurse into.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C -W $(INCLUDES)
+
+# Removes every file in the shared obj/ and bin/ directories, not only the
+# object produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,58 @@
+/*****************************************************************************
+  bedGraphFile.cpp
+
+  (c) 2010 - Assaf Gordon
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "bedGraphFile.h"
+#include <sstream>
+
+// Constructor: remembers the file name.  No stream exists until Open() runs,
+// so the stream pointer starts out NULL.
+BedGraphFile::BedGraphFile(string &_file)
+: bedGraphFile(_file)
+, _bedGraphStream(NULL)
+{
+}
+
+
+// Destructor: releases the stream, if one was opened, by delegating to Close().
+BedGraphFile::~BedGraphFile()
+{
+    this->Close();
+}
+
+
+// Open the BEDGRAPH file
+void BedGraphFile::Open() {
+    if (bedGraphFile == "stdin" || bedGraphFile == "-") {
+        _bedGraphStream = &cin;
+    }
+    else {
+        _bedGraphStream = new ifstream(bedGraphFile.c_str(), ios::in);
+
+        if (isGzipFile(_bedGraphStream) == true) {
+            delete _bedGraphStream;
+            _bedGraphStream = new igzstream(bedGraphFile.c_str(), ios::in);
+        }
+        // can we open the file?
+        if ( !(_bedGraphStream->good()) ) {
+            cerr << "Error: The requested bed file (" << bedGraphFile << ") could not be opened. Exiting!" << endl;
+            exit (1);
+        }
+    }
+}
+
+
+// Close the BEDGRAPH file.
+// Standard input is never deleted; an owned stream is deleted and the pointer
+// reset so that a second Close() (e.g. from the destructor) is harmless.
+void BedGraphFile::Close() {
+    if (bedGraphFile == "stdin" || bedGraphFile == "-") {
+        return;
+    }
+    // deleting a NULL pointer is a no-op, so no separate check is required
+    delete _bedGraphStream;
+    _bedGraphStream = NULL;
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,199 @@
+/*****************************************************************************
+  bedGraphFile.h
+
+  (c) 2010 - Assaf Gordon
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef BEDGRAPHFILE_H
+#define BEDGRAPHFILE_H
+
+#include "gzstream.h"
+#include "lineFileUtilities.h"
+#include "fileType.h"
+#include <vector>
+#include <map>
+#include <set>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <cstring>
+#include <cerrno>
+#include <cstdlib>
+#include <algorithm>
+#include <limits.h>
+#include <stdint.h>
+#include <cstdio>
+
+using namespace std;
+
+//*************************************************
+// Data type typedefs
+//*************************************************
+// Position and depth types used by BedGraph records.
+//
+// NOTE(review): #ifndef only tests for preprocessor macros.  CHRPOS and DEPTH
+// are typedefs, so unless a macro of the same name is defined these guards
+// are always taken and do not detect an earlier typedef.  bedFile.h also
+// declares CHRPOS (same type) and a `struct DEPTH`; confirm that this header
+// is never included in the same translation unit as bedFile.h, since the
+// DEPTH typedef below would conflict with that struct.
+#ifndef CHRPOS
+typedef uint32_t CHRPOS;
+#endif
+
+#ifndef DEPTH
+typedef uint32_t DEPTH;
+#endif
+
+/*
+   Structure for regular BedGraph records
+ */
+template <typename T>
+class BEDGRAPH
+{
+public:
+    std::string chrom;
+    CHRPOS start;
+    CHRPOS end;
+    T depth;
+
+public:
+    typedef T DEPTH_TYPE;
+    // constructors
+
+    // Null
+    BEDGRAPH() :
+        start(0),
+        end(0),
+        depth(T())
+    {}
+
+    // BEDGraph
+    BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) :
+        chrom(_chrom),
+        start(_start),
+        end(_end),
+        depth(_depth)
+    {}
+}; // BEDGraph
+
+// Convenience names for the depth types a BedGraph record is instantiated with:
+// 32-bit signed integer, string, and double-precision floating point.
+typedef BEDGRAPH<int32_t> BEDGRAPH_INT;
+typedef BEDGRAPH<std::string> BEDGRAPH_STR;
+typedef BEDGRAPH<double> BEDGRAPH_FLOAT;
+
+// Writes a record as four tab-separated columns (chrom, start, end, depth).
+// No trailing newline is written; the stream is returned to allow chaining.
+template <typename T>
+std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg)
+{
+    strm << bg.chrom << '\t' << bg.start << '\t' << bg.end << '\t';
+    strm << bg.depth;
+    return strm;
+}
+
+// enum to flag the state of a given line in a BEDGraph file.
+enum BedGraphLineStatus
+{
+    BEDGRAPH_INVALID = -1,   // no usable record: end of input, or a line without exactly 4 fields
+    BEDGRAPH_HEADER  = 0,    // first field contains "track", "browser" or "#"
+    BEDGRAPH_BLANK   = 1,    // the line produced no fields
+    BEDGRAPH_VALID   = 2     // chrom, start, end and depth were all parsed
+};
+
+
+//************************************************
+// BedGraphFile Class methods and elements
+//************************************************
+class BedGraphFile {
+
+public:
+
+    // Constructor
+    BedGraphFile(string &);
+
+    // Destructor
+    ~BedGraphFile(void);
+
+    // Open a BEDGraph file for reading (creates an istream pointer)
+    void Open(void);
+
+    // Close an opened BED file.
+    void Close(void);
+
+    // Get the next BED entry in an opened BED file.
+    template <typename T>
+    BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum)
+    {
+        // make sure there are still lines to process.
+        // if so, tokenize, validate and return the BED entry.
+        if (_bedGraphStream->good()) {
+            string bedGraphLine;
+            vector<string> bedGraphFields;
+
+            // parse the bedStream pointer
+            getline(*_bedGraphStream, bedGraphLine);
+            if (_bedGraphStream->eof())
+                return BEDGRAPH_INVALID;
+            if (_bedGraphStream->bad()) {
+                cerr << "Error while reading file '" << bedGraphFile << "' : "
+                    << strerror(errno) << endl;
+                exit(1);
+            }
+            lineNum++;
+
+            // split into a string vector.
+            Tokenize(bedGraphLine,bedGraphFields);
+
+            // load the BED struct as long as it's a valid BED entry.
+            return parseLine(bedgraph, bedGraphFields, lineNum);
+        }
+
+        // default if file is closed or EOF
+        return BEDGRAPH_INVALID;
+    }
+
+    // the bedfile with which this instance is associated
+    string bedGraphFile;
+
+private:
+    // data
+    istream *_bedGraphStream;
+
+    template <typename T>
+    BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum)
+    {
+        if (lineVector.size() == 0)
+            return BEDGRAPH_BLANK;
+
+        if (lineVector[0].find("track")   != string::npos ||
+            lineVector[0].find("browser") != string::npos ||
+            lineVector[0].find("#") != string::npos)
+            return BEDGRAPH_HEADER;
+
+        if (lineVector.size() != 4)
+            return BEDGRAPH_INVALID;
+
+        bg.chrom = lineVector[0];
+
+        stringstream str_start(lineVector[1]);
+        if (! (str_start >> bg.start) ) {
+            cerr << "Input error, failed to extract start value from '" << lineVector[1]
+                << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl;
+            exit(1);
+        }
+
+        stringstream str_end(lineVector[2]);
+        if (! (str_end >> bg.end) ) {
+            cerr << "Input error, failed to extract end value from '" << lineVector[2]
+                << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl;
+            exit(1);
+        }
+
+        stringstream str_depth(lineVector[3]);
+        if (! (str_depth >> bg.depth) ) {
+            cerr << "Input error, failed to extract depth value from '" << lineVector[3]
+                << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl;
+            exit(1);
+        }
+
+        return BEDGRAPH_VALID;
+    }
+};
+
+#endif /* BEDGRAPHFILE_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,32 @@
+# Build rules for the chromsweep utility: compiles chromsweep.cpp into $(OBJ_DIR).
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= chromsweep.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=lineFileUtilities.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile the local source.  The object file ($@) is written into $(OBJ_DIR);
+# $(*F) is the file part of the target's stem, i.e. the source base name.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# NOTE(review): -C expects a directory, but $(INCLUDES) expands to a list of
+# -I flags.  Confirm whether this rule is ever exercised, and which utility
+# directory it is meant to recurse into.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
+# Removes every file in the shared obj/ and bin/ directories, not only the
+# object produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,180 @@
+/*****************************************************************************
+  chromsweep.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "chromsweep.h"
+#include <queue>
+
+// Free helper functions declared for use in this file.
+// after() is called by ScanCache() and Next() but is not defined in this file.
+bool after(const BED &a, const BED &b);
+// NOTE(review): report_hits() and scan_cache() are neither defined nor called
+// anywhere in this file -- confirm whether these declarations are still needed.
+void report_hits(const BED &curr_qy, const vector<BED> &hits);
+vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits);
+
+
+/*
+    Constructor using BedFile objects that the caller has already created.
+    bedA is the query file and bedB the database file.  Both files are
+    opened here and the first record of each is read so that Next() starts
+    with a current query and a current database entry.
+*/
+ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand)
+: _bedA(bedA),
+  _bedB(bedB),
+  _sameStrand(sameStrand),
+  _diffStrand(diffStrand)
+{
+    // pre-size the working vectors
+    _cache.reserve(1000);
+    _hits.reserve(1000);
+
+    // no lines have been consumed from either file yet
+    _db_lineNum = 0;
+    _qy_lineNum = 0;
+
+    _bedA->Open();
+    _bedB->Open();
+
+    // prime the results pump.
+    _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
+    _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
+}
+
+/*
+    Constructor with filenames
+
+    Creates the query (A) and database (B) BedFile objects from the given
+    file names, opens both and reads the first record of each.
+
+    No strand requirement is applied: _sameStrand and _diffStrand are set to
+    false, which matches the defaults of the BedFile-pointer constructor.
+    They must be initialized here because IsValidHit() reads them for every
+    candidate hit.
+
+    NOTE(review): the BedFile objects allocated below are not released by the
+    destructor -- confirm who is expected to own them.
+*/
+ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) 
+: _sameStrand(false)
+, _diffStrand(false)
+{
+    // prime the results pump.
+    _qy_lineNum = 0;
+    _db_lineNum = 0;
+    
+    _hits.reserve(100000);
+    _cache.reserve(100000);
+    
+    _bedA = new BedFile(bedAFile);
+    _bedB = new BedFile(bedBFile);
+    
+    _bedA->Open();
+    _bedB->Open();
+    
+    _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
+    _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
+}
+
+
+/*
+    Destructor
+
+    Does nothing.  NOTE(review): the filename constructor allocates _bedA and
+    _bedB with new and nothing here deletes them, while the BedFile-pointer
+    constructor borrows objects owned by the caller -- confirm the intended
+    ownership before adding a delete.
+*/
+ChromSweep::~ChromSweep(void) {
+}
+
+
+void ChromSweep::ScanCache() {
+    if (_qy_status != BED_INVALID) {
+        vector<BED>::iterator c = _cache.begin();
+        while (c != _cache.end())
+        {
+            if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) {
+                if (IsValidHit(_curr_qy, *c)) {
+                    _hits.push_back(*c);
+                }
+                ++c;
+            }
+            else {
+                c = _cache.erase(c);
+            }
+        }
+    }
+}
+
+
+bool ChromSweep::ChromChange()
+{
+    // the files are on the same chrom
+    if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) {
+        return false;
+    }
+    // the query is ahead of the database. fast-forward the database to catch-up.
+    else if (_curr_qy.chrom > _curr_db.chrom) {
+        while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom)
+        {
+            _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
+        }
+        _cache.clear();
+        return false;
+    }
+    // the database is ahead of the query.
+    else {
+        // 1. scan the cache for remaining hits on the query's current chrom.
+        if (_curr_qy.chrom == _curr_chrom)
+        {
+            ScanCache();
+            _results.push(make_pair(_curr_qy, _hits));
+            _hits.clear();
+        }
+        // 2. fast-forward until we catch up and report 0 hits until we do.
+        else if (_curr_qy.chrom < _curr_db.chrom)
+        {
+            _results.push(make_pair(_curr_qy, _no_hits));
+            _cache.clear();
+        }
+        _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
+        _curr_chrom = _curr_qy.chrom;
+        return true;
+    }
+}
+
+bool ChromSweep::IsValidHit(const BED &query, const BED &db) {
+    // do we have an overlap in the DB?
+    if (overlaps(query.start, query.end, db.start, db.end) > 0) {
+        // Now test for necessary strandedness.
+        bool strands_are_same = (query.strand == db.strand);
+        if ( (_sameStrand == false && _diffStrand == false)
+             ||
+             (_sameStrand == true && strands_are_same == true)
+             ||
+             (_diffStrand == true && strands_are_same == false)
+           )
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+
+bool ChromSweep::Next(pair<BED, vector<BED> > &next) {
+    if (!_bedA->Empty()) {
+        // have we changed chromosomes?
+        if (ChromChange() == false) {
+            // scan the database cache for hits
+            ScanCache();
+            // advance the db until we are ahead of the query. update hits and cache as necessary
+            while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy)))
+            {
+                if (IsValidHit(_curr_qy, _curr_db)) {
+                    _hits.push_back(_curr_db);
+                }
+                _cache.push_back(_curr_db);
+                _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
+            }
+            // add the hits for this query to the pump
+            _results.push(make_pair(_curr_qy, _hits));
+            // reset for the next query
+            _hits.clear();
+            _curr_qy = _nullBed;
+            _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
+            _curr_chrom = _curr_qy.chrom;
+        }
+    }
+    // report the next set if hits if there are still overlaps in the pump
+    if (!_results.empty()) {
+        next = _results.front();
+        _results.pop();
+        return true;
+    }
+    // otherwise, the party is over.
+    else {return false;}
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,89 @@
+/*****************************************************************************
+  chromsweep.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0.
+******************************************************************************/
+#ifndef CHROMSWEEP_H
+#define CHROMSWEEP_H
+
+#include "bedFile.h"
+#include <vector>
+#include <queue>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+using namespace std;
+
+
+
+// Sweeps a query BED file (A) against a database BED file (B) and reports,
+// one query at a time, the database features that overlap it.
+class ChromSweep {
+
+// public interface.
+public:
+
+    // A is the query and B is the database
+    
+    // constructor using existing BedFile pointers.
+    // sameStrand / diffStrand restrict hits to features on the same /
+    // opposite strand as the query; with both false, strand is ignored.
+    ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand = false, bool diffStrand = false);
+    
+    // constructor using filenames
+    ChromSweep(string &bedAFile, string &bedBFile);
+    
+    // destructor
+    ~ChromSweep(void);
+    
+    // loads next (a pair) with the current query and its overlaps
+    //   next.first is the current query interval
+    //   next.second is a vector of the current query's hits (may be empty).
+    // returns true when a query/hits pair was loaded into next,
+    // false when there are no more results to report.
+    bool Next(pair<BED, vector<BED> > &next);
+    
+    // Usage:
+    //     ChromSweep sweep = ChromSweep(_bedA, _bedB);
+    //     pair<BED, vector<BED> > hit_set;
+    //     while (sweep.Next(hit_set))
+    //     {
+    //        // magic happens here!
+    //        processHits(hit_set.first, hit_set.second);
+    //     }
+    
+// private variables.
+private:
+
+    // instances of a bed file class.
+    BedFile *_bedA, *_bedB;
+    // do we care about strandedness.
+    bool _sameStrand, _diffStrand;
+    // a cache of still active features from the database file
+    vector<BED> _cache;
+    // the set of hits in the database for the current query
+    vector<BED> _hits;
+    // a queue from which we retrieve overlap results.  used by Next()
+    queue< pair<BED, vector<BED> > > _results;
+    // a blank BED record; Next() assigns it to the current query before
+    // reading the next one.
+    BED _nullBed;
+    // an empty BED vector for returning no hits for a given query
+    vector<BED> _no_hits;
+    // the current query and db features.
+    BED _curr_qy, _curr_db;
+    // a cache of the current chrom from the query. used to handle chrom changes.
+    string _curr_chrom;
+    // the current line status in the database and query files
+    BedLineStatus _qy_status, _db_status;
+    // the current line numbers in the database and query files
+    int _qy_lineNum, _db_lineNum;
+
+// private methods.
+private:
+    
+    // scans the database cache for hits against the current query.
+    void ScanCache();
+    // reports whether the sweep has moved to a different chromosome.
+    bool ChromChange();
+    // true when db overlaps query and satisfies the strand requirements.
+    bool IsValidHit(const BED &query, const BED &db);
+};
+
+#endif /* CHROMSWEEP_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/fileType/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+# Builds fileType.o into the shared object directory.
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+# (none needed: fileType.cpp only includes its own header and system headers)
+INCLUDES = 
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= fileType.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# no objects from other utility directories are required here
+_EXT_OBJECTS=
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# compile each source into $(OBJ_DIR); $(*F) is the target's file stem
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# NOTE(review): EXT_OBJECTS expands to nothing in this Makefile, so this rule
+# is presumably never run; if it were, "-C $(INCLUDES)" would pass an empty
+# directory argument to make. Confirm before relying on it.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
+# removes everything in the shared object and binary directories
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,71 @@
+/*****************************************************************************
+fileType.cpp
+
+(c) 2009 - Aaron Quinlan
+Hall Laboratory
+Department of Biochemistry and Molecular Genetics
+University of Virginia
+aaronquinlan@gmail.com
+
+Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+
+#include "fileType.h"
+
+
+/*
+returns TRUE if the file is a regular file:
+not a pipe/device.
+
+This implies that the file can be opened/closed/seek'd multiple times without losing information
+*/
+bool isRegularFile(const string& filename) {
+    struct stat buf ;
+    int i;
+
+    i = stat(filename.c_str(), &buf);
+    if (i!=0) {
+        cerr << "Error: can't determine file type of '" << filename << "': " << strerror(errno) << endl;
+        exit(1);
+    }
+    if (S_ISREG(buf.st_mode))
+        return true;
+
+    return false;
+}
+
+/*
+returns TRUE if the file has a GZIP header.
+Should only be run on regular files.
+*/
+bool isGzipFile(istream *file) {
+    //see http://www.gzip.org/zlib/rfc-gzip.html#file-format
+    
+    /*
+       11-Sep-2011: 
+       We now only peek at the first byte and test for GZIPiness.
+       This is because I can only putback() one byte into an istream
+       without triggering the "fail" bit.  This was necessary to support
+       FIFOs, per version 2.13.0
+    */
+    struct  {
+        unsigned char id1;
+//      unsigned char id2;
+//      unsigned char cm;
+    } gzip_header;
+
+    if (!file->read((char*)&gzip_header, sizeof(gzip_header))) {
+        return false;
+    }
+
+    if ( gzip_header.id1 == 0x1f )
+//       &&
+//       gzip_header.id2 == 0x8b
+//       &&
+//       gzip_header.cm == 8 )
+    {
+        return true;
+    }
+    file->putback(gzip_header.id1);
+    return false;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/fileType.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/fileType/fileType.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,38 @@
+/*****************************************************************************
+  fileType.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef FILETYPE_H
+#define FILETYPE_H
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sstream>
+
+using namespace std;
+
+/*****************************************************************************
+  Convenience functions to detect whether a given file is
+  "regular" and/or "gzipped".
+
+  Kindly contributed by Assaf Gordon.
+******************************************************************************/
+// NOTE(review): no definition of string_error() appears in fileType.cpp;
+// confirm whether this declaration is still needed.
+string string_error(int errnum);
+// true when filename is a regular file; prints an error and exits(1) if
+// the file cannot be stat()'d.
+bool isRegularFile(const string& filename);
+// true when the first byte read from file is the GZIP magic byte 0x1f;
+// a non-matching byte is put back onto the stream.
+bool isGzipFile(istream *file);
+
+#endif /* FILETYPE_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,32 @@
+# Builds genomeFile.o into the shared object directory.
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= genomeFile.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+# objects built by sibling utility directories
+_EXT_OBJECTS=lineFileUtilities.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# compile each source into $(OBJ_DIR); $(*F) is the target's file stem.
+# NOTE(review): -L and $(LDFLAGS) are linker options and this is a
+# compile-only (-c) step, so they presumably have no effect here.
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib
+
+# external objects are produced by running make in their own directories
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# removes everything in the shared object and binary directories
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,103 @@
+/*****************************************************************************
+  genomeFile.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "genomeFile.h"
+
+
+// Remembers the genome file name and immediately loads the file's
+// chromosome sizes.
+GenomeFile::GenomeFile(const string &genomeFile)
+    : _genomeFile(genomeFile)
+{
+    loadGenomeFileIntoMap();
+}
+
+// Builds the chromosome table from a BamTools reference list: each entry's
+// RefName becomes a chromosome name and its RefLength the size, kept in
+// input order in _chromList.
+GenomeFile::GenomeFile(const RefVector &genome) {
+    const size_t numRefs = genome.size();
+    for (size_t idx = 0; idx < numRefs; ++idx) {
+        const string refName = genome[idx].RefName;
+        const int refLength  = genome[idx].RefLength;
+
+        _chromList.push_back(refName);
+        _chromSizes[refName] = refLength;
+    }
+}
+
+// Destructor: the body is empty; nothing is released explicitly here.
+GenomeFile::~GenomeFile(void) {
+}
+
+
+void GenomeFile::loadGenomeFileIntoMap() {
+
+    string genomeLine;
+    int lineNum = 0;
+    vector<string> genomeFields;            // vector for a GENOME entry
+
+    // open the GENOME file for reading
+    ifstream genome(_genomeFile.c_str(), ios::in);
+    if ( !genome ) {
+        cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+    while (getline(genome, genomeLine)) {
+
+        Tokenize(genomeLine,genomeFields);  // load the fields into the vector
+        lineNum++;
+
+        // ignore a blank line
+        if (genomeFields.size() > 0) {
+            if (genomeFields[0].find("#") == string::npos) {
+
+                // we need at least 2 columns
+                if (genomeFields.size() >= 2) {
+                    char *p2End;
+                    long c2;
+                    // make sure the second column is numeric.
+                    c2 = strtol(genomeFields[1].c_str(), &p2End, 10);
+
+                    // strtol  will set p2End to the start of the string if non-integral, base 10
+                    if (p2End != genomeFields[1].c_str()) {
+                        string chrom       = genomeFields[0];
+                        int size           = atoi(genomeFields[1].c_str());
+                        _chromSizes[chrom] = size;
+                        _chromList.push_back(chrom);
+                    }
+                }
+                else {
+                    cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")";
+                    cerr << " at line " << lineNum << ".  Exiting." << endl;
+                    exit (1);
+                }
+            }
+        }
+        genomeFields.clear();
+    }
+}
+
+
+// Returns the recorded size of `chrom`, or -1 when the chromosome is not
+// present in the genome table.
+int GenomeFile::getChromSize(const string &chrom) {
+    chromToSizes::const_iterator chromIt = _chromSizes.find(chrom);
+    if (chromIt == _chromSizes.end())
+        return -1;  // chrom not found.
+    // reuse the iterator rather than looking the key up a second time
+    // through operator[].
+    return chromIt->second;
+}
+
+// Returns a copy of the chromosome names, in the order they were added.
+vector<string> GenomeFile::getChromList() {
+    return _chromList;
+}
+
+// Returns how many chromosome names are stored in _chromList.
+int GenomeFile::getNumberOfChroms() {
+    const int numChroms = static_cast<int>(_chromList.size());
+    return numChroms;
+}
+
+// Returns the stored genome file name (_genomeFile). The RefVector
+// constructor never assigns it, so it is empty for objects built that way.
+string GenomeFile::getGenomeFileName() {
+    return _genomeFile;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,62 @@
+/*****************************************************************************
+  genomeFile.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef GENOMEFILE_H
+#define GENOMEFILE_H
+
+#include <map>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <cstring>
+#include <cstdio>
+#include "api/BamReader.h"
+#include "api/BamAux.h"
+using namespace BamTools;
+
+using namespace std;
+
+
+// typedef for mapping between a chrom name and its size in b.p.
+typedef map<string, int, std::less<string> > chromToSizes;
+
+
+// Holds chromosome names and sizes, loaded either from a genome file or
+// from a BamTools reference list.
+class GenomeFile {
+
+public:
+
+    // Constructor using a file; the file is loaded immediately.
+    GenomeFile(const string &genomeFile);
+    
+    // Constructor using a BamTools RefVector (RefName / RefLength entries)
+    GenomeFile(const RefVector &genome);
+
+    // Destructor
+    ~GenomeFile(void);
+
+    // load a GENOME file into a map keyed by chrom. value is size of chrom.
+    void loadGenomeFileIntoMap();
+
+    int getChromSize(const string &chrom);  // return the size of a chromosome, or -1 if unknown
+    vector<string> getChromList();          // return a list of chrom names
+    int getNumberOfChroms();                // return the number of chroms
+    string getGenomeFileName();             // return the name of the genome file
+
+
+
+private:
+    string  _genomeFile;          // name of the genome file
+    chromToSizes _chromSizes;     // chrom name -> size
+    vector<string> _chromList;    // chrom names in insertion order
+};
+
+#endif /* GENOMEFILE_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB Thu Nov 03 10:25:04 2011 -0400
[
b"@@ -0,0 +1,504 @@\n+\t\t  GNU LESSER GENERAL PUBLIC LICENSE\n+\t\t       Version 2.1, February 1999\n+\n+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.\n+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+[This is the first released version of the Lesser GPL.  It also counts\n+ as the successor of the GNU Library Public License, version 2, hence\n+ the version number 2.1.]\n+\n+\t\t\t    Preamble\n+\n+  The licenses for most software are designed to take away your\n+freedom to share and change it.  By contrast, the GNU General Public\n+Licenses are intended to guarantee your freedom to share and change\n+free software--to make sure the software is free for all its users.\n+\n+  This license, the Lesser General Public License, applies to some\n+specially designated software packages--typically libraries--of the\n+Free Software Foundation and other authors who decide to use it.  You\n+can use it too, but we suggest you first think carefully about whether\n+this license or the ordinary General Public License is the better\n+strategy to use in any particular case, based on the explanations below.\n+\n+  When we speak of free software, we are referring to freedom of use,\n+not price.  Our General Public Licenses are designed to make sure that\n+you have the freedom to distribute copies of free software (and charge\n+for this service if you wish); that you receive source code or can get\n+it if you want it; that you can change the software and use pieces of\n+it in new free programs; and that you are informed that you can do\n+these things.\n+\n+  To protect your rights, we need to make restrictions that forbid\n+distributors to deny you these rights or to ask you to surrender these\n+rights.  
These restrictions translate to certain responsibilities for\n+you if you distribute copies of the library or if you modify it.\n+\n+  For example, if you distribute copies of the library, whether gratis\n+or for a fee, you must give the recipients all the rights that we gave\n+you.  You must make sure that they, too, receive or can get the source\n+code.  If you link other code with the library, you must provide\n+complete object files to the recipients, so that they can relink them\n+with the library after making changes to the library and recompiling\n+it.  And you must show them these terms so they know their rights.\n+\n+  We protect your rights with a two-step method: (1) we copyright the\n+library, and (2) we offer you this license, which gives you legal\n+permission to copy, distribute and/or modify the library.\n+\n+  To protect each distributor, we want to make it very clear that\n+there is no warranty for the free library.  Also, if the library is\n+modified by someone else and passed on, the recipients should know\n+that what they have is not the original version, so that the original\n+author's reputation will not be affected by problems that might be\n+introduced by others.\n+\x0c\n+  Finally, software patents pose a constant threat to the existence of\n+any free program.  We wish to make sure that a company cannot\n+effectively restrict the users of a free program by obtaining a\n+restrictive license from a patent holder.  Therefore, we insist that\n+any patent license obtained for a version of the library must be\n+consistent with the full freedom of use specified in this license.\n+\n+  Most GNU software, including some libraries, is covered by the\n+ordinary GNU General Public License.  This license, the GNU Lesser\n+General Public License, applies to certain designated libraries, and\n+is quite different from the ordinary General Public License.  
We use\n+this license for certain libraries in order to permit linking those\n+libraries into non-free programs.\n+\n+  When a program is linked with a library, whether statically or using\n+a shared library, the combination of the two is legally speaking a\n+combined work, a derivative of the original library.  The o"..b'se version number, you may choose any version ever published by\n+the Free Software Foundation.\n+\x0c\n+  14. If you wish to incorporate parts of the Library into other free\n+programs whose distribution conditions are incompatible with these,\n+write to the author to ask for permission.  For software which is\n+copyrighted by the Free Software Foundation, write to the Free\n+Software Foundation; we sometimes make exceptions for this.  Our\n+decision will be guided by the two goals of preserving the free status\n+of all derivatives of our free software and of promoting the sharing\n+and reuse of software generally.\n+\n+\t\t\t    NO WARRANTY\n+\n+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO\n+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.\n+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR\n+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY\n+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\n+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\n+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME\n+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+  16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN\n+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY\n+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU\n+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR\n+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE\n+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING\n+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A\n+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF\n+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n+DAMAGES.\n+\n+\t\t     END OF TERMS AND CONDITIONS\n+\x0c\n+           How to Apply These Terms to Your New Libraries\n+\n+  If you develop a new library, and you want it to be of the greatest\n+possible use to the public, we recommend making it free software that\n+everyone can redistribute and change.  You can do so by permitting\n+redistribution under these terms (or, alternatively, under the terms of the\n+ordinary General Public License).\n+\n+  To apply these terms, attach the following notices to the library.  It is\n+safest to attach them to the start of each source file to most effectively\n+convey the exclusion of warranty; and each file should have at least the\n+"copyright" line and a pointer to where the full notice is found.\n+\n+    <one line to give the library\'s name and a brief idea of what it does.>\n+    Copyright (C) <year>  <name of author>\n+\n+    This library is free software; you can redistribute it and/or\n+    modify it under the terms of the GNU Lesser General Public\n+    License as published by the Free Software Foundation; either\n+    version 2.1 of the License, or (at your option) any later version.\n+\n+    This library is distributed in the hope that it will be useful,\n+    but WITHOUT ANY WARRANTY; without even the implied warranty of\n+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU\n+    Lesser General Public License for more details.\n+\n+    You should have received a copy of the GNU Lesser General Public\n+    License along with this library; if not, write to the Free Software\n+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+You should also get your employer (if you work as a programmer) or your\n+school, if any, to sign a "copyright disclaimer" for the library, if\n+necessary.  Here is a sample; alter the names:\n+\n+  Yoyodyne, Inc., hereby disclaims all copyright interest in the\n+  library `Frob\' (a library for tweaking knobs) written by James Random Hacker.\n+\n+  <signature of Ty Coon>, 1 April 1990\n+  Ty Coon, President of Vice\n+\n+That\'s all there is to it!\n+\n+\n'
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,49 @@
+# ============================================================================
+# gzstream, C++ iostream classes wrapping the zlib compression library.
+# Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+# 
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+# ============================================================================
+# 
+# File          : Makefile
+# Revision      : $Revision: 1.3 $
+# Revision_date : $Date: 2001/10/04 15:09:28 $
+# Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+# 
+# ============================================================================
+
+# ----------------------------------------------------------------------------
+# adapt these settings to your need:
+# add '-DGZSTREAM_NAMESPACE=name' to CPPFLAGS to place the classes
+# in its own namespace. Note, this macro needs to be set while creating
+# the library as well while compiling applications based on it.
+# As an alternative, gzstream.C and gzstream.h can be edited.
+# ----------------------------------------------------------------------------
+
+# gzstream.C includes <gzstream.h>, hence the current directory on the
+# include path.
+INCLUDES = -I.
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+# not referenced by any rule in this Makefile
+UTILITIES_DIR = ../../utils/
+
+# build gzstream.o straight into the shared object directory
+${OBJ_DIR}/gzstream.o : gzstream.C gzstream.h
+ ${CXX} ${CXXFLAGS} -c -o ${OBJ_DIR}/gzstream.o gzstream.C $(INCLUDES)
+
+# removes everything in the shared object and binary directories
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/README Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,6 @@
+
+                              gzstream
+      C++ iostream classes wrapping the zlib compression library.
+===========================================================================
+
+    See index.html for documentation and installation instructions.
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,165 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.C
+// Revision      : $Revision: 1.7 $
+// Revision_date : $Date: 2003/01/08 14:41:27 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
+// Standard C++ Library".
+// ============================================================================
+
+#include <gzstream.h>
+#include <iostream>
+#include <string.h>  // for memcpy
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See header file for user classes.
+// ----------------------------------------------------------------------------
+
+// --------------------------------------
+// class gzstreambuf:
+// --------------------------------------
+
+// Opens the gzip file `name` in the direction selected by open_mode.
+// Returns this on success.  Returns a null pointer when a file is already
+// open, when open_mode asks for ate/app or for both in and out, or when
+// gzopen() fails.
+gzstreambuf* gzstreambuf::open( const char* name, int open_mode) {
+    if ( is_open())
+        return (gzstreambuf*)0;
+    mode = open_mode;
+
+    const bool wantsRead  = (mode & std::ios::in)  != 0;
+    const bool wantsWrite = (mode & std::ios::out) != 0;
+
+    // no append nor read/write mode
+    if ((mode & std::ios::ate) || (mode & std::ios::app) || (wantsRead && wantsWrite))
+        return (gzstreambuf*)0;
+
+    // zlib mode string: "rb" for input, "wb" for output, and a bare "b"
+    // when neither direction was requested.
+    const char* fmode = wantsRead ? "rb" : (wantsWrite ? "wb" : "b");
+
+    file = gzopen( name, fmode);
+    if (file == 0)
+        return (gzstreambuf*)0;
+    opened = 1;
+    return this;
+}
+
+// Flushes pending output and closes the gzip file.  Returns this when
+// gzclose() reports Z_OK; returns a null pointer when no file was open or
+// when gzclose() fails.  The buffer is marked closed either way.
+gzstreambuf * gzstreambuf::close() {
+    if ( ! is_open())
+        return (gzstreambuf*)0;
+
+    sync();
+    opened = 0;
+    const bool closedCleanly = ( gzclose( file) == Z_OK);
+    return closedCleanly ? this : (gzstreambuf*)0;
+}
+
+// Refills the get area from the gzip file and returns the next character
+// without consuming it.  Returns EOF when the buffer is not open for input
+// or when gzread() delivers no data (error or end of file).
+int gzstreambuf::underflow() { // used for input buffer only
+    // data still available in the get area: just report it
+    if ( gptr() && ( gptr() < egptr()))
+        return * reinterpret_cast<unsigned char *>( gptr());
+
+    if ( ! (mode & std::ios::in) || ! opened)
+        return EOF;
+    // Josuttis' implementation of inbuf
+    // keep up to 4 already-read characters in front of the new data so
+    // they can still be put back
+    int n_putback = gptr() - eback();
+    if ( n_putback > 4)
+        n_putback = 4;
+    memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback);
+
+    // the first 4 bytes of buffer are the putback area; new data goes after
+    int num = gzread( file, buffer+4, bufferSize-4);
+    if (num <= 0) // ERROR or EOF
+        return EOF;
+
+    // reset buffer pointers
+    setg( buffer + (4 - n_putback),   // beginning of putback area
+          buffer + 4,                 // read position
+          buffer + 4 + num);          // end of buffer
+
+    // return next character
+    return * reinterpret_cast<unsigned char *>( gptr());
+}
+
+// Writes everything currently in the put area to the gzip file and resets
+// the put pointer.  Kept separate from overflow() and sync() so both can
+// share it.  Returns the number of bytes written, or EOF if gzwrite()
+// wrote a different amount.
+int gzstreambuf::flush_buffer() {
+    const int pending = pptr() - pbase();
+    const int written = gzwrite( file, pbase(), pending);
+    if ( written != pending)
+        return EOF;
+    pbump( -pending);
+    return pending;
+}
+
+// Called when the put area is full (or to force a flush with c == EOF).
+// Stores c, if it is a real character, and writes the put area out.
+// Returns EOF on failure (not open for output, or the write failed) and a
+// value different from EOF on success.
+int gzstreambuf::overflow( int c) { // used for output buffer only
+    if ( ! ( mode & std::ios::out) || ! opened)
+        return EOF;
+    if (c != EOF) {
+        // NOTE(review): relies on the put area leaving room for one extra
+        // character; the setp() call is not visible here - confirm.
+        *pptr() = c;
+        pbump(1);
+    }
+    if ( flush_buffer() == EOF)
+        return EOF;
+    // A successful overflow must not return EOF; returning c unchanged
+    // made overflow(EOF) look like a failure even when the flush worked.
+    // not_eof(c) is c itself for every real character.
+    return traits_type::not_eof( c);
+}
+
+// Flushes any pending output.  Returns 0 on success (including when there
+// is nothing to flush) and -1 if the flush fails.
+int gzstreambuf::sync() {
+    // Changed to use flush_buffer() instead of overflow( EOF)
+    // which caused improper behavior with std::endl and flush(),
+    // bug reported by Vincent Ricard.
+    const bool hasPending = pptr() && pptr() > pbase();
+    if ( ! hasPending)
+        return 0;
+    return ( flush_buffer() == EOF) ? -1 : 0;
+}
+
+// --------------------------------------
+// class gzstreambase:
+// --------------------------------------
+
+// Attaches the embedded gzstreambuf to this stream and opens `name` with
+// the given mode; a failed open sets badbit (see open()).
+gzstreambase::gzstreambase( const char* name, int mode) {
+    init( &buf);
+    open( name, mode);
+}
+
+// Closes the underlying buffer; the result of close() is ignored here.
+gzstreambase::~gzstreambase() {
+    buf.close();
+}
+
+// Opens `name` through the underlying buffer; sets badbit on this stream
+// if the buffer could not be opened.
+void gzstreambase::open( const char* name, int open_mode) {
+    const bool openedOk = ( buf.open( name, open_mode) != 0);
+    if ( openedOk)
+        return;
+    clear( rdstate() | std::ios::badbit);
+}
+
+// Closes the underlying buffer if it is open; sets badbit on this stream
+// if closing fails.  Does nothing when no file is open.
+void gzstreambase::close() {
+    if ( buf.is_open() && ! buf.close())
+        clear( rdstate() | std::ios::badbit);
+}
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+// ============================================================================
+// EOF //
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,121 @@
+// ============================================================================
+// gzstream, C++ iostream classes wrapping the zlib compression library.
+// Copyright (C) 2001  Deepak Bandyopadhyay, Lutz Kettner
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+// ============================================================================
+//
+// File          : gzstream.h
+// Revision      : $Revision: 1.5 $
+// Revision_date : $Date: 2002/04/26 23:30:15 $
+// Author(s)     : Deepak Bandyopadhyay, Lutz Kettner
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
+// Standard C++ Library".
+// ============================================================================
+
+#ifndef GZSTREAM_H
+#define GZSTREAM_H 1
+
+// standard C++ with new header file names and std:: namespace
+#include <iostream>
+#include <fstream>
+#include <zlib.h>
+
+#ifdef GZSTREAM_NAMESPACE
+namespace GZSTREAM_NAMESPACE {
+#endif
+
+// ----------------------------------------------------------------------------
+// Internal classes to implement gzstream. See below for user classes.
+// ----------------------------------------------------------------------------
+
+class gzstreambuf : public std::streambuf {
+private:
+    static const int bufferSize = 47+256;    // size of data buff
+    // totals 512 bytes under g++ for igzstream at the end.
+
+    gzFile           file;               // file handle for compressed file
+    char             buffer[bufferSize]; // data buffer
+    char             opened;             // open/close state of stream
+    int              mode;               // I/O mode
+
+    int flush_buffer();
+public:
+    gzstreambuf() : opened(0) {
+        setp( buffer, buffer + (bufferSize-1));
+        setg( buffer + 4,     // beginning of putback area
+              buffer + 4,     // read position
+              buffer + 4);    // end position
+        // ASSERT: both input & output capabilities will not be used together
+    }
+    int is_open() { return opened; }
+    gzstreambuf* open( const char* name, int open_mode);
+    gzstreambuf* close();
+    ~gzstreambuf() { close(); }
+
+    virtual int     overflow( int c = EOF);
+    virtual int     underflow();
+    virtual int     sync();
+};
+
+// Common base of igzstream and ogzstream: owns the gzstreambuf and wires
+// it into the (virtually inherited) std::ios state.
+class gzstreambase : virtual public std::ios {
+protected:
+    gzstreambuf buf;    // the buffer doing the actual gz file I/O
+public:
+    // Attach the buffer to the stream state without opening a file.
+    gzstreambase() { init(&buf); }
+    // Attach the buffer and open the named file (see gzstream.C).
+    gzstreambase( const char* name, int open_mode);
+    ~gzstreambase();
+    void open( const char* name, int open_mode);
+    void close();
+    // Access to the embedded gz buffer.
+    gzstreambuf* rdbuf() { return &buf; }
+};
+
+// ----------------------------------------------------------------------------
+// User classes. Use igzstream and ogzstream analogously to ifstream and
+// ofstream respectively. They read and write files based on the gz*
+// function interface of the zlib. Files are compatible with gzip compression.
+// ----------------------------------------------------------------------------
+
+// Input stream over a gz file; open_mode defaults to std::ios::in.
+class igzstream : public gzstreambase, public std::istream {
+public:
+    // Create a stream with no file attached; use open() afterwards.
+    igzstream() : std::istream( &buf) {}
+    // Create a stream and open the named file right away.
+    igzstream( const char* name, int open_mode = std::ios::in)
+        : gzstreambase( name, open_mode), std::istream( &buf) {}
+    // Both bases declare rdbuf(); forward to the gzstreambase version.
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    // Open the named file through the shared base implementation.
+    void open( const char* name, int open_mode = std::ios::in) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+// Output stream over a gz file; the mode defaults to std::ios::out.
+class ogzstream : public gzstreambase, public std::ostream {
+public:
+    // Create a stream with no file attached; use open() afterwards.
+    ogzstream() : std::ostream( &buf) {}
+    // Create a stream and open the named file right away.
+    ogzstream( const char* name, int mode = std::ios::out)
+        : gzstreambase( name, mode), std::ostream( &buf) {}
+    // Both bases declare rdbuf(); forward to the gzstreambase version.
+    gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
+    // Open the named file through the shared base implementation.
+    void open( const char* name, int open_mode = std::ios::out) {
+        gzstreambase::open( name, open_mode);
+    }
+};
+
+#ifdef GZSTREAM_NAMESPACE
+} // namespace GZSTREAM_NAMESPACE
+#endif
+
+#endif // GZSTREAM_H
+// ============================================================================
+// EOF //
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o
b
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o has changed
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o
b
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o has changed
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o
b
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o has changed
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/version
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/gzstream/version Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,1 @@
+1.5 (08 Jan 2003)
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = 
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= lineFileUtilities.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile this module's source into an object file under $(OBJ_DIR).
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# External objects: _EXT_OBJECTS and INCLUDES are both empty in this
+# Makefile, so this rule has no targets and no directory to descend into.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
+# Removes everything under $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,15 @@
+//
+//  lineFileUtilities.cpp
+//  BEDTools
+//
+//  Created by Aaron Quinlan Spring 2009.
+//  Copyright 2009 Aaron Quinlan. All rights reserved.
+//
+//  Summary:  Contains common functions for processing text files.
+//
+#include <sstream>
+#include <iostream>
+#include "lineFileUtilities.h"
+
+
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,52 @@
+#ifndef LINEFILEUTILITIES_H
+#define LINEFILEUTILITIES_H
+
+#include <vector>
+#include <string>
+#include <cstring>
+#include <cstdlib>
+#include <sstream>
+
+using namespace std;
+
+// Convert any value that can be streamed with operator<< into the text
+// that streaming it produces.
+template <typename T>
+inline
+std::string ToString(const T & value) {
+    std::ostringstream out;
+    out << value;
+    return out.str();
+}
+
+// tokenize into a list of strings.
+//
+// Splits str at any character contained in delimiter and appends the
+// pieces to elems (elems is not cleared first). Runs of delimiters are
+// collapsed and leading/trailing delimiters are ignored, so no empty
+// token is ever produced -- the same rules strtok() applies.
+//
+// The scan works directly on the std::string: no copy of the line is
+// placed on the stack (a variable-length array is not standard C++ and
+// overflows the stack for very long lines) and no static strtok() state
+// is involved.
+inline
+void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t")
+{
+    string::size_type tokStart = str.find_first_not_of(delimiter);
+    while (tokStart != string::npos) {
+        // tokEnd is npos for the last token; substr() then runs to the end
+        const string::size_type tokEnd = str.find_first_of(delimiter, tokStart);
+        elems.push_back(str.substr(tokStart, tokEnd - tokStart));
+        tokStart = str.find_first_not_of(delimiter, tokEnd);
+    }
+}
+
+// tokenize into a list of integers
+//
+// Splits str at any character contained in delimiter, converts every
+// piece with atoi() and appends the results to elems (elems is not
+// cleared first). Runs of delimiters are collapsed, so empty pieces are
+// never converted. As with atoi(), a piece that does not start with a
+// number yields 0.
+//
+// The scan works directly on the std::string: no copy of the line is
+// placed on the stack (a variable-length array is not standard C++ and
+// overflows the stack for very long lines) and no static strtok() state
+// is involved.
+inline
+void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t")
+{
+    string::size_type tokStart = str.find_first_not_of(delimiter);
+    while (tokStart != string::npos) {
+        // tokEnd is npos for the last token; substr() then runs to the end
+        const string::size_type tokEnd = str.find_first_of(delimiter, tokStart);
+        elems.push_back(atoi(str.substr(tokStart, tokEnd - tokStart).c_str()));
+        tokStart = str.find_first_not_of(delimiter, tokEnd);
+    }
+}
+
+#endif /* LINEFILEUTILITIES_H */
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = 
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= sequenceUtils.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS =
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile this module's source into an object file under $(OBJ_DIR).
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# External objects: _EXT_OBJECTS and INCLUDES are both empty in this
+# Makefile, so this rule has no targets and no directory to descend into.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
+# Removes everything under $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,126 @@
+//
+//  sequenceUtils.cpp
+//  BEDTools
+//
+//  Created by Aaron Quinlan Spring 2009.
+//  Copyright 2009 Aaron Quinlan. All rights reserved.
+//
+//  Summary:  Contains common functions for manipulating DNA sequences.
+//
+//  Acknowledgment: I am grateful to Michael Stromberg for the code below to
+//                  reverse complement a sequence.
+
+#include "sequenceUtils.h"
+
+// Reverse the order of the characters of seq, in place.
+void reverseSequence(string &seq) {
+    if (seq.empty())
+        return;
+    // walk inwards from both ends, exchanging characters as we go
+    string::size_type left  = 0;
+    string::size_type right = seq.size() - 1;
+    while (left < right) {
+        std::swap(seq[left], seq[right]);
+        ++left;
+        --right;
+    }
+}
+
+// Performs an in-place reverse complement conversion
+void reverseComplement(string &seq) {
+
+    // reverse the sequence
+    reverseSequence(seq);
+
+    // swap the bases
+    for(unsigned int i = 0; i < seq.length(); i++) {
+        switch(seq[i]) {
+            case 'A':
+                seq[i] = 'T';
+                break;
+            case 'C':
+                seq[i] = 'G';
+                break;
+            case 'G':
+                seq[i] = 'C';
+                break;
+            case 'T':
+                seq[i] = 'A';
+                break;
+            case 'a':
+                seq[i] = 't';
+                break;
+            case 'c':
+                seq[i] = 'g';
+                break;
+            case 'g':
+                seq[i] = 'c';
+                break;
+            case 't':
+                seq[i] = 'a';
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+
+// Convert every character of seq to lowercase, in place.
+void toLowerCase(std::string &seq)
+{
+    const std::string::size_type length = seq.length();
+    for (std::string::size_type i = 0; i < length; ++i)
+    {
+        // <cctype> functions are only defined for values representable as
+        // unsigned char (or EOF); a plain char may be negative.
+        seq[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(seq[i])));
+    }
+}
+
+
+// Convert every character of seq to uppercase, in place.
+void toUpperCase(std::string &seq)
+{
+    const std::string::size_type length = seq.length();
+    for (std::string::size_type i = 0; i < length; ++i)
+    {
+        // <cctype> functions are only defined for values representable as
+        // unsigned char (or EOF); a plain char may be negative.
+        seq[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(seq[i])));
+    }
+}
+
+
+void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other)
+{
+    // swap the bases
+    for(unsigned int i = 0; i < seq.length(); i++) {
+        switch(seq[i]) {
+            case 'A':
+            case 'a':
+                a++;
+                break;
+            case 'C':
+            case 'c':
+                c++;
+                break;
+            case 'G':
+            case 'g':
+                g++;
+                break;
+            case 'T':
+            case 't':
+                t++;
+                break;
+            case 'N':
+            case 'n':
+                n++;
+                break;
+            default:
+                other++;
+                break;
+        }
+    }    
+}
+
+
+// Count the positions of seq at which pattern occurs. Overlapping
+// occurrences are all counted ("AA" occurs three times in "AAAA") and the
+// comparison is case-sensitive. An empty pattern matches at every
+// position, so the result is then seq.length().
+int countPattern(const string &seq, const string &pattern)
+{
+    const string::size_type patternLength = pattern.size();
+    int patternCount = 0;
+    for (string::size_type i = 0; i < seq.length(); i++) {
+        // compare() checks in place, without building a temporary substring
+        if (seq.compare(i, patternLength, pattern) == 0) {
+            patternCount++;
+        }
+    }
+    return patternCount;
+}
+        
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,27 @@
+#ifndef SEQUENCEUTILS_H
+#define SEQUENCEUTILS_H
+
+#include <string>
+#include <algorithm>
+#include <cctype>
+
+using namespace std;
+
+// Performs an in-place sequence reversal
+void reverseSequence(string &seq);
+
+// Performs an in-place reverse complement conversion
+// (A<->T and C<->G, case preserved; other characters are left unchanged)
+void reverseComplement(string &seq);
+
+// Converts every character in a string to lowercase
+void toLowerCase(string &seq);
+
+// Converts every character in a string to uppercase
+void toUpperCase(string &seq);
+
+// Calculates the number of a, c, g, t, n, and other bases found in a sequence
+// (case-insensitive). The counters are incremented, not reset.
+void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other);
+
+// Counts the positions in seq at which pattern occurs; overlapping
+// occurrences are each counted.
+int countPattern(const string &seq, const string &pattern);
+
+#endif
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,18 @@
+#ifndef STRINGUTILITIES_H
+#define STRINGUTILITIES_H
+
+#include <cctype>
+#include <string>
+
+/****************************************************
+// isInteger(s): Tests if string s is a valid integer,
+// i.e. whether every character of s is a decimal digit.
+// A sign, whitespace or any other character makes the
+// test fail. A string with no characters has nothing
+// to reject and therefore passes.
+*****************************************************/
+inline bool isInteger(const std::string& s) {
+    const std::string::size_type len = s.length();
+    for (std::string::size_type i = 0; i < len; i++) {
+        // <cctype> functions are only defined for values representable as
+        // unsigned char (or EOF); a plain char may be negative.
+        if (!std::isdigit(static_cast<unsigned char>(s[i]))) return false;
+    }
+    return true;
+}
+
+#endif /* STRINGUTILITIES_H */
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/tabFile/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,29 @@
+OBJ_DIR = ../../../obj/
+BIN_DIR = ../../../bin/
+UTILITIES_DIR = ../../utils/
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= tabFile.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+# Compile this module's source into an object file under $(OBJ_DIR).
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Build the utility objects this module links against by running make in
+# each utility's own directory. "make -C" needs a directory, so the
+# directories are listed explicitly instead of reusing the -I compiler
+# flags held in $(INCLUDES).
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# Removes everything under $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
\ No newline at end of file
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,99 @@
+/*****************************************************************************
+  tabFile.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "tabFile.h"
+
+/*******************************************
+Class methods
+*******************************************/
+
+// Constructor
+// Only records the file name. The stream pointer starts out as NULL so
+// that it never holds an indeterminate value before Open() is called.
+TabFile::TabFile(const string &tabFile)
+: _tabStream(NULL)
+, _tabFile(tabFile)
+{}
+
+// Destructor
+// Empty: it does not release the stream. The stream is deleted by
+// Close(), so callers are expected to call Close() themselves.
+TabFile::~TabFile(void) {
+}
+
+
+void TabFile::Open(void) {
+    if (_tabFile == "stdin") {
+        _tabStream = &cin;
+    }
+    else {
+        size_t foundPos;
+        foundPos = _tabFile.find_last_of(".gz");
+        // is this a GZIPPED TAB file?
+        if (foundPos == _tabFile.size() - 1) {
+            igzstream tabs(_tabFile.c_str(), ios::in);
+            if ( !tabs ) {
+                cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl;
+                exit (1);
+            }
+            else {
+                // if so, close it (this was just a test)
+                tabs.close();
+                // now set a pointer to the stream so that we
+                // can read the file later on.
+                _tabStream = new igzstream(_tabFile.c_str(), ios::in);
+            }
+        }
+        // not GZIPPED.
+        else {
+
+            ifstream tabs(_tabFile.c_str(), ios::in);
+            // can we open the file?
+            if ( !tabs ) {
+                cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl;
+                exit (1);
+            }
+            else {
+                // if so, close it (this was just a test)
+                tabs.close();
+                // now set a pointer to the stream so that we
+                // can read the file later on.
+                _tabStream = new ifstream(_tabFile.c_str(), ios::in);
+            }
+        }
+    }
+}
+
+
+// Close the TAB file
+// Deletes the stream that Open() allocated. cin is not owned by this
+// object and is left alone. The pointer is reset after the delete so a
+// repeated Close() does not free the same stream twice.
+void TabFile::Close(void) {
+    if (_tabFile != "stdin") {
+        delete _tabStream;
+        _tabStream = NULL;
+    }
+}
+
+
+TabLineStatus TabFile::GetNextTabLine(TAB_FIELDS &tabFields, int &lineNum) {
+
+    // make sure there are still lines to process.
+    // if so, tokenize, return the TAB_FIELDS.
+    if (_tabStream->good() == true) {
+        string tabLine;
+        tabFields.reserve(20);
+
+        // parse the tabStream pointer
+        getline(*_tabStream, tabLine);
+        lineNum++;
+
+        // split into a string vector.
+        Tokenize(tabLine, tabFields);
+
+        // parse the line and validate it
+        return parseTabLine(tabFields, lineNum);
+    }
+
+    // default if file is closed or EOF
+    return TAB_INVALID;
+}
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h Thu Nov 03 10:25:04 2011 -0400
[
@@ -0,0 +1,80 @@
+/*****************************************************************************
+  tabFile.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef TABFILE_H
+#define TABFILE_H
+
+#include "gzstream.h"
+#include <vector>
+#include <string>
+#include <iostream>
+
+using namespace std;
+
+// enum to flag the state of a given line in a TAB file.
+enum TabLineStatus
+{
+    TAB_INVALID = -1,   // no line could be processed (stream not good / EOF)
+    TAB_HEADER  = 0,    // first field starts with '#': comment or header line
+    TAB_BLANK   = 1,    // the line produced no fields
+    TAB_VALID   = 2     // a line with data
+};
+
+typedef vector<string> TAB_FIELDS;
+
+//************************************************
+// TabFile Class methods and elements
+//************************************************
+class TabFile {
+
+public:
+
+    // Constructor
+    TabFile(const string &tabFile);
+
+    // Destructor
+    ~TabFile(void);
+
+    // Open a TAB file for reading (creates an istream pointer)
+    void Open(void);
+
+    // Close an opened TAB file.
+    void Close(void);
+
+    // Get the next TAB entry in an opened TAB file.
+    TabLineStatus GetNextTabLine (TAB_FIELDS &tab, int &lineNum);
+
+private:
+
+    // data
+    istream *_tabStream;
+    string _tabFile;
+
+    // methods
+    inline TabLineStatus parseTabLine (const vector<string> &lineVector, int &lineNum) {
+        // bail out if we have a blank line
+        if (lineVector.size() == 0)
+            return TAB_BLANK;
+        // real line with data
+        if (lineVector[0][0] != '#') {
+            return TAB_VALID;
+        }
+        // comment or header line
+        else {
+            lineNum--;
+            return TAB_HEADER;
+        }
+        // default
+        return TAB_INVALID;
+    }
+};
+
+#endif /* TABFILE_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/version/version.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/version/version.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,8 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+// define the version.  All tools in the
+// suite carry the same version number.
+// NOTE(review): this header sits in a tree named BEDTools-Version-2.14.3
+// while the string below says 2.14.2 -- confirm which number is intended.
+#define VERSION "2.14.2"
+
+#endif /* VERSION_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/windowBed/Makefile Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,51 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+
+# -------------------
+# define our includes
+# -------------------
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
+           -I$(UTILITIES_DIR)/version/ \
+           -I$(UTILITIES_DIR)/gzstream/ \
+           -I$(UTILITIES_DIR)/genomeFile/ \
+           -I$(UTILITIES_DIR)/lineFileUtilities/ \
+           -I$(UTILITIES_DIR)/fileType/ \
+           -I$(UTILITIES_DIR)/BamTools/include \
+           -I$(UTILITIES_DIR)/BamTools-Ancillary
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= windowMain.cpp windowBed.cpp
+OBJECTS= $(SOURCES:.cpp=.o)
+_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
+EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+PROGRAM= windowBed
+
+
+# Default target: build the windowBed executable.
+all: $(PROGRAM)
+
+.PHONY: all
+
+# Link this tool's objects with the shared utility objects and the
+# BamTools library; the binary is written to $(BIN_DIR).
+$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
+ @echo "  * linking $(PROGRAM)"
+ @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)
+
+# Compile each source of this tool into an object file under $(OBJ_DIR).
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo "  * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+
+# Build the utility objects by running make in each utility directory.
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
+ @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
+
+# Removes everything under $(OBJ_DIR) and $(BIN_DIR), not only the files
+# produced by this Makefile.
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
+
+.PHONY: clean
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,253 @@
+/*****************************************************************************
+  windowBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "windowBed.h"
+
+
+/*
+    Constructor
+*/
+BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop,
+                     bool anyHit, bool noHit, bool writeCount, bool strandWindows,
+                     bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) {
+
+    _bedAFile      = bedAFile;
+    _bedBFile      = bedBFile;
+
+    _leftSlop      = leftSlop;
+    _rightSlop     = rightSlop;
+
+    _anyHit              = anyHit;
+    _noHit               = noHit;
+    _writeCount          = writeCount;
+    _strandWindows       = strandWindows;
+    _matchOnSameStrand   = matchOnSameStrand;
+    _matchOnDiffStrand   = matchOnDiffStrand;
+    _bamInput            = bamInput;
+    _bamOutput           = bamOutput;
+    _isUncompressedBam   = isUncompressedBam;
+
+    _bedA          = new BedFile(bedAFile);
+    _bedB          = new BedFile(bedBFile);
+
+    if (_bamInput == false)
+        WindowIntersectBed();
+    else
+        WindowIntersectBam(_bedAFile);
+}
+
+
+
+/*
+    Destructor
+
+    NOTE(review): the body is empty, so the two BedFile objects that the
+    constructor allocates with new (_bedA and _bedB) are not deleted here.
+*/
+BedWindow::~BedWindow(void) {
+}
+
+
+
+void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) {
+
+    /*
+        Adjust the start and end of a based on the requested window
+    */
+
+    // update the current feature's start and end
+    // according to the slop requested (slop = 0 by default)
+    CHRPOS aFudgeStart = 0;
+    CHRPOS aFudgeEnd;
+    AddWindow(a, aFudgeStart, aFudgeEnd);
+
+    /*
+        Now report the hits (if any) based on the window around a.
+    */
+    // get the hits in B for the A feature
+    _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand);
+
+    int numOverlaps = 0;
+
+    // loop through the hits and report those that meet the user's criteria
+    vector<BED>::const_iterator h = hits.begin();
+    vector<BED>::const_iterator hitsEnd = hits.end();
+    for (; h != hitsEnd; ++h) {
+
+        int s = max(aFudgeStart, h->start);
+        int e = min(aFudgeEnd, h->end);
+        int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
+        int aLength = (a.end - a.start);        // the length of a in b.p.
+
+        if (s < e) {
+            // is there enough overlap (default ~ 1bp)
+            if ( ((float) overlapBases / (float) aLength) > 0 ) {
+                numOverlaps++;
+                if (_anyHit == false && _noHit == false && _writeCount == false) {
+                    _bedA->reportBedTab(a);
+                    _bedB->reportBedNewLine(*h);
+                }
+            }
+        }
+    }
+    if (_anyHit == true && (numOverlaps >= 1)) {
+        _bedA->reportBedNewLine(a); }
+    else if (_writeCount == true) {
+        _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps);
+    }
+    else if (_noHit == true && (numOverlaps == 0)) {
+        _bedA->reportBedNewLine(a);
+    }
+}
+
+
+// Return true if at least one feature of B overlaps the window around a.
+// The lookup uses the slop-adjusted coordinates, so the requested window
+// is honoured (querying with a.start / a.end would ignore it).
+bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) {
+
+    // update the current feature's start and end
+    // according to the slop requested (slop = 0 by default)
+    CHRPOS aFudgeStart = 0;
+    CHRPOS aFudgeEnd;
+    AddWindow(a, aFudgeStart, aFudgeEnd);
+
+    bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, _matchOnSameStrand, _matchOnDiffStrand);
+    return overlapsFound;
+}
+
+
+void BedWindow::WindowIntersectBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    BED a, nullBed;
+    int lineNum = 0;                    // current input line number
+    BedLineStatus bedStatus;
+    vector<BED> hits;                   // vector of potential hits
+    hits.reserve(100);
+
+    _bedA->Open();
+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            FindWindowOverlaps(a, hits);
+            hits.clear();
+            a = nullBed;
+        }
+    }
+    _bedA->Close();
+}
+
+
+void BedWindow::WindowIntersectBam(string bamFile) {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    // open the BAM file
+    BamReader reader;
+    BamWriter writer;
+    reader.Open(bamFile);
+
+    // get header & reference information
+    string bamHeader  = reader.GetHeaderText();
+    RefVector refs    = reader.GetReferenceData();
+
+    // open a BAM output to stdout if we are writing BAM
+    if (_bamOutput == true) {
+        // set compression mode
+        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
+        writer.SetCompressionMode(compressionMode);
+        // open our BAM writer
+        writer.Open("stdout", bamHeader, refs);
+    }
+
+    vector<BED> hits;                   // vector of potential hits
+    // reserve some space
+    hits.reserve(100);
+
+    _bedA->bedType = 6;
+    BamAlignment bam;
+    bool overlapsFound;
+    // get each set of alignments for each pair.
+    while (reader.GetNextAlignment(bam)) {
+
+        if (bam.IsMapped()) {
+            BED a;
+            a.chrom = refs.at(bam.RefID).RefName;
+            a.start = bam.Position;
+            a.end   = bam.GetEndPosition(false, false);
+
+            // build the name field from the BAM alignment.
+            a.name = bam.Name;
+            if (bam.IsFirstMate()) a.name += "/1";
+            if (bam.IsSecondMate()) a.name += "/2";
+
+            a.score  = ToString(bam.MapQuality);
+            a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
+
+            if (_bamOutput == true) {
+                overlapsFound = FindOneOrMoreWindowOverlaps(a);
+                if (overlapsFound == true) {
+                    if (_noHit == false)
+                        writer.SaveAlignment(bam);
+                }
+                else {
+                    if (_noHit == true)
+                        writer.SaveAlignment(bam);
+                }
+            }
+            else {
+                FindWindowOverlaps(a, hits);
+                hits.clear();
+            }
+        }
+        // BAM IsMapped() is false
+        else if (_noHit == true) {
+            writer.SaveAlignment(bam);
+        }
+    }
+
+    // close the relevant BAM files.
+    reader.Close();
+    if (_bamOutput == true) {
+        writer.Close();
+    }
+}
+
+
+void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) {
+    // Does the user want to treat the windows based on strand?
+    // If so,
+    // if "+", then left is left and right is right
+    // if "-", the left is right and right is left.
+    if (_strandWindows) {
+        if (a.strand == "+") {
+            if ((int) (a.start - _leftSlop) > 0) 
+                fudgeStart = a.start - _leftSlop;
+            else fudgeStart = 0;
+            fudgeEnd = a.end + _rightSlop;
+        }
+        else {
+            if ((int) (a.start - _rightSlop) > 0) 
+                fudgeStart = a.start - _rightSlop;
+            else fudgeStart = 0;
+            fudgeEnd = a.end + _leftSlop;
+        }
+    }
+    // If not, add the windows irrespective of strand
+    else {
+        if ((int) (a.start - _leftSlop) > 0) 
+            fudgeStart = a.start - _leftSlop;
+        else fudgeStart = 0;
+        fudgeEnd = a.end + _rightSlop;
+    }
+}
+
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowBed.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/windowBed/windowBed.h Thu Nov 03 10:25:04 2011 -0400
b
@@ -0,0 +1,69 @@
+/*****************************************************************************
+  windowBed.h
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef WINDOWBED_H
+#define WINDOWBED_H
+
+#include "api/BamReader.h"
+#include "api/BamWriter.h"
+#include "api/BamAux.h"
+using namespace BamTools;
+
+#include "bedFile.h"
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedWindow {
+    // Examines a slop-extended "window" around each feature in A and looks for features in B that overlap it.
+public:
+
+    // constructor: takes the A and B file names, the left/right window sizes, and the reporting, strand and BAM option flags.
+    BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop,
+              bool anyHit, bool noHit, bool writeCount, bool strandWindows,
+              bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam);
+
+    // destructor
+    ~BedWindow(void);
+
+private:
+    // Option state below is presumably copied from the like-named constructor arguments (constructor body not visible here -- TODO confirm).
+    string _bedAFile;  // name of the A input file
+    string _bedBFile;  // name of the B input file
+    bool _anyHit;  // presumably the -u option: report an A entry once if any overlap is found
+    bool _writeCount;  // presumably the -c option: report the number of overlaps for each A entry
+    int _leftSlop;  // subtracted from a.start in AddWindow (added to a.end instead when strand windows apply and strand is not "+")
+    int _rightSlop;  // added to a.end in AddWindow (subtracted from a.start instead when strand windows apply and strand is not "+")
+    bool _noHit;  // when true, the BAM path saves only alignments with no window overlap, plus unmapped alignments
+    bool _strandWindows;  // when true, AddWindow swaps the left/right slops for features whose strand is not "+"
+    bool _matchOnSameStrand;  // presumably the -sm option: only same-strand hits count (use not visible here)
+    bool _matchOnDiffStrand;  // presumably the -Sm option: only opposite-strand hits count (use not visible here)
+    bool _bamInput;  // presumably true when A was given with -abam (use not visible here)
+    bool _bamOutput;  // when true, WindowIntersectBam saves alignments to a BAM writer rather than calling FindWindowOverlaps
+    bool  _isUncompressedBam;  // presumably the -ubam option: write uncompressed BAM (use not visible here)
+
+    // BedFile handles, presumably for the A and B inputs; ownership is not visible from this header.
+    BedFile *_bedA, *_bedB;
+
+    // methods
+    void WindowIntersectBed();  // presumably the driver for BED-format A input -- TODO confirm
+    void WindowIntersectBam(string bamFile);  // driver for BAM-format A input; closes the reader (and the writer when _bamOutput is set) at the end
+    void FindWindowOverlaps(const BED &a, vector<BED> &hits);  // called per A feature with a scratch "hits" vector that the BAM path clears afterwards
+    bool FindOneOrMoreWindowOverlaps(const BED &a);  // result is used as the "overlaps found" flag for a's window
+    void AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd);  // computes the window bounds for a, pinning the start to 0
+
+};
+#endif /* WINDOWBED_H */
b
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp Thu Nov 03 10:25:04 2011 -0400
[
b'@@ -0,0 +1,263 @@\n+/*****************************************************************************\n+  windowMain.cpp\n+\n+  (c) 2009 - Aaron Quinlan\n+  Hall Laboratory\n+  Department of Biochemistry and Molecular Genetics\n+  University of Virginia\n+  aaronquinlan@gmail.com\n+\n+  Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "windowBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define the version\n+#define PROGRAM_NAME "windowBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+    // our configuration variables\n+    bool showHelp = false;\n+\n+    // input files\n+    string bedAFile;\n+    string bedBFile;\n+\n+    // input arguments\n+    int leftSlop  = 1000;\n+    int rightSlop = 1000;\n+\n+    bool haveBedA        = false;\n+    bool haveBedB        = false;\n+    bool noHit           = false;\n+    bool anyHit          = false;\n+    bool writeCount      = false;\n+    bool haveSlop        = false;\n+    bool haveLeft        = false;\n+    bool haveRight       = false;\n+    bool strandWindows   = false;\n+    bool matchOnSameStrand   = false;\n+    bool matchOnDiffStrand   = false;\n+    bool inputIsBam      = false;\n+    bool outputIsBam     = true;\n+    bool uncompressedBam = false;\n+\n+    // check to see if we should print out some help\n+    if(argc <= 1) showHelp = true;\n+\n+    for(int i = 1; i < argc; i++) {\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+        (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+            showHelp = true;\n+        }\n+    }\n+\n+    if(showHelp) ShowHelp();\n+\n+    // do some 
parsing (all of these parameters require 2 strings)\n+    for(int i = 1; i < argc; i++) {\n+\n+        int parameterLength = (int)strlen(argv[i]);\n+\n+        if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedA = true;\n+                inputIsBam = true;\n+                bedAFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveBedB = true;\n+                bedBFile = argv[i + 1];\n+                i++;\n+            }\n+        }\n+        else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+            outputIsBam = false;\n+        }\n+        else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+            anyHit = true;\n+        }\n+        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+            writeCount = true;\n+        }\n+        else if (PARAMETER_CHECK("-v", 2, parameterLength)) {\n+            noHit = true;\n+        }\n+        else if (PARAMETER_CHECK("-sw", 3, parameterLength)) {\n+            strandWindows = true;\n+        }\n+        else if (PARAMETER_CHECK("-sm", 3, parameterLength)) {\n+            matchOnSameStrand = true;\n+        }\n+        else if (PARAMETER_CHECK("-Sm", 3, parameterLength)) {\n+            matchOnDiffStrand = true;\n+        }\n+        else if (PARAMETER_CHECK("-w", 2, parameterLength)) {\n+            if ((i+1) < argc) {\n+                haveSlop = true;\n+                leftSlop = atoi(argv[i + 1]);\n+                rightSlop = leftSlop;\n+                i++;\n+            }\n+        }\n+        else if (PARAMETER_CHECK("-l", 2, parameterLength)) {\n+            if ((i+1) < 
argc) {\n+                haveLeft = true;\n+                leftSlop = atoi(argv[i + 1]);\n+                i++;\n+            }\n+      '..b'ressedBam);\n+        delete bi;\n+        return 0;\n+    }\n+    else {\n+        ShowHelp();\n+    }\n+}\n+\n+\n+void ShowHelp(void) {\n+\n+    cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+    cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+\n+    cerr << "Summary: Examines a \\"window\\" around each feature in A and" << endl;\n+    cerr << "\\t reports all features in B that overlap the window. For each" << endl;\n+    cerr << "\\t overlap the entire entry in A and B are reported." << endl << endl;\n+\n+    cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+    cerr << "Options: " << endl;\n+\n+    cerr << "\\t-abam\\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl << endl;\n+\n+    cerr << "\\t-ubam\\t"         << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n+\n+    cerr << "\\t-bed\\t"          << "When using BAM input (-abam), write output as BED. The default" << endl;\n+    cerr                        << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+    cerr << "\\t-w\\t"            << "Base pairs added upstream and downstream of each entry" << endl;\n+    cerr                        << "\\t\\tin A when searching for overlaps in B." << endl;\n+    cerr                        << "\\t\\t- Creates symterical \\"windows\\" around A." << endl;\n+    cerr                        << "\\t\\t- Default is 1000 bp." << endl;\n+    cerr                        << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+    cerr << "\\t-l\\t"            << "Base pairs added upstream (left of) of each entry" << endl;\n+    cerr                        << "\\t\\tin A when searching for overlaps in B." 
<< endl;\n+    cerr                        << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n+    cerr                        << "\\t\\t- Default is 1000 bp." << endl;\n+    cerr                        << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+    cerr << "\\t-r\\t"            << "Base pairs added downstream (right of) of each entry" << endl;\n+    cerr                        << "\\t\\tin A when searching for overlaps in B." << endl;\n+    cerr                        << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n+    cerr                        << "\\t\\t- Default is 1000 bp." << endl;\n+    cerr                        << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+    cerr << "\\t-sw\\t"           << "Define -l and -r based on strand.  For example if used, -l 500" << endl;\n+    cerr                        << "\\t\\tfor a negative-stranded feature will add 500 bp downstream." << endl;\n+    cerr                        << "\\t\\t- Default = disabled." << endl << endl;\n+\n+    cerr << "\\t-sm\\t"           << "Only report hits in B that overlap A on the _same_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-Sm\\t"           << "Only report hits in B that overlap A on the _opposite_ strand." << endl;\n+    cerr                        << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+    cerr << "\\t-u\\t"            << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+    cerr                        << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl << endl;\n+\n+    cerr << "\\t-c\\t"            << "For each entry in A, report the number of overlaps with B." << endl;\n+    cerr                        << "\\t\\t- Reports 0 for A entries that have no overlap with B." 
<< endl;\n+    cerr                        << "\\t\\t- Overlaps restricted by -f." << endl << endl;\n+\n+    cerr << "\\t-v\\t"            << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+    cerr                        << "\\t\\t- Similar to \\"grep -v.\\"" << endl << endl;\n+\n+    // end the program here\n+    exit(1);\n+}\n'