Next changeset 1:bec36315bd12 (2011-11-19) |
Commit message:
Uploaded |
added:
BEDTools-Version-2.14.3/LICENSE BEDTools-Version-2.14.3/Makefile BEDTools-Version-2.14.3/README.rst BEDTools-Version-2.14.3/RELEASE_HISTORY BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed BEDTools-Version-2.14.3/genomes/human.hg18.genome BEDTools-Version-2.14.3/genomes/human.hg19.genome BEDTools-Version-2.14.3/genomes/mouse.mm8.genome BEDTools-Version-2.14.3/genomes/mouse.mm9.genome BEDTools-Version-2.14.3/src/Makefile BEDTools-Version-2.14.3/src/annotateBed/Makefile BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp BEDTools-Version-2.14.3/src/bamToBed/Makefile BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp BEDTools-Version-2.14.3/src/bedToBam/Makefile BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp BEDTools-Version-2.14.3/src/bedToIgv/Makefile BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp BEDTools-Version-2.14.3/src/closestBed/Makefile BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp BEDTools-Version-2.14.3/src/closestBed/closestBed.h BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp BEDTools-Version-2.14.3/src/complementBed/Makefile BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp BEDTools-Version-2.14.3/src/complementBed/complementBed.h BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp BEDTools-Version-2.14.3/src/coverageBed/Makefile BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp BEDTools-Version-2.14.3/src/cuffToTrans/Makefile BEDTools-Version-2.14.3/src/fastaFromBed/Makefile BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h 
BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp BEDTools-Version-2.14.3/src/fjoin/Makefile BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp BEDTools-Version-2.14.3/src/fjoin/fjoin.h BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp BEDTools-Version-2.14.3/src/flankBed/Makefile BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp BEDTools-Version-2.14.3/src/flankBed/flankBed.h BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp BEDTools-Version-2.14.3/src/intersectBed/Makefile BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp BEDTools-Version-2.14.3/src/linksBed/Makefile BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp BEDTools-Version-2.14.3/src/linksBed/linksBed.h BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp BEDTools-Version-2.14.3/src/mergeBed/Makefile BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp BEDTools-Version-2.14.3/src/multiBamCov/Makefile BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp 
BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h BEDTools-Version-2.14.3/src/nucBed/Makefile BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp BEDTools-Version-2.14.3/src/nucBed/nucBed.h BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp BEDTools-Version-2.14.3/src/overlap/Makefile BEDTools-Version-2.14.3/src/overlap/overlap.cpp BEDTools-Version-2.14.3/src/pairToBed/Makefile BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp BEDTools-Version-2.14.3/src/pairToPair/Makefile BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp BEDTools-Version-2.14.3/src/shuffleBed/Makefile BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp BEDTools-Version-2.14.3/src/slopBed/Makefile BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp BEDTools-Version-2.14.3/src/slopBed/slopBed.h BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp BEDTools-Version-2.14.3/src/sortBed/Makefile BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp BEDTools-Version-2.14.3/src/sortBed/sortBed.h BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp BEDTools-Version-2.14.3/src/subtractBed/Makefile BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp BEDTools-Version-2.14.3/src/tagBam/Makefile BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp BEDTools-Version-2.14.3/src/tagBam/tagBam.h BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h 
BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE BEDTools-Version-2.14.3/src/utils/BamTools/Makefile BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp 
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h 
BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h BEDTools-Version-2.14.3/src/utils/Fasta/Makefile BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp BEDTools-Version-2.14.3/src/utils/Fasta/split.h BEDTools-Version-2.14.3/src/utils/bedFile/Makefile BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h BEDTools-Version-2.14.3/src/utils/fileType/Makefile BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp BEDTools-Version-2.14.3/src/utils/fileType/fileType.h BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp 
BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB BEDTools-Version-2.14.3/src/utils/gzstream/Makefile BEDTools-Version-2.14.3/src/utils/gzstream/README BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o BEDTools-Version-2.14.3/src/utils/gzstream/version BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h BEDTools-Version-2.14.3/src/utils/tabFile/Makefile BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h BEDTools-Version-2.14.3/src/utils/version/version.h BEDTools-Version-2.14.3/src/windowBed/Makefile BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp BEDTools-Version-2.14.3/src/windowBed/windowBed.h BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/LICENSE Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,340 @@\n+\t\t GNU GENERAL PUBLIC LICENSE\n+\t\t Version 2, June 1991\n+\n+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,\n+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+\t\t\t Preamble\n+\n+ The licenses for most software are designed to take away your\n+freedom to share and change it. By contrast, the GNU General Public\n+License is intended to guarantee your freedom to share and change free\n+software--to make sure the software is free for all its users. This\n+General Public License applies to most of the Free Software\n+Foundation\'s software and to any other program whose authors commit to\n+using it. (Some other Free Software Foundation software is covered by\n+the GNU Lesser General Public License instead.) You can apply it to\n+your programs, too.\n+\n+ When we speak of free software, we are referring to freedom, not\n+price. Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+this service if you wish), that you receive source code or can get it\n+if you want it, that you can change the software or use pieces of it\n+in new free programs; and that you know you can do these things.\n+\n+ To protect your rights, we need to make restrictions that forbid\n+anyone to deny you these rights or to ask you to surrender the rights.\n+These restrictions translate to certain responsibilities for you if you\n+distribute copies of the software, or if you modify it.\n+\n+ For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must give the recipients all the rights that\n+you have. You must make sure that they, too, receive or can get the\n+source code. 
And you must show them these terms so they know their\n+rights.\n+\n+ We protect your rights with two steps: (1) copyright the software, and\n+(2) offer you this license which gives you legal permission to copy,\n+distribute and/or modify the software.\n+\n+ Also, for each author\'s protection and ours, we want to make certain\n+that everyone understands that there is no warranty for this free\n+software. If the software is modified by someone else and passed on, we\n+want its recipients to know that what they have is not the original, so\n+that any problems introduced by others will not reflect on the original\n+authors\' reputations.\n+\n+ Finally, any free program is threatened constantly by software\n+patents. We wish to avoid the danger that redistributors of a free\n+program will individually obtain patent licenses, in effect making the\n+program proprietary. To prevent this, we have made it clear that any\n+patent must be licensed for everyone\'s free use or not licensed at all.\n+\n+ The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+\t\t GNU GENERAL PUBLIC LICENSE\n+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n+\n+ 0. This License applies to any program or other work which contains\n+a notice placed by the copyright holder saying it may be distributed\n+under the terms of this General Public License. The "Program", below,\n+refers to any such program or work, and a "work based on the Program"\n+means either the Program or any derivative work under copyright law:\n+that is to say, a work containing the Program or a portion of it,\n+either verbatim or with modifications and/or translated into another\n+language. (Hereinafter, translation is included without limitation in\n+the term "modification".) Each licensee is addressed as "you".\n+\n+Activities other than copying, distribution and modification are not\n+covered by this License; they are outside its scope. 
The act of\n+running the Program is not restricted, and the output from the Program\n+is covered only if its contents constitute a work based on the\n+Program (independent of having been made by running the Program).\n+Whethe'..b'/OR OTHER PARTIES\n+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS\n+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE\n+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n+REPAIR OR CORRECTION.\n+\n+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n+POSSIBILITY OF SUCH DAMAGES.\n+\n+\t\t END OF TERMS AND CONDITIONS\n+\n+\t How to Apply These Terms to Your New Programs\n+\n+ If you develop a new program, and you want it to be of the greatest\n+possible use to the public, the best way to achieve this is to make it\n+free software which everyone can redistribute and change under these terms.\n+\n+ To do so, attach the following notices to the program. 
It is safest\n+to attach them to the start of each source file to most effectively\n+convey the exclusion of warranty; and each file should have at least\n+the "copyright" line and a pointer to where the full notice is found.\n+\n+ <one line to give the program\'s name and a brief idea of what it does.>\n+ Copyright (C) <year> <name of author>\n+\n+ This program is free software; you can redistribute it and/or modify\n+ it under the terms of the GNU General Public License as published by\n+ the Free Software Foundation; either version 2 of the License, or\n+ (at your option) any later version.\n+\n+ This program is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+ GNU General Public License for more details.\n+\n+ You should have received a copy of the GNU General Public License along\n+ with this program; if not, write to the Free Software Foundation, Inc.,\n+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+If the program is interactive, make it output a short notice like this\n+when it starts in an interactive mode:\n+\n+ Gnomovision version 69, Copyright (C) year name of author\n+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n+ This is free software, and you are welcome to redistribute it\n+ under certain conditions; type `show c\' for details.\n+\n+The hypothetical commands `show w\' and `show c\' should show the appropriate\n+parts of the General Public License. Of course, the commands you use may\n+be called something other than `show w\' and `show c\'; they could even be\n+mouse-clicks or menu items--whatever suits your program.\n+\n+You should also get your employer (if you work as a programmer) or your\n+school, if any, to sign a "copyright disclaimer" for the program, if\n+necessary. 
Here is a sample; alter the names:\n+\n+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n+ `Gnomovision\' (which makes passes at compilers) written by James Hacker.\n+\n+ <signature of Ty Coon>, 1 April 1989\n+ Ty Coon, President of Vice\n+\n+This General Public License does not permit incorporating your program into\n+proprietary programs. If your program is a subroutine library, you may\n+consider it more useful to permit linking proprietary applications with the\n+library. If this is what you want to do, use the GNU Lesser General\n+Public License instead of this License.\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/Makefile Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,89 @@ +# ========================== +# BEDTools Makefile +# (c) 2009 Aaron Quinlan +# ========================== + +# define our object and binary directories +export OBJ_DIR = obj +export BIN_DIR = bin +export SRC_DIR = src +export CXX = g++ +export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC +export LIBS = -lz +export BT_ROOT = src/utils/BamTools/ + + +SUBDIRS = $(SRC_DIR)/annotateBed \ + $(SRC_DIR)/bamToBed \ + $(SRC_DIR)/bedToBam \ + $(SRC_DIR)/bedToIgv \ + $(SRC_DIR)/bed12ToBed6 \ + $(SRC_DIR)/closestBed \ + $(SRC_DIR)/complementBed \ + $(SRC_DIR)/coverageBed \ + $(SRC_DIR)/fastaFromBed \ + $(SRC_DIR)/flankBed \ + $(SRC_DIR)/genomeCoverageBed \ + $(SRC_DIR)/intersectBed \ + $(SRC_DIR)/linksBed \ + $(SRC_DIR)/maskFastaFromBed \ + $(SRC_DIR)/mergeBed \ + $(SRC_DIR)/multiBamCov \ + $(SRC_DIR)/multiIntersectBed \ + $(SRC_DIR)/nucBed \ + $(SRC_DIR)/overlap \ + $(SRC_DIR)/pairToBed \ + $(SRC_DIR)/pairToPair \ + $(SRC_DIR)/shuffleBed \ + $(SRC_DIR)/slopBed \ + $(SRC_DIR)/sortBed \ + $(SRC_DIR)/subtractBed \ + $(SRC_DIR)/tagBam \ + $(SRC_DIR)/unionBedGraphs \ + $(SRC_DIR)/windowBed + +UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \ + $(SRC_DIR)/utils/bedFile \ + $(SRC_DIR)/utils/bedGraphFile \ + $(SRC_DIR)/utils/chromsweep \ + $(SRC_DIR)/utils/gzstream \ + $(SRC_DIR)/utils/fileType \ + $(SRC_DIR)/utils/bedFilePE \ + $(SRC_DIR)/utils/sequenceUtilities \ + $(SRC_DIR)/utils/tabFile \ + $(SRC_DIR)/utils/BamTools \ + $(SRC_DIR)/utils/BamTools-Ancillary \ + $(SRC_DIR)/utils/Fasta \ + $(SRC_DIR)/utils/genomeFile + +all: + [ -d $(OBJ_DIR) ] || mkdir -p $(OBJ_DIR) + [ -d $(BIN_DIR) ] || mkdir -p $(BIN_DIR) + + @echo "Building BEDTools:" + @echo "=========================================================" + + @for dir in $(UTIL_SUBDIRS); do \ + echo "- Building in $$dir"; \ + $(MAKE) --no-print-directory -C $$dir; \ + echo ""; \ + done + + @for dir in $(SUBDIRS); do \ + echo "- Building in $$dir"; \ + $(MAKE) --no-print-directory -C $$dir; \ + echo ""; \ + 
done + + +.PHONY: all + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + @rm -Rf $(BT_ROOT)/lib + @rm -f $(BT_ROOT)/src/api/*.o + @rm -f $(BT_ROOT)/src/api/internal/*.o + @rm -Rf $(BT_ROOT)/include + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/README.rst Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,47 @@ +============================== + BEDTools +============================== + +Created by Aaron Quinlan Spring 2009. + +Copyright 2009,2010,2011 Aaron Quinlan. All rights reserved. + +Stable releases: http://code.google.com/p/bedtools + +Repository: https://github.com/arq5x/bedtools + +Released under GNU public license version 2 (GPL v2). + + +Summary +------- +BEDTools is a collection of utilities for comparing, summarizing, and +intersecting genomic features in BED, GTF/GFF, VCF and BAM formats. + + +Manual +------ +See the extensive PDF manual included at: http://code.google.com/p/bedtools/downloads/detail?name=BEDTools-User-Manual.v4.pdf. + +This manual covers many common usage examples. There are also examples available at: +http://code.google.com/p/bedtools/wiki/Usage +http://code.google.com/p/bedtools/wiki/UsageAdvanced + +Installation +------------ +Git +... +git clone git://github.com/arq5x/bedtools.git + +Download tarball - that big gray button on the upper right. +........................................................... +#. Unpack the source downloaded tarball. +#. cd into the expanded folder. +#. Type "make clean" and hit enter. +#. Type "make all" and hit enter. +#. If you encountered no errors, then all of the BED Tools should now be in bin/ + If not, try to troubleshoot then email me: aaronquinlan at gmail dot com +#. Copy the files in bin/ to ~/bin or if you have the privileges, to /usr/local/bin. +#. Use the tools. + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/RELEASE_HISTORY --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/RELEASE_HISTORY Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,646 @@\n+Version 2.14.2 (2-Nov-2011)\n+\n+Bug Fixes\n+=========\n+1. Corrected the help for closestBed. It now correctly reads -io instead of -no.\n+2. Fixed regression in closestBed injected in version 2.13.4 whereby B features to the right of an A feature were missed.\n+\n+New tool.\n+============\n+1. Added the multiIntersectBed tool for reporting common intervals among multiple **sorted** BED/GFF/VCF files.\n+\n+\n+\n+Version 2.13.4 (26-Oct-2011)\n+Bug Fixes\n+=========\n+1. The -sorted option (chromsweep) in intersectBed now obeys -s and -S. I had neglected to implement that. Thanks to Paul Ryvkin for pointing this out.\n+2. The -split option was mistakenly splitting of D CIGAR ops.\n+3. The Makefile was not including zlib properly for newer versions of GCC. Thanks to Istvan Albert for pointing this out and providing the solution.\n+\n+Improvements\n+============\n+1. Thanks to Jacob Biesinger for a new option (-D) in closestBed that will report _signed_ distances. Moreover, the new option allows fine control over whether the distances are reported based on the reference genome or based on the strand of the A or B feature. Many thanks to Jacob.\n+2. Thanks to some nice analysis from Paul Ryvkin, I realized that the -sorted option was using way too much memory in certain cases where there is a chromosome change in a sorted BED file. This has been corrected.\n+\n+Version 2.13.3 (30-Sept-2011)\n+Bug Fixes\n+============\n+1. intersectBed detected, but did not report overlaps when using BAM input and -bed.\n+\n+Other\n+=====\n+1. Warning that -sorted trusts, but does not enforce that data is actually sorted.\n+\n+\n+Version 2.13.2 (23-Sept-2011)\n+\n+New algorithm\n+=============\n+1. Preliminary release of the chrom_sweep algorithm.\n+\n+New options\n+===========\n+1. genomeCoverageBed no longer requires a genome file when working with BAM input. It instead uses the BAM header.\n+2. 
tagBam now has a -score option for annotating alignments with the BED "scores" field in annotation files. This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line.\n+\n+Bug fixes\n+=========\n+1. Correct a bug that prevented proper BAM support in intersectBed.\n+2. Improved detection of GFF features with negative coordinates.\n+\n+\n+\n+Version 2.13.1 (6-Sept-2011)\n+\n+New options\n+===========\n+1. tagBam now has -s and -S options for only annotating alignments with features on the same and opposite strand, respectively.\n+2. tagBam now has a -names option for annotating alignments with the "name" field in annotation files. This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line. Currently, this works well with BED files, but given the limited metadata support for GFF files, annotating with -names and GFF files may not work as well as wished, depending on the type of GFF file used.\n+\n+\n+\n+Version 2.13.0 (1-Sept-2011)\n+\n+New tools\n+=========\n+1. tagBam. This tool annotates a BAM file with custom tag fields based on overlaps with BED/GFF/VCF files.\n+For example:\n+$ tagBam -i aln.bam -files exons.bed introns.bed cpg.bed utrs.bed \\\n+ -tags exonic intonic cpg utr \\\n+ > aln.tagged.bam\n+For alignments that have overlaps, you should see new BAM tags like "YB:Z:exonic", "YB:Z:cpg;utr"\n+\n+2. multiBamCov. 
The new tool counts sequence coverage for multiple bams at specific loci defined in a BED/GFF/VCF file.\n+For example:\n+\n+$ multiBamCov -bams aln.1.bam aln.2.bam aln3.bam -bed exons.bed\n+chr1\t861306\t861409\tSAMD11\t1\t+\t181\t280\t236\n+chr1\t865533\t865718\tSAMD11\t2\t+\t249\t365\t374\n+chr1\t866393\t866496\tSAMD11\t3\t+\t162\t298\t322\n+\n+where the last 3 columns represent the number of alignments overlapping each interval from the three BAM file.\n+\n+The following options are available to control which types of alignments are are counted.\n+-q\tMinimum mapping quality allowed. Default is 0.\n+\n+-D\tInclude duplicate-marked reads. Defa'..b'.\n+\n+\n+Version 2.2.1\n+1. Fixed a very obvious bug in subtractBed that caused improper behavior when a feature in A was overlapped by more than one feature in B.\n+Many thanks to folks in the Hannon lab at CSHL for pointing this out.\n+\n+\n+Version 2.2.0\n+=== Notable changes in this release ===\n+1. coverageBed will optionally only count features in BED file A (e.g. sequencing reads) that overlap with \n+\tthe intervals/windows in BED file B on the same strand. This has been requested several times recently \n+\tand facilitates CHiP-Seq and RNA-Seq experiments.\n+\n+2. intersectBed can now require a minimum __reciprocal__ overlap between intervals in BED A and BED B. For example,\n+\tpreviously, if one used -f 0.90, it required that a feature in B overlap 90% of the feature in A for the "hit"\n+\tto be reported. If one adds the -r (reciprocal) option, the hit must also cover 90% of the feature in B. This helps\n+\tto exclude overlaps between say small features in A and large features in B:\n+\n+\tA ==========\n+\tB **********************************************************\n+\t\t\n+\t-f 0.50 (Reported), whereas -f 0.50 -r (Not reported)\n+\n+3. The score field has been changed to be a string. 
While this deviates from the UCSC definition, it allows one to track\n+\tmuch more meaningful information about a feature/interval. For example, score could now be:\n+\t\n+\t7.31E-05 (a p-value)\n+\t0.334577 (mean enrichment)\n+\t2:2.2:40:2 (several values encoded in a string)\n+\t\n+4. closestBed now, by default, reports __all__ intervals in B that overlap equally with an interval in A. Previously, it\n+\tmerely reported the first such feature that appeared in B. Here\'s a cartoon explaining the difference.\n+\t\n+\t**Prior behavior**\n+\t\n+\tA\t ==============\n+\tB.1 \t\t\t\t++++++++++++++\n+\tB.2 \t\t\t\t++++++++++++++\n+\tB.3 \t\t\t\t+++++++++\n+\n+\t-----------------------------------------\n+\tResult = B.1 \t\t\t++++++++++++++\n+\t\n+\t\n+\t**Current behavior**\n+\t\n+\tA\t ==============\n+\tB.1 \t\t\t\t++++++++++++++\n+\tB.2 \t\t\t\t++++++++++++++\n+\tB.3 \t\t\t\t+++++++++\n+\n+\t-----------------------------------------\n+\tResult = B.1 \t\t\t++++++++++++++\n+\t\t\t B.2 \t\t\t++++++++++++++\n+\n+\tUsing the -t option, one can also choose to report either the first or the last entry in B in the event of a tie.\n+\n+5. Several other minor changes to the algorithms have been made to increase speed a bit.\n+\n+\n+VERSION 2.1.2\n+1. Fixed yet another bug in the parsing of "track" or "browser" lines. Sigh...\n+2. Change the "score" column (i.e. column 5) to b stored as a string. While this deviates\n+ from the UCSC convention, it allows significantly more information to be packed into the column.\n+\n+\n+VERSION 2.1.1\n+1. Added limits.h to bedFile.h to fix compilation issues on some systems.\n+2. Fixed bug in testing for "track" or "browser" lines.\n+\n+\n+VERSION 2.1.0\n+1. Fixed a bug in peIntersectBed that prevented -a from being correctly handled when passed via stdin.\n+2. Added new functionality to coverageBed that calculates the density of coverage.\n+3. Fixed bug in geneomCoverageBed.\n+\n+\n+VERSION 2.0.1\n+1. 
Added the ability to retain UCSC browser track/browser headers in BED files.\n+\n+\n+VERSION 2.0\n+1. Sped up the file parsing. ~10-20% increase in speed.\n+2. Created reportBed() as a common method in the bedFile class. Cleans up the code quite nicely.\n+3. Added the ability to compare BED files accounting for strandedness.\n+4. Paired-end intersect.\n+5. Fixed bug that prevented overlaps from being reported when the overlap fraction requested is 1.0\n+\n+\n+\n+VERSION 1.2, 04/27/2009. (1eb06115bdf3c49e75793f764a70c3501bb53f33)\n+1. Added subtractBed.\n+\tA. Fixed bug that prevented "split" overlaps from being reported.\n+\tB. Prevented A from being reported if >=1 feature in B completely spans it.\n+2. Added linksBed.\n+3. Added the ability to define separate windows for upstream and downstream to windowBed.\n+\n+\n+VERSION 1.1, 04/23/2009. (b74eb1afddca9b70bfa90ba763d4f2981a56f432)\n+Initial release.\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,828 @@\n+chr21\t9928613\t10012791\tuc002yip.1\t0\t-\t9928775\t9995604\t0\t24\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,62332,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc002yiq.1\t0\t-\t9928775\t9995604\t0\t23\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc002yir.1\t0\t-\t9928775\t9995604\t0\t22\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,63266,64549,66980,78302,81026,84020,\n+chr21\t9928613\t10012791\tuc010gkv.1\t0\t-\t9928775\t9973168\t0\t19\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,78302,81026,84020,\n+chr21\t9928613\t10061300\tuc002yis.1\t0\t-\t9928613\t9928613\t0\t33\t298,71,93,80,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,120,129,213,66,130,165,197,105,102,117,120,702,\t0,2082,3564,7620,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,89277,91464,104695,106174,106728,108195,108605,114070,114367,119980,122855,131985,\n+chr21\t10042683\t10120796\tuc002yit.1\t0\t-\t10071441\t10120588\t0\t10\t105,102,117,120,702,115,172,163,101,223,\t0,297,5910,8785,17915,26668,28637,37348,76733,77890,\n+chr21\t10042683\t10120808\tuc002yiu.1\t0\t-\t10080193\t10120608\t0\t9\t105,102,117,702,115,172,163,101,215,\t0,297,5910,17915,26668,28637,37348,76733,77910,\n+chr21\t10079666\t10120808\tuc002yiv.1\t0\t-\t10081686\t10120608\t0\t4\t528,91,101,215,\t0,1930,39750,40927,\n+chr21\t10080031\t10081687\tuc002yiw.1\t0\t-\t10080031\t10080031\t0
\t2\t200,91,\t0,1565,\n+chr21\t10081660\t10120796\tuc002yix.2\t0\t-\t10081660\t10081660\t0\t3\t27,101,223,\t0,37756,38913,\n+chr21\t13332351\t13346202\tuc002yiy.2\t0\t+\t13332351\t13332351\t0\t5\t265,115,2492,65,215,\t0,4342,4619,10805,13636,\n+chr21\t13336975\t13346202\tuc002yiz.2\t0\t+\t13336975\t13336975\t0\t4\t169,108,65,215,\t0,2379,6181,9012,\n+chr21\t13361138\t13412440\tuc002yja.2\t0\t+\t13361189\t13412250\t0\t3\t102,118,411,\t0,2228,50891,\n+chr21\t13904368\t13935777\tuc002yjb.1\t0\t+\t13904420\t13935758\t0\t11\t573,115,174,107,138,71,71,45,167,124,241,\t0,4946,5220,8172,9981,12628,18205,19679,20764,29338,31168,\n+chr21\t13944438\t13944477\tuc002yjc.1\t0\t+\t13944438\t13944438\t0\t1\t39,\t0,\n+chr21\t13945076\t13945106\tuc002yjd.1\t0\t+\t13945076\t13945076\t0\t1\t30,\t0,\n+chr21\t13973491\t13975330\tuc002yje.1\t0\t-\t13973781\t13974201\t0\t1\t1839,\t0,\n+chr21\t14137333\t14142556\tuc002yjf.1\t0\t-\t14137333\t14137333\t0\t6\t291,114,270,129,191,275,\t0,880,1863,2871,3617,4948,\n+chr21\t14200023\t14200052\tuc002yjg.1\t0\t+\t14200023\t14200023\t0\t1\t29,\t0,\n+chr21\t14202070\t14202096\tuc002yjh.1\t0\t-\t14202070\t14202070\t0\t1\t26,\t0,\n+chr21\t14237966\t14274631\tuc002yji.1\t0\t-\t14237966\t14237966\t0\t6\t88,71,73,29,85,738,\t0,1264,7292,7457,10291,35927,\n+chr21\t14270940\t14274631\tuc002yjj.2\t0\t-\t14270940\t14270940\t0\t2\t1809,738,\t0,2953,\n+chr21\t14321612\t14438647\tuc002yjk.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,699,\t0,56529,116336,\n+chr21\t14321612\t14438730\tuc002yjl.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,1030,\t0,56529,116088,\n+chr21\t14403005\t14501125\tuc002yjm.1\t0\t-\t14403184\t14501115\t0\t10\t267,177,112,105,168,90,102,109,386,119,\t0,35809,43759,54605,56409,57548,72944,77147,80220,98001,\n+chr21\t14459414\t14483611\tuc010gkw.1\t0\t-\t14459415\t14483519\t0\t4\t168,102,109,386,\t0,16535,20738,23811,\n+chr21\t14510336\t14522564\tuc002yjo.2\t0\t+\t14510378\t14521485\t0\t5\t138,163,73,100,1493,\t0,3418,4952,8293,10735,\n+chr21\t1451
0336\t14522564\tuc002yjn.2\t0\t+\t14518639\t14521485\t0\t5\t138,173,73,100,1493,\t0,3408,4952,8293,10735,\n+chr21\t14510336\t14522564\tuc002yjp.2\t0\t+\t14518639\t14521485\t0\t4\t138,73,100,1493,\t0,4952,8293,10735,\n+chr21\t14567990\t14585577\tuc002yjq.1\t0\t+\t14568283\t14585568\t0\t5\t359,62,148,143,100,\t0,5797,13751,14605,17487,\n+chr21\t14567990\t14595563\t'..b',39359,42463,57573,61711,64621,65078,66549,67043,73143,73892,74113,75468,77429,78209,87067,88715,91947,94082,97848,101709,103500,104273,105888,106385,107407,111781,112856,114041,115960,116732,118374,119687,120571,121161,\n+chr21\t46569229\t46690110\tuc002zjj.1\t0\t+\t46578825\t46689668\t0\t47\t323,213,372,81,256,56,175,137,112,223,82,175,218,455,556,147,152,143,233,163,213,230,117,21,207,171,153,879,156,771,103,155,141,174,196,223,151,450,245,103,174,120,230,77,139,128,486,\t0,1489,9509,21240,21855,22569,24124,24796,26541,28216,29099,30565,32087,38593,41697,56807,60945,63855,64312,65783,66277,72377,73126,73347,74702,76663,77443,86301,87949,91181,93316,97082,100943,102734,103507,105122,105619,106641,111015,112090,113275,115194,115966,117608,118921,119805,120395,\n+chr21\t46578738\t46594162\tuc010gqk.1\t0\t+\t46578738\t46578738\t0\t6\t372,81,259,56,175,137,\t0,11731,12326,13060,14615,15287,\n+chr21\t46699327\t46703021\tuc002zjk.1\t0\t-\t46699327\t46699327\t0\t2\t932,597,\t0,3097,\n+chr21\t46703317\t46790647\tuc002zjl.1\t0\t+\t46703472\t46790347\t0\t20\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,426,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,\n+chr21\t46703317\t46791548\tuc002zjm.1\t0\t+\t46703472\t46791387\t0\t21\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n+chr21\t46703317\t46791548\tuc010gql.1\t0\t+\t46703472\t46791387\t0\t20\t246,72,120,120,2
52,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n+chr21\t46703317\t46794451\tuc002zjn.1\t0\t+\t46703472\t46794259\t0\t22\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,340,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,\n+chr21\t46703317\t46813028\tuc002zjo.1\t0\t+\t46703472\t46811963\t0\t38\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,115,202,110,81,124,122,112,110,131,169,171,62,58,75,175,124,1318,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,91566,92657,92847,95192,95598,95845,97005,97962,98608,99246,101745,102771,104881,106736,107583,108393,\n+chr21\t46748653\t46789614\tuc002zjp.1\t0\t+\t46748811\t46789614\t0\t14\t177,120,198,92,111,124,110,103,65,94,120,115,140,137,\t0,4944,7104,24779,27814,29211,29691,30272,32911,33133,34170,35564,37443,40824,\n+chr21\t46783075\t46791548\tuc002zjq.1\t0\t+\t46784120\t46791387\t0\t5\t1257,140,137,81,293,\t0,3021,6402,7146,8180,\n+chr21\t46797251\t46803437\tuc002zjr.2\t0\t+\t46798578\t46802830\t0\t7\t1382,122,112,110,131,169,874,\t0,1664,1911,3071,4028,4674,5312,\n+chr21\t46805625\t46813028\tuc002zjs.1\t0\t+\t46806017\t46811963\t0\t5\t521,75,175,124,1318,\t0,2573,4428,5275,6085,\n+chr21\t46808123\t46813028\tuc002zjt.1\t0\t+\t46808123\t46808123\t0\t4\t150,175,124,1318,\t0,1930,2777,3587,\n+chr21\t46842958\t46849463\tuc002zju.1\t0\t-\t46843703\t46846756\t0\t3\t886,139,110,\t0,3660,6395,\n+chr21\t46842958\t46849463\tuc002zjv.1\t0\t-\t46843791\t46846756\t0\t4\t886,94,139,110,\t0,1664,3660,6395,\n+chr21\t46879954\t46904483\tuc002zjw.2\t0\t+\t46881291\t46903264\t0\t7\t149,95,105,183,162,165,1399,\t0,1281,7920,8691,12843,13960,23130,\n+chr21\t46879954\t46909291\tuc002zjx.1\t0\t+
\t46881291\t46908667\t0\t12\t149,109,95,105,183,162,165,176,130,137,172,657,\t0,824,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n+chr21\t46879954\t46909291\tuc002zjy.1\t0\t+\t46881291\t46908667\t0\t11\t149,95,105,183,162,165,176,130,137,172,657,\t0,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n+chr21\t46879954\t46909291\tuc010gqm.1\t0\t+\t46881291\t46908667\t0\t9\t149,95,105,183,162,165,137,172,657,\t0,1281,7920,8691,12843,13960,26185,27768,28680,\n+chr21\t46887625\t46906276\tuc002zjz.1\t0\t+\t46892812\t46905317\t0\t6\t354,183,162,165,176,1104,\t0,1020,5172,6289,15459,17547,\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,57261 @@\n+chr21\t9719768\t9721892\tALR/Alpha\t1004\t+\n+chr21\t9721905\t9725582\tALR/Alpha\t1010\t+\n+chr21\t9725582\t9725977\tL1PA3\t3288\t+\n+chr21\t9726021\t9729309\tALR/Alpha\t1051\t+\n+chr21\t9729320\t9729809\tL1PA3\t3897\t-\n+chr21\t9729809\t9730866\tL1P1\t8367\t+\n+chr21\t9730866\t9734026\tALR/Alpha\t1036\t-\n+chr21\t9734037\t9757471\tALR/Alpha\t1182\t-\n+chr21\t9757520\t9758476\tALR/Alpha\t1092\t-\n+chr21\t9758521\t9764575\tL1PA3\t26286\t-\n+chr21\t9764577\t9778787\tALR/Alpha\t1141\t-\n+chr21\t9778798\t9788657\tALR/Alpha\t1188\t-\n+chr21\t9788657\t9794680\tL1PA3\t27485\t-\n+chr21\t9794680\t9795266\tALR/Alpha\t1011\t-\n+chr21\t9795278\t9795587\tALR/Alpha\t979\t-\n+chr21\t9795589\t9795713\t(GAATG)n\t308\t+\n+chr21\t9795736\t9795894\t(GAATG)n\t683\t+\n+chr21\t9795911\t9796007\t(GAATG)n\t345\t+\n+chr21\t9796028\t9796187\t(GAATG)n\t756\t+\n+chr21\t9796202\t9796615\t(GAATG)n\t891\t+\n+chr21\t9796637\t9796824\t(GAATG)n\t621\t+\n+chr21\t9796824\t9796866\tHSATII\t242\t-\n+chr21\t9796866\t9797049\t(GAATG)n\t621\t+\n+chr21\t9797067\t9797436\t(GAATG)n\t900\t+\n+chr21\t9797482\t9797839\t(GAATG)n\t1008\t+\n+chr21\t9797866\t9798044\t(GAATG)n\t858\t+\n+chr21\t9798051\t9798118\t(GAGTG)n\t259\t+\n+chr21\t9798118\t9798658\t(GAATG)n\t969\t+\n+chr21\t9798626\t9798765\t(AAATG)n\t201\t+\n+chr21\t9798770\t9798950\t(GAGTG)n\t571\t+\n+chr21\t9798908\t9799265\t(GAATG)n\t942\t+\n+chr21\t9799280\t9799460\t(GAATG)n\t813\t+\n+chr21\t9799500\t9800262\t(GAATG)n\t933\t+\n+chr21\t9800289\t9800469\t(GAATG)n\t666\t+\n+chr21\t9800481\t9800797\t(GAATG)n\t977\t+\n+chr21\t9800840\t9800878\t(GAGTG)n\t225\t+\n+chr21\t9800913\t9801092\t(GAGTG)n\t930\t+\n+chr21\t9801092\t9801169\t(GAATG)n\t298\t+\n+chr21\t9801182\t9801639\t(GAATG)n\t747\t+\n+chr21\t9801651\t9801769\t(GAATG)n\t330\t+\n+chr21\t9801781\t9802265\t(GAATG)n\t747\t+\n+chr21\t9802265\t9802310\t(GAGTG)n\t245\t+\n+chr21\t9802310\t9802490\t(GAATG)n\t1203\t+\n+chr21\t9802490\t9802503\t(GAGTG)n\t245\t+\n+chr21\t9802508\t9802679\t(GAAT
G)n\t660\t+\n+chr21\t9802699\t9803425\t(GAATG)n\t1008\t+\n+chr21\t9803427\t9803488\t(GAGTG)n\t332\t+\n+chr21\t9803490\t9803789\t(GAATG)n\t708\t+\n+chr21\t9803803\t9804202\t(GAATG)n\t897\t+\n+chr21\t9804215\t9804262\t(GAATG)n\t261\t+\n+chr21\t9804276\t9804450\t(GAATG)n\t771\t+\n+chr21\t9804469\t9804637\t(GAATG)n\t756\t+\n+chr21\t9804660\t9804840\t(GAATG)n\t729\t+\n+chr21\t9804905\t9805085\t(GAATG)n\t726\t+\n+chr21\t9805118\t9805404\t(GAATG)n\t930\t+\n+chr21\t9805416\t9805716\t(GAATG)n\t708\t+\n+chr21\t9805730\t9806084\t(GAATG)n\t1050\t+\n+chr21\t9806147\t9806522\t(GAATG)n\t765\t+\n+chr21\t9806555\t9806812\t(GAATG)n\t661\t+\n+chr21\t9806824\t9807184\t(GAATG)n\t1080\t+\n+chr21\t9807228\t9807661\t(GAATG)n\t888\t+\n+chr21\t9807669\t9807698\t(GAGTG)n\t231\t+\n+chr21\t9807698\t9808290\t(GAATG)n\t807\t+\n+chr21\t9808301\t9808897\t(GAATG)n\t984\t+\n+chr21\t9808920\t9809796\t(GAATG)n\t960\t+\n+chr21\t9809843\t9810023\t(GAATG)n\t972\t+\n+chr21\t9810043\t9810492\t(GAATG)n\t690\t+\n+chr21\t9810503\t9810553\t(GAATG)n\t208\t+\n+chr21\t9810554\t9810733\t(GAGTG)n\t828\t+\n+chr21\t9810696\t9811576\t(GAATG)n\t1005\t+\n+chr21\t9811606\t9811772\t(GAATG)n\t604\t+\n+chr21\t9811778\t9812022\t(GAGTG)n\t858\t+\n+chr21\t9812022\t9812464\t(GAATG)n\t1017\t+\n+chr21\t9812479\t9812900\t(GAATG)n\t729\t+\n+chr21\t9812901\t9812954\t(GAGTG)n\t235\t+\n+chr21\t9812958\t9813124\t(GAATG)n\t740\t+\n+chr21\t9813179\t9813356\t(GAGTG)n\t819\t+\n+chr21\t9813335\t9813790\t(GAATG)n\t837\t+\n+chr21\t9813801\t9813973\t(GAATG)n\t582\t+\n+chr21\t9814004\t9814407\t(GAATG)n\t780\t+\n+chr21\t9814408\t9814467\t(GAGTG)n\t300\t+\n+chr21\t9814467\t9814824\t(GAATG)n\t921\t+\n+chr21\t9814871\t9815045\t(GAATG)n\t654\t+\n+chr21\t9815045\t9815117\t(GAGTG)n\t423\t+\n+chr21\t9815118\t9815297\t(GAATG)n\t926\t+\n+chr21\t9815356\t9815455\t(GAATG)n\t325\t+\n+chr21\t9815463\t9815640\t(GAGTG)n\t981\t+\n+chr21\t9815642\t9815982\t(GAATG)n\t805\t+\n+chr21\t9816000\t9816174\t(GAATG)n\t660\t+\n+chr21\t9816197\t9816535\t(GAATG)n\t919\t+\n+c
hr21\t9816595\t9816936\t(GAATG)n\t867\t+\n+chr21\t9816995\t9817175\t(GAATG)n\t510\t+\n+chr21\t9817189\t9817257\t(GAGTG)n\t269\t+\n+chr21\t9817258\t9817854\t(GAATG)n\t1092\t+\n+chr21\t9817883\t9818578\t(GAATG)n\t966\t+\n+chr21\t9818589\t9818768\t(GAATG)n\t552\t+\n+chr21\t9818798\t9818860\t(GAATG)n\t213\t+\n+chr21\t9818872\t9819215\t(GAATG)n\t894\t+\n+chr21\t9819230\t9819370\t(GAATG)n\t312\t+\n+chr21\t9819426\t9819773\t(GAATG)n\t897\t+\n+chr21\t9819798\t9819976\t(GAATG)n\t878\t+\n+chr21\t9819990\t9820169\t(GAATG)n\t680\t+\n+chr21\t9820188\t9820366\t(GAATG)n\t738\t+\n+chr21\t9820322\t9820507\t(GAGTG)n\t641\t+\n+chr21\t9820'..b'6885745\t46886054\tAluSx\t1935\t+\n+chr21\t46886059\t46886096\t(TG)n\t270\t+\n+chr21\t46886242\t46886708\tL1ME4a\t405\t+\n+chr21\t46886812\t46886962\tL1ME4a\t361\t+\n+chr21\t46887080\t46887388\tAluSx\t1967\t+\n+chr21\t46888920\t46889201\tMER58B\t1178\t-\n+chr21\t46889293\t46889629\tAluJo\t1519\t+\n+chr21\t46889638\t46889821\tAluJo\t850\t+\n+chr21\t46889914\t46890044\tL1ME3B\t405\t+\n+chr21\t46890044\t46890353\tAluSx\t1946\t-\n+chr21\t46890353\t46890614\tL1ME3B\t405\t+\n+chr21\t46890829\t46891136\tL1MC4a\t413\t-\n+chr21\t46891141\t46891421\tMLT1A1\t693\t-\n+chr21\t46891477\t46891642\tL1MC4_3endX\t343\t-\n+chr21\t46891836\t46892003\tFRAM\t650\t-\n+chr21\t46892029\t46892086\tAT_rich\t22\t+\n+chr21\t46892396\t46892604\tL1MC4a\t314\t-\n+chr21\t46892583\t46892681\tL1MD\t293\t-\n+chr21\t46896438\t46896576\tL1PREC2\t277\t+\n+chr21\t46896932\t46897040\tL1ME4a\t288\t+\n+chr21\t46898261\t46898291\t(T)n\t195\t+\n+chr21\t46898475\t46898726\tL1ME4a\t442\t+\n+chr21\t46898714\t46898903\tL1MD2\t776\t+\n+chr21\t46898903\t46899208\tMER2\t1135\t+\n+chr21\t46899304\t46899922\tL1MD2\t2612\t+\n+chr21\t46899916\t46900310\tL1MD2\t1306\t+\n+chr21\t46900317\t46900724\tMSTB1\t2258\t+\n+chr21\t46900724\t46902105\tL1MD2\t2718\t+\n+chr21\t46902161\t46902336\tL1ME4a\t384\t+\n+chr21\t46902470\t46902579\tMER45B\t546\t-\n+chr21\t46903800\t46903973\tL1M5\t344\t+\n+chr21\t46904289\t
46904311\tAT_rich\t22\t+\n+chr21\t46906284\t46906449\tG-rich\t373\t+\n+chr21\t46909243\t46909287\tL2\t195\t+\n+chr21\t46909413\t46909464\tAT_rich\t23\t+\n+chr21\t46909464\t46909768\tAluJo\t2015\t+\n+chr21\t46909769\t46910593\tL1ME3A\t1283\t-\n+chr21\t46910643\t46910947\tAluSx\t2427\t-\n+chr21\t46911036\t46911163\tAluJo\t773\t+\n+chr21\t46911163\t46911432\tAluSx\t2091\t+\n+chr21\t46913108\t46913727\tL1PA3\t5539\t-\n+chr21\t46915754\t46916276\tMLT1E2\t1609\t-\n+chr21\t46916330\t46916418\tL1M5\t226\t-\n+chr21\t46916433\t46916857\tL1M5\t1626\t-\n+chr21\t46916857\t46917170\tAluJo\t1400\t+\n+chr21\t46917170\t46917283\tL1M5\t1626\t-\n+chr21\t46917286\t46917758\tL1MA4A\t2227\t+\n+chr21\t46917764\t46917862\tL1MA4A\t435\t+\n+chr21\t46917957\t46918166\tMIR\t320\t-\n+chr21\t46918500\t46918536\tAT_rich\t22\t+\n+chr21\t46918925\t46919030\tGA-rich\t252\t+\n+chr21\t46919402\t46919654\tL2\t390\t+\n+chr21\t46919654\t46919928\tAluSx\t1723\t-\n+chr21\t46919928\t46920171\tL2\t390\t+\n+chr21\t46920752\t46922048\tL1PA7\t7374\t-\n+chr21\t46922123\t46922411\tL1ME1\t883\t+\n+chr21\t46922411\t46922704\tAluJb\t1385\t-\n+chr21\t46922718\t46922935\tL1ME1\t699\t+\n+chr21\t46923377\t46923802\tMLT1K\t323\t-\n+chr21\t46924116\t46924575\tHAL1\t655\t+\n+chr21\t46924661\t46924875\tMER74A\t443\t+\n+chr21\t46925595\t46925624\tAT_rich\t22\t+\n+chr21\t46926764\t46927048\tMLT1I\t641\t-\n+chr21\t46927048\t46927090\t(A)n\t378\t+\n+chr21\t46927155\t46927194\tMLT1I\t248\t-\n+chr21\t46927433\t46927614\t(TTAGGG)n\t234\t+\n+chr21\t46928301\t46928745\tMER4A1\t2938\t-\n+chr21\t46929613\t46930969\tL1MC3\t5007\t-\n+chr21\t46930969\t46931413\tMSTB1\t1749\t-\n+chr21\t46931413\t46931818\tL1MC3\t5007\t-\n+chr21\t46931945\t46932055\tMER34B-int\t303\t-\n+chr21\t46932058\t46932281\tAluJb\t1504\t-\n+chr21\t46932282\t46932323\tAluJb\t228\t-\n+chr21\t46932323\t46932751\tMER34B-int\t2060\t-\n+chr21\t46932771\t46933151\tLTR10C\t1336\t-\n+chr21\t46933171\t46933204\t(CA)n\t297\t+\n+chr21\t46933260\t46934052\tMER34B-int\t3116\t-\n+c
hr21\t46934052\t46934352\tAluY\t2333\t-\n+chr21\t46934352\t46934522\tMER34B-int\t3116\t-\n+chr21\t46934536\t46935047\tMER34B-int\t1297\t-\n+chr21\t46935651\t46936098\tMER34B-int\t1588\t-\n+chr21\t46936101\t46936169\tMLT2B3\t330\t+\n+chr21\t46936194\t46936262\tMLT2B3\t406\t+\n+chr21\t46936287\t46936355\tMLT2B3\t375\t+\n+chr21\t46936380\t46936448\tMLT2B3\t330\t+\n+chr21\t46936473\t46936541\tMLT2B3\t346\t+\n+chr21\t46936566\t46936632\tMLT2B3\t335\t+\n+chr21\t46936657\t46936725\tMLT2B5\t417\t+\n+chr21\t46936750\t46936977\tMLT2B3\t2194\t+\n+chr21\t46936977\t46937339\tTHE1C\t2217\t+\n+chr21\t46937339\t46937689\tMLT2B5\t1691\t+\n+chr21\t46937696\t46938061\tMER34B-int\t678\t-\n+chr21\t46938036\t46938374\tMER34B-int\t910\t-\n+chr21\t46938415\t46938527\tMER34B-int\t268\t-\n+chr21\t46938776\t46938841\tMER72\t3923\t-\n+chr21\t46938841\t46939137\tAluSq\t2287\t-\n+chr21\t46939137\t46939777\tMER72\t3923\t-\n+chr21\t46939986\t46940174\tL1MC\t565\t-\n+chr21\t46940179\t46940735\tMER34C_\t2769\t-\n+chr21\t46940746\t46941357\tL1MC\t1506\t-\n+chr21\t46941373\t46941479\tLTR60\t529\t+\n+chr21\t46941590\t46941894\tAluYb8\t2829\t+\n+chr21\t46941894\t46942298\tLTR60\t958\t+\n+chr21\t46942142\t46944181\tTAR1\t16459\t+\n+chr21\t46944181\t46944323\t(TTAGGG)n\t1057\t+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/human.hg18.genome --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/genomes/human.hg18.genome Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,50 @@ +chr1 247249719 +chr1_random 1663265 +chr10 135374737 +chr10_random 113275 +chr11 134452384 +chr11_random 215294 +chr12 132349534 +chr13 114142980 +chr13_random 186858 +chr14 106368585 +chr15 100338915 +chr15_random 784346 +chr16 88827254 +chr16_random 105485 +chr17 78774742 +chr17_random 2617613 +chr18 76117153 +chr18_random 4262 +chr19 63811651 +chr19_random 301858 +chr2 242951149 +chr2_random 185571 +chr20 62435964 +chr21 46944323 +chr21_random 1679693 +chr22 49691432 +chr22_random 257318 +chr22_h2_hap1 63661 +chr3 199501827 +chr3_random 749256 +chr4 191273063 +chr4_random 842648 +chr5 180857866 +chr5_random 143687 +chr5_h2_hap1 1794870 +chr6 170899992 +chr6_random 1875562 +chr6_cox_hap1 4731698 +chr6_qbl_hap2 4565931 +chr7 158821424 +chr7_random 549659 +chr8 146274826 +chr8_random 943810 +chr9 140273252 +chr9_random 1146434 +chrM 16571 +chrX 154913754 +chrX_random 1719168 +chrY 57772954 + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/human.hg19.genome --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/genomes/human.hg19.genome Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,94 @@ +chr1 249250621 +chr2 243199373 +chr3 198022430 +chr4 191154276 +chr5 180915260 +chr6 171115067 +chr7 159138663 +chrX 155270560 +chr8 146364022 +chr9 141213431 +chr10 135534747 +chr11 135006516 +chr12 133851895 +chr13 115169878 +chr14 107349540 +chr15 102531392 +chr16 90354753 +chr17 81195210 +chr18 78077248 +chr20 63025520 +chrY 59373566 +chr19 59128983 +chr22 51304566 +chr21 48129895 +chr6_ssto_hap7 4928567 +chr6_mcf_hap5 4833398 +chr6_cox_hap2 4795371 +chr6_mann_hap4 4683263 +chr6_apd_hap1 4622290 +chr6_qbl_hap6 4611984 +chr6_dbb_hap3 4610396 +chr17_ctg5_hap1 1680828 +chr4_ctg9_hap1 590426 +chr1_gl000192_random 547496 +chrUn_gl000225 211173 +chr4_gl000194_random 191469 +chr4_gl000193_random 189789 +chr9_gl000200_random 187035 +chrUn_gl000222 186861 +chrUn_gl000212 186858 +chr7_gl000195_random 182896 +chrUn_gl000223 180455 +chrUn_gl000224 179693 +chrUn_gl000219 179198 +chr17_gl000205_random 174588 +chrUn_gl000215 172545 +chrUn_gl000216 172294 +chrUn_gl000217 172149 +chr9_gl000199_random 169874 +chrUn_gl000211 166566 +chrUn_gl000213 164239 +chrUn_gl000220 161802 +chrUn_gl000218 161147 +chr19_gl000209_random 159169 +chrUn_gl000221 155397 +chrUn_gl000214 137718 +chrUn_gl000228 129120 +chrUn_gl000227 128374 +chr1_gl000191_random 106433 +chr19_gl000208_random 92689 +chr9_gl000198_random 90085 +chr17_gl000204_random 81310 +chrUn_gl000233 45941 +chrUn_gl000237 45867 +chrUn_gl000230 43691 +chrUn_gl000242 43523 +chrUn_gl000243 43341 +chrUn_gl000241 42152 +chrUn_gl000236 41934 +chrUn_gl000240 41933 +chr17_gl000206_random 41001 +chrUn_gl000232 40652 +chrUn_gl000234 40531 +chr11_gl000202_random 40103 +chrUn_gl000238 39939 +chrUn_gl000244 39929 +chrUn_gl000248 39786 +chr8_gl000196_random 38914 +chrUn_gl000249 38502 +chrUn_gl000246 38154 +chr17_gl000203_random 37498 +chr8_gl000197_random 37175 +chrUn_gl000245 36651 +chrUn_gl000247 36422 +chr9_gl000201_random 36148 +chrUn_gl000235 34474 +chrUn_gl000239 33824 +chr21_gl000210_random 27682 +chrUn_gl000231 27386 
+chrUn_gl000229 19913 +chrM 16571 +chrUn_gl000226 15008 +chr18_gl000207_random 4262 + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/mouse.mm8.genome --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/genomes/mouse.mm8.genome Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,35 @@ +chr1 197069962 +chr2 181976762 +chr3 159872112 +chr4 155029701 +chr5 152003063 +chr6 149525685 +chr7 145134094 +chr8 132085098 +chr9 124000669 +chrM 16299 +chrX 165556469 +chrY 16029404 +chr10 129959148 +chr11 121798632 +chr12 120463159 +chr13 120614378 +chr14 123978870 +chr15 103492577 +chr16 98252459 +chr17 95177420 +chr18 90736837 +chr19 61321190 +chr1_random 172274 +chr5_random 2921247 +chr7_random 243910 +chr8_random 206961 +chr9_random 17232 +chrX_random 39696 +chrY_random 14577732 +chr10_random 10781 +chr13_random 436191 +chr15_random 105932 +chr17_random 89091 +chrUn_random 1540053 + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/genomes/mouse.mm9.genome --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/genomes/mouse.mm9.genome Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,36 @@ +chr1 197195432 +chr2 181748087 +chr3 159599783 +chr4 155630120 +chr5 152537259 +chr6 149517037 +chr7 152524553 +chr8 131738871 +chr9 124076172 +chr10 129993255 +chr11 121843856 +chr12 121257530 +chr13 120284312 +chr14 125194864 +chr15 103494974 +chr16 98319150 +chr17 95272651 +chr18 90772031 +chr19 61342430 +chrX 166650296 +chrY 15902555 +chrM 16299 +chr13_random 400311 +chr16_random 3994 +chr17_random 628739 +chr1_random 1231697 +chr3_random 41899 +chr4_random 160594 +chr5_random 357350 +chr7_random 362490 +chr8_random 849593 +chr9_random 449403 +chrUn_random 5900358 +chrX_random 1785075 +chrY_random 58682461 + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,48 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ + -I$(UTILITIES_DIR)/version/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/BamTools/include + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= multiCovMain.cpp multiCovBam.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= multiCovBam + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/annotateBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,43 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= annotateMain.cpp annotateBed.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= annotateBed + + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,209 @@\n+/*****************************************************************************\n+ annotateBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "annotateBed.h"\n+\n+// build\n+BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,\n+ const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) :\n+\n+ _mainFile(mainFile),\n+ _annoFileNames(annoFileNames),\n+ _annoTitles(annoTitles),\n+ _sameStrand(sameStrand),\n+ _diffStrand(diffStrand),\n+ _reportCounts(reportCounts),\n+ _reportBoth(reportBoth)\n+{\n+ _bed = new BedFile(_mainFile);\n+}\n+\n+\n+// destroy and delete the open file pointers\n+BedAnnotate::~BedAnnotate(void) {\n+ delete _bed;\n+ CloseAnnoFiles();\n+}\n+\n+\n+void BedAnnotate::OpenAnnoFiles() {\n+ for (size_t i=0; i < _annoFileNames.size(); ++i) {\n+ BedFile *file = new BedFile(_annoFileNames[i]);\n+ file->Open();\n+ _annoFiles.push_back(file);\n+ }\n+}\n+\n+\n+void BedAnnotate::CloseAnnoFiles() {\n+ for (size_t i=0; i < _annoFiles.size(); ++i) {\n+ BedFile *file = _annoFiles[i];\n+ delete file;\n+ _annoFiles[i] = NULL;\n+ }\n+}\n+\n+\n+void BedAnnotate::PrintHeader() {\n+ // print a hash to indicate header and then write a tab\n+ // for each field in the main file.\n+ printf("#");\n+ for (size_t i = 0; i < _bed->bedType; ++i)\n+ printf("\\t");\n+\n+ // now print the label for each file.\n+ if (_reportBoth == false) {\n+ for (size_t i = 0; i < _annoTitles.size(); ++i)\n+ printf("%s\\t", _annoTitles[i].c_str());\n+ printf("\\n");\n+ }\n+ else {\n+ for (size_t i = 0; i < _annoTitles.size(); ++i)\n+ printf("%s_cnt\\t%s_pct", _annoTitles[i].c_str(), 
_annoTitles[i].c_str());\n+ printf("\\n");\n+ }\n+}\n+\n+\n+void BedAnnotate::InitializeMainFile() {\n+ // process each chromosome\n+ masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();\n+ masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();\n+ for (; chromItr != chromEnd; ++chromItr) {\n+ // for each chrom, process each bin\n+ binsToBedCovLists::iterator binItr = chromItr->second.begin();\n+ binsToBedCovLists::iterator binEnd = chromItr->second.end();\n+ for (; binItr != binEnd; ++binItr) {\n+ // initialize BEDCOVLIST in this chrom/bin\n+ vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();\n+ vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();\n+ for (; bedItr != bedEnd; ++bedItr) {\n+ // initialize the depthMaps, counts, etc. for each anno file.\n+ for (size_t i = 0; i < _annoFiles.size(); ++i) {\n+ map<unsigned int, DEPTH> dummy;\n+ bedItr->depthMapList.push_back(dummy);\n+ bedItr->counts.push_back(0);\n+ bedItr->minOverlapStarts.push_back(INT_MAX);\n+ }\n+ }\n+ }\n+ }\n+}\n+\n+\n+void BedAnnotate::AnnotateBed() {\n+\n+ // load the "main" bed file into a map so\n+ // that we can easily compare each annoFile to it for overlaps\n+ _bed->loadBedCovListFileIntoMap();\n+ // open the annotations files for processing;\n+ OpenAnnoFiles();\n+ // initialize counters, depths, etc. 
for the main file\n+ InitializeMainFile();\n+\n+ // annotate the main file with the coverage from the annotation files.\n+ for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {\n+ // grab the current annotation file.\n+ BedFile *anno = _annoFiles[annoIndex];\n+ int lineNum = 0;\n+ BED a, nullBed;\n+ BedLi'..b' a = nullBed;\n+ }\n+ }\n+ }\n+\n+ // report the annotations of the main file from the anno file.\n+ ReportAnnotations();\n+ // close the annotations files;\n+ CloseAnnoFiles();\n+}\n+\n+\n+void BedAnnotate::ReportAnnotations() {\n+\n+ if (_annoTitles.size() > 0) {\n+ PrintHeader();\n+ }\n+\n+ // process each chromosome\n+ masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();\n+ masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();\n+ for (; chromItr != chromEnd; ++chromItr) {\n+ // for each chrom, process each bin\n+ binsToBedCovLists::const_iterator binItr = chromItr->second.begin();\n+ binsToBedCovLists::const_iterator binEnd = chromItr->second.end();\n+ for (; binItr != binEnd; ++binItr) {\n+ // for each chrom & bin, compute and report\n+ // the observed coverage for each feature\n+ vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();\n+ vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();\n+ for (; bedItr != bedEnd; ++bedItr) {\n+ // print the main BED entry.\n+ _bed->reportBedTab(*bedItr);\n+\n+ // now report the coverage from each annotation file.\n+ for (size_t i = 0; i < _annoFiles.size(); ++i) {\n+ unsigned int totalLength = 0;\n+ int zeroDepthCount = 0; // number of bases with zero depth\n+ int depth = 0; // tracks the depth at the current base\n+\n+ // the start is either the first base in the feature OR\n+ // the leftmost position of an overlapping feature. e.g. 
(s = start):\n+ // A ----------\n+ // B s ------------\n+ int start = min(bedItr->minOverlapStarts[i], bedItr->start);\n+\n+ map<unsigned int, DEPTH>::const_iterator depthItr;\n+ map<unsigned int, DEPTH>::const_iterator depthEnd;\n+\n+ // compute the coverage observed at each base in the feature marching from start to end.\n+ for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {\n+ // map pointer grabbing the starts and ends observed at this position\n+ depthItr = bedItr->depthMapList[i].find(pos);\n+ depthEnd = bedItr->depthMapList[i].end();\n+\n+ // increment coverage if starts observed at this position.\n+ if (depthItr != depthEnd)\n+ depth += depthItr->second.starts;\n+ // update zero depth\n+ if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))\n+ zeroDepthCount++;\n+ // decrement coverage if ends observed at this position.\n+ if (depthItr != depthEnd)\n+ depth = depth - depthItr->second.ends;\n+ }\n+ // Summarize the coverage for the current interval,\n+ CHRPOS length = bedItr->end - bedItr->start;\n+ totalLength += length;\n+ int nonZeroBases = (length - zeroDepthCount);\n+ float fractCovered = (float) nonZeroBases / length;\n+ if (_reportCounts == false && _reportBoth == false)\n+ printf("%f\\t", fractCovered);\n+ else if (_reportCounts == true && _reportBoth == false)\n+ printf("%d\\t", bedItr->counts[i]);\n+ else if (_reportCounts == false && _reportBoth == true)\n+ printf("%d\\t%f\\t", bedItr->counts[i], fractCovered);\n+ }\n+ // print newline for next feature.\n+ printf("\\n");\n+ }\n+ }\n+ }\n+}\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,71 @@ +/***************************************************************************** + annotateBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef ANNOTATEBED_H +#define ANNOTATEBED_H + +#include "bedFile.h" +#include <vector> +#include <algorithm> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <stdlib.h> + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class BedAnnotate { + +public: + + // constructor + BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, + const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth); + + // destructor + ~BedAnnotate(void); + + // annotate the master file with all of the annotation files. + void AnnotateBed(); + +private: + + // input files. + string _mainFile; + vector<string> _annoFileNames; + vector<string> _annoTitles; + + // instance of a bed file class. + BedFile *_bed; + vector<BedFile*> _annoFiles; + + // do we care about strandedness when counting coverage? + bool _sameStrand; + bool _diffStrand; + + bool _reportCounts; + bool _reportBoth; + + // private function for reporting coverage information + void ReportAnnotations(); + + void OpenAnnoFiles(); + + void CloseAnnoFiles(); + + void PrintHeader(); + + void InitializeMainFile(); +}; +#endif /* ANNOTATEBED_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,170 @@ +/***************************************************************************** + annotateMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "annotateBed.h" +#include "version.h" + +using namespace std; + +// define the version +#define PROGRAM_NAME "annotateBed" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input file + string mainFile; + + // parm flags + bool sameStrand = false; + bool diffStrand = false; + bool haveBed = false; + bool haveFiles = false; + bool haveTitles = false; + bool reportCounts = false; + bool reportBoth = false; + + // list of annotation files / names + vector<string> inputFiles; + vector<string> inputTitles; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + haveBed = true; + mainFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-files", 6, parameterLength)) { + if ((i+1) < argc) { + haveFiles = true; + i = i+1; + string file = argv[i]; + while (file[0] != '-' && i < argc) { 
+ inputFiles.push_back(file); + i++; + if (i < argc) + file = argv[i]; + } + i--; + } + } + else if(PARAMETER_CHECK("-names", 6, parameterLength)) { + if ((i+1) < argc) { + haveTitles = true; + i = i+1; + string title = argv[i]; + while (title[0] != '-' && i < argc) { + inputTitles.push_back(title); + i++; + if (i < argc) + title = argv[i]; + } + i--; + } + } + else if(PARAMETER_CHECK("-counts", 7, parameterLength)) { + reportCounts = true; + } + else if(PARAMETER_CHECK("-both", 5, parameterLength)) { + reportBoth = true; + } + else if (PARAMETER_CHECK("-s", 2, parameterLength)) { + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed || !haveFiles) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl; + showHelp = true; + } + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, sameStrand, diffStrand, reportCounts, reportBoth); + ba->AnnotateBed(); + delete ba; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Annotates the depth & breadth of coverage of features from multiple files" << endl; + cerr << "\t on the intervals in -i." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -files FILE1 FILE2 .. 
FILEn" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-names\t" << "A list of names (one / file) to describe each file in -i." << endl; + cerr << "\t\tThese names will be printed as a header line." << endl << endl; + + cerr << "\t-counts\t" << "Report the count of features in each file that overlap -i." << endl; + cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl; + + cerr << "\t-both\t" << "Report the counts followed by the % coverage." << endl; + cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl; + + cerr << "\t-s\t" << "Require same strandedness. That is, only counts overlaps" << endl; + cerr << "\t\ton the _same_ strand." << endl; + cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; + + cerr << "\t-S\t" << "Require different strandedness. That is, only count overlaps" << endl; + cerr << "\t\ton the _opposite_ strand." << endl; + cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bamToBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bamToBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,47 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ + -I$(UTILITIES_DIR)/version/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools-Ancillary + + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= bamToBed.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=BamAncillary.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= bamToBed + + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamAncillary/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,564 @@\n+/*****************************************************************************\n+ bamToBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "version.h"\n+#include "api/BamReader.h"\n+#include "api/BamAux.h"\n+#include "BamAncillary.h"\n+#include "bedFile.h"\n+using namespace BamTools;\n+\n+#include <vector>\n+#include <algorithm> // for swap()\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+\n+// define our program name\n+#define PROGRAM_NAME "bamToBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag,\n+ const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar);\n+void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);\n+\n+void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar);\n+void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0");\n+void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,\n+ const RefVector &refs, bool useEditDistance);\n+\n+void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,\n+ vector<int> &blockEnds, int &alignmentEnd);\n+string BuildCigarString(const vector<CigarOp> &cigar);\n+\n+bool IsCorrectMappingForBEDPE (const BamAlignment &bam);\n+\n+\n+int main(int argc, char* argv[]) 
{\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bamFile = "stdin";\n+ string color = "255,0,0";\n+ string tag = "";\n+\n+ bool haveBam = true;\n+ bool haveColor = false;\n+ bool haveOtherTag = false;\n+ bool writeBedPE = false;\n+ bool writeBed12 = false;\n+ bool useEditDistance = false;\n+ bool useAlignmentScore = false;\n+ bool useCigar = false;\n+ bool obeySplits = false;\n+\n+ // check to see if we should print out some help\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ bamFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n+ writeBedPE = true;\n+ }\n+ else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n+ writeBed12 = true;\n+ }\n+ else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n+ obeySplits = true;\n+ }\n+ else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n+ useEditDistance = true;\n+ }\n+ else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) {\n+ useCigar = true;\n+ }\n+ else if(PARAMETER_CHECK("-as", 3, parameterLength)) {\n+ useAlignmentScore = true;\n+ }\n+ else if(PARAMETER_CHECK("-color", 6, parameterLength)) {\n+ if ((i+1) < argc) {\n+ '..b'etc.\n+ printf("%d\\t%d\\t%s\\t%d\\t", bam.Position, alignmentEnd, color.c_str(), (int) blockStarts.size());\n+\n+ // now write the lengths portion\n+ unsigned int b;\n+ for (b = 0; b < blockLengths.size() - 1; ++b) {\n+ printf("%d,", blockLengths[b]);\n+ }\n+ printf("%d\\t", blockLengths[b]);\n+\n+ // now write the starts portion\n+ for (b = 0; b < blockStarts.size() 
- 1; ++b) {\n+ printf("%d,", blockStarts[b]);\n+ }\n+ printf("%d\\n", blockStarts[b]);\n+}\n+\n+\n+void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) {\n+\n+ // initialize BEDPE variables\n+ string chrom1, chrom2, strand1, strand2;\n+ int start1, start2, end1, end2;\n+ uint32_t editDistance1, editDistance2;\n+ start1 = start2 = end1 = end2 = -1;\n+ chrom1 = chrom2 = strand1 = strand2 = ".";\n+ editDistance1 = editDistance2 = 0;\n+ uint16_t minMapQuality = 0;\n+\n+ // extract relevant info for end 1\n+ if (bam1.IsMapped()) {\n+ chrom1 = refs.at(bam1.RefID).RefName;\n+ start1 = bam1.Position;\n+ end1 = bam1.GetEndPosition(false);\n+ strand1 = "+";\n+ if (bam1.IsReverseStrand()) strand1 = "-";\n+\n+ // extract the edit distance from the NM tag\n+ // if possible. otherwise, complain.\n+ if (useEditDistance == true && bam1.GetTag("NM", editDistance1) == false) {\n+ cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\\n";\n+ exit(1);\n+ }\n+ }\n+\n+ // extract relevant info for end 2\n+ if (bam2.IsMapped()) {\n+ chrom2 = refs.at(bam2.RefID).RefName;\n+ start2 = bam2.Position;\n+ end2 = bam2.GetEndPosition(false);\n+ strand2 = "+";\n+ if (bam2.IsReverseStrand()) strand2 = "-";\n+\n+ // extract the edit distance from the NM tag\n+ // if possible. otherwise, complain.\n+ if (useEditDistance == true && bam2.GetTag("NM", editDistance2) == false) {\n+ cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. 
Exiting\\n";\n+ exit(1);\n+ }\n+ }\n+\n+ // swap the ends if necessary\n+ if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {\n+ swap(chrom1, chrom2);\n+ swap(start1, start2);\n+ swap(end1, end2);\n+ swap(strand1, strand2);\n+ }\n+\n+ // report BEDPE using min mapQuality\n+ if (useEditDistance == false) {\n+ // compute the minimum mapping quality b/w the two ends of the pair.\n+ if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n+ minMapQuality = min(bam1.MapQuality, bam2.MapQuality);\n+\n+ printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n+ chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n+ bam1.Name.c_str(), minMapQuality, strand1.c_str(), strand2.c_str());\n+ }\n+ // report BEDPE using total edit distance\n+ else {\n+ uint16_t totalEditDistance = 0;\n+ if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n+ totalEditDistance = editDistance1 + editDistance2;\n+ else if (bam1.IsMapped() == true)\n+ totalEditDistance = editDistance1;\n+ else if (bam2.IsMapped() == true)\n+ totalEditDistance = editDistance2;\n+\n+ printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n+ chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n+ bam1.Name.c_str(), totalEditDistance, strand1.c_str(), strand2.c_str());\n+ }\n+}\n+\n+\n+// deprecated.\n+bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n+\n+ if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n+ return true;\n+ }\n+ else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n+ return true;\n+ }\n+ else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n+ return true;\n+ }\n+ else return false;\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,44 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= bed12ToBed6.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= bed12ToBed6 + + +all: $(PROGRAM) + +.PHONY: all + + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,168 @@ +/***************************************************************************** + bed12ToBed6.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "bedFile.h" +#include "version.h" + +#include <vector> +#include <iostream> +#include <fstream> +#include <stdlib.h> + +using namespace std; + + +// define our program name +#define PROGRAM_NAME "bed12ToBed6" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + + +// function declarations +void ShowHelp(void); +void DetermineBedInput(BedFile *bed); +void ProcessBed(istream &bedInput, BedFile *bed); + + +bool addBlockNums = false; + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + bool haveBed = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-n", 2, parameterLength)) { + addBlockNums = true; + i++; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have an input files + if (!haveBed ) { + cerr << endl << "*****" << 
endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedFile *bed = new BedFile(bedFile); + DetermineBedInput(bed); + } + else { + ShowHelp(); + } +} + + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Splits BED12 features into discrete BED6 features." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed12>" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-n\t" << "Force the score to be the (1-based) block number from the BED12." << endl << endl; + + + // end the program here + exit(1); +} + + +void DetermineBedInput(BedFile *bed) { + + // dealing with a proper file + if (bed->bedFile != "stdin") { + + ifstream bedStream(bed->bedFile.c_str(), ios::in); + if ( !bedStream ) { + cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + ProcessBed(bedStream, bed); + } + // reading from stdin + else { + ProcessBed(cin, bed); + } +} + + +void ProcessBed(istream &bedInput, BedFile *bed) { + + // process each BED entry and convert to BAM + BED bedEntry, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + // open the BED file for reading. 
+ bed->Open(); + while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + + bedVector bedBlocks; // vec to store the discrete BED "blocks" from a + splitBedIntoBlocks(bedEntry, lineNum, bedBlocks); + + for (int i = 0; i < (int) bedBlocks.size(); ++i) { + if (addBlockNums == false) { + printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), + bedBlocks[i].score.c_str(), bedBlocks[i].strand.c_str()); + } + else { + if (bedBlocks[i].strand == "+") + printf ("%s\t%d\t%d\t%s\t%d\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), + i+1, bedBlocks[i].strand.c_str()); + else + printf ("%s\t%d\t%d\t%s\t%d\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), + (int) ((bedBlocks.size()+1)-i), bedBlocks[i].strand.c_str()); + } + } + bedEntry = nullBed; + } + } + // close up + bed->Close(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToBam/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bedToBam/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the bedToBam tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR, both
# relative to this directory.  The link step pulls in the bamtools library
# from the BamTools utility directory.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary

# ----------------------------------
# define our source and object files
# ----------------------------------
PROGRAM       = bedToBam
SOURCES       = bedToBam.cpp
OBJECTS       = $(patsubst %.cpp,%.o,$(SOURCES))
# objects produced by the shared utility directories
_EXT_OBJECTS  = bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS   = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))

.PHONY: all clean

all: $(PROGRAM)

# link this tool's objects, the utility objects and libbamtools
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# compile the source whose base name matches the object being built
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(basename $(@F)).cpp
	@$(CXX) -c -o $@ $(basename $(@F)).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# utility objects are built by the Makefiles in their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything under OBJ_DIR and BIN_DIR, not only this tool's files
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,357 @@\n+/*****************************************************************************\n+ bedToBam.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "bedFile.h"\n+#include "genomeFile.h"\n+#include "version.h"\n+\n+\n+#include "api/BamReader.h"\n+#include "api/BamAux.h"\n+#include "api/BamWriter.h"\n+using namespace BamTools;\n+\n+#include <vector>\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+\n+// define our program name\n+#define PROGRAM_NAME "bedToBam"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+\n+// function declarations\n+void ShowHelp(void);\n+void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, bool uncompressedBam);\n+void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int> &chromToId, bool isBED12, int mapQual, int lineNum);\n+void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header, map<string, int> &chromToInt);\n+int reg2bin(int beg, int end);\n+\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedFile = "stdin";\n+ string genomeFile;\n+\n+ unsigned int mapQual = 255;\n+\n+ bool haveBed = true;\n+ bool haveGenome = false;\n+ bool haveMapQual = false;\n+ bool isBED12 = false;\n+ bool uncompressedBam = false;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, 
parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ bedFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveGenome = true;\n+ genomeFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-mapq", 5, parameterLength)) {\n+ haveMapQual = true;\n+ if ((i+1) < argc) {\n+ mapQual = atoi(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n+ isBED12 = true;\n+ }\n+ else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n+ uncompressedBam = true;\n+ }\n+ else {\n+ cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+ showHelp = true;\n+ }\n+ }\n+\n+ // make sure we have an input files\n+ if (!haveBed ) {\n+ cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+ if (!haveGenome ) {\n+ cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome) file. " << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+ if (mapQual < 0 || mapQual > 255) {\n+ cerr << endl << "*****" << endl << "*****ERROR: MAPQ must be in range [0,255]. 
" << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+\n+\n+ if (!showHelp) {\n+ BedFile *bed = new BedFile(bedFile);\n+ GenomeFile *genome = new GenomeFile(genomeFile);\n+\n+ '..b'() == 6) {\n+\n+ // extract the relevant BED fields to convert BED12 to BAM\n+ // namely: blockCount, blockStarts, blockEnds\n+ unsigned int blockCount = atoi(bed.otherFields[3].c_str());\n+\n+ vector<int> blockSizes, blockStarts;\n+ Tokenize(bed.otherFields[4], blockSizes, ",");\n+ Tokenize(bed.otherFields[5], blockStarts, ",");\n+\n+ // make sure this is a well-formed BED12 entry.\n+ if (blockSizes.size() != blockCount) {\n+ cerr << "Error: Number of BED blocks does not match blockCount at line: " << lineNum << ". Exiting!" << endl;\n+ exit (1);\n+ }\n+ else {\n+ // does the first block start after the bed.start?\n+ // if so, we need to do some "splicing"\n+ if (blockStarts[0] > 0) {\n+ CigarOp cOp;\n+ cOp.Length = blockStarts[0];\n+ cOp.Type = \'N\';\n+ bam.CigarData.push_back(cOp);\n+ }\n+ // handle the "middle" blocks\n+ for (unsigned int i = 0; i < blockCount - 1; ++i) {\n+ CigarOp cOp;\n+ cOp.Length = blockSizes[i];\n+ cOp.Type = \'M\';\n+ bam.CigarData.push_back(cOp);\n+\n+ if (blockStarts[i+1] > (blockStarts[i] + blockSizes[i])) {\n+ CigarOp cOp;\n+ cOp.Length = (blockStarts[i+1] - (blockStarts[i] + blockSizes[i]));\n+ cOp.Type = \'N\';\n+ bam.CigarData.push_back(cOp);\n+ }\n+ }\n+ // handle the last block.\n+ CigarOp cOp;\n+ cOp.Length = blockSizes[blockCount - 1];\n+ cOp.Type = \'M\';\n+ bam.CigarData.push_back(cOp);\n+ }\n+ }\n+ // it doesn\'t smell like BED12. complain.\n+ else {\n+ cerr << "You\'ve indicated that the input file is in BED12 format, yet the relevant fields cannot be found. Exiting." 
<< endl << endl;\n+ exit(1);\n+ }\n+ }\n+}\n+\n+\n+void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header,\n+ map<string, int, std::less<string> > &chromToId) {\n+\n+ // make a genome map of the genome file.\n+ GenomeFile genome(genomeFile);\n+\n+ header += "@HD\\tVN:1.0\\tSO:unsorted\\n";\n+ header += "@PG\\tID:BEDTools_bedToBam\\tVN:V";\n+ header += VERSION;\n+ header += "\\n";\n+\n+ int chromId = 0;\n+ vector<string> chromList = genome.getChromList();\n+ sort(chromList.begin(), chromList.end());\n+\n+ // create a BAM header (@SQ) entry for each chrom in the BEDTools genome file.\n+ vector<string>::const_iterator genomeItr = chromList.begin();\n+ vector<string>::const_iterator genomeEnd = chromList.end();\n+ for (; genomeItr != genomeEnd; ++genomeItr) {\n+ chromToId[*genomeItr] = chromId;\n+ chromId++;\n+\n+ // add to the header text\n+ int size = genome.getChromSize(*genomeItr);\n+ string chromLine = "@SQ\\tSN:" + *genomeItr + "\\tAS:" + genomeFile + "\\tLN:" + ToString(size) + "\\n";\n+ header += chromLine;\n+\n+ // create a chrom entry and add it to\n+ // the RefVector\n+ RefData chrom;\n+ chrom.RefName = *genomeItr;\n+ chrom.RefLength = size;\n+ refs.push_back(chrom);\n+ }\n+}\n+\n+\n+/* Taken directly from the SAMTools spec\n+calculate bin given an alignment in [beg,end) (zero-based, half-close, half-open) */\n+int reg2bin(int beg, int end) {\n+ --end;\n+ if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);\n+ if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);\n+ if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);\n+ if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);\n+ if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);\n+ return 0;\n+}\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToIgv/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bedToIgv/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the bedToIgv tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR, both
# relative to this directory.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ----------------------------------
# define our source and object files
# ----------------------------------
PROGRAM       = bedToIgv
SOURCES       = bedToIgv.cpp
OBJECTS       = $(patsubst %.cpp,%.o,$(SOURCES))
# objects produced by the shared utility directories
_EXT_OBJECTS  = bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS   = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))

.PHONY: all clean

all: $(PROGRAM)

# link this tool's objects together with the utility objects
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# compile the source whose base name matches the object being built
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(basename $(@F)).cpp
	@$(CXX) -c -o $@ $(basename $(@F)).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# utility objects are built by the Makefiles in their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything under OBJ_DIR and BIN_DIR, not only this tool's files
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,269 @@\n+/*****************************************************************************\n+ bedToIgv.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "bedFile.h"\n+#include "genomeFile.h"\n+#include "version.h"\n+\n+#include <vector>\n+#include <iostream>\n+#include <fstream>\n+#include <stdlib.h>\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "bedToIgv"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n+ bool collapse, bool useNames, string imageType, int slop);\n+void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n+ bool collapse, bool useNames, string imageType, int slop);\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedFile = "stdin";\n+ string imagePath = "./";\n+ string sortType = "none";\n+ string session = "none";\n+ int slop = 0;\n+ string imageType = "png";\n+\n+ bool haveBed = true;\n+ bool collapse = false;\n+ bool useNames = false;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength 
= (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ bedFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-path", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ imagePath = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-sort", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ sortType = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-sess", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ session = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-clps", 5, parameterLength)) {\n+ collapse = true;\n+ }\n+ else if(PARAMETER_CHECK("-name", 5, parameterLength)) {\n+ useNames = true;\n+ }\n+ else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ slop = atoi(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-img", 4, parameterLength)) {\n+ if ((i+1) < argc) {\n+ imageType = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else {\n+ cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+ showHelp = true;\n+ }\n+ }\n+\n+ // make sure we have an input files\n+ if (!haveBed ) {\n+ cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+ if (sortType != "none") {\n+ if ((sortType != "base") && (sortType != "position") && (sortType != "strand") &&\n+ (sortType != "quali'..b'ns: base, position, strand, quality, sample, and readGroup" << endl;\n+ cerr << "\\t\\tDefault is to apply no sorting at all." << endl << endl;\n+\n+ cerr << "\\t-clps\\t" << "Collapse the aligned reads prior to taking a snapshot. " << endl;\n+ cerr << "\\t\\tDefault is to no collapse." << endl << endl;\n+\n+ cerr << "\\t-name\\t" << "Use the \\"name\\" field (column 4) for each image\'s filename. " << endl;\n+ cerr << "\\t\\tDefault is to use the \\"chr:start-pos.ext\\"." 
<< endl << endl;\n+\n+ cerr << "\\t-slop\\t" << "Number of flanking base pairs on the left & right of the image." << endl;\n+ cerr << "\\t\\t- (INT) Default = 0." << endl << endl;\n+\n+ cerr << "\\t-img\\t" << "The type of image to be created. " << endl;\n+ cerr << "\\t\\tOptions: png, eps, svg" << endl;\n+ cerr << "\\t\\tDefault is png." << endl << endl;\n+\n+ cerr << "Notes: " << endl;\n+ cerr << "\\t(1) The resulting script is meant to be run from within the IGV GUI version 1.5 or later." << endl;\n+ cerr << "\\t(2) Unless you use the -sess option, it is assumed that prior to running the script, " << endl;\n+ cerr << "\\t\\tyou have loaded the proper genome, tracks and data files." << endl << endl;\n+\n+\n+ // end the program here\n+ exit(1);\n+}\n+\n+\n+void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n+ bool collapse, bool useNames, string imageType, int slop) {\n+\n+ // dealing with a proper file\n+ if (bed->bedFile != "stdin") {\n+\n+ ifstream bedStream(bed->bedFile.c_str(), ios::in);\n+ if ( !bedStream ) {\n+ cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" 
<< endl;\n+ exit (1);\n+ }\n+ ProcessBed(bedStream, bed, path, sortType, session, collapse, useNames, imageType, slop);\n+ }\n+ // reading from stdin\n+ else {\n+ ProcessBed(cin, bed, path, sortType, session, collapse, useNames, imageType, slop);\n+ }\n+}\n+\n+\n+void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n+ bool collapse, bool useNames, string imageType, int slop) {\n+\n+ // set the image path\n+ cout << "snapshotDirectory " << path << endl;\n+\n+ // should we load a session\n+ if (session != "none")\n+ cout << "load " << session << endl;\n+\n+\n+ BED bedEntry, nullBed;\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+\n+ bed->Open();\n+ // process each BED entry and convert to an IGV request\n+ while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+\n+ string filename = bedEntry.chrom + "_" + ToString(bedEntry.start) + "_" + ToString(bedEntry.end);\n+ string locus = bedEntry.chrom + ":" + ToString(bedEntry.start - slop) + "-" + ToString(bedEntry.end + slop);\n+\n+ if (useNames == true) {\n+ if (bedEntry.name.empty() == false)\n+ filename = filename + "_" + bedEntry.name;\n+ else {\n+ cerr << "Error: You requested that filenames be based upon the name field. However, it appears to be empty. Exiting!" << endl;\n+ exit (1);\n+ }\n+ }\n+ if (slop > 0) {\n+ filename = filename + "_" + "slop" + ToString(slop);\n+ }\n+ // goto\n+ cout << "goto " << locus << endl;\n+\n+ // sort\n+ if (sortType != "none")\n+ cout << "sort " << sortType << endl;\n+\n+ // collapse\n+ if (collapse == true)\n+ cout << "collapse" << endl;\n+\n+ // snapshot\n+ cout << "snapshot " << filename << "." << imageType << endl;\n+\n+ // reset\n+ bedEntry = nullBed;\n+ }\n+ }\n+ // close up\n+ bed->Close();\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/closestBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the closestBed tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR, both
# relative to this directory.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= closestMain.cpp closestBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= closestBed

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# editing closestBed.cpp no longer forces closestMain.cpp to be recompiled
# (and vice versa).
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# utility objects are built by the Makefiles in their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything under OBJ_DIR and BIN_DIR, not only this tool's files
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,234 @@\n+/*****************************************************************************\n+ closestBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "closestBed.h"\n+\n+const int MAXSLOP = 256000000; // 2*MAXSLOP = 512 megabases.\n+ // We don\'t want to keep looking if we\n+ // can\'t find a nearby feature within 512 Mb.\n+const int SLOPGROWTH = 2048000;\n+\n+\n+/*\n+ Constructor\n+*/\n+BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n+ string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode,\n+ bool ignoreOverlaps) \n+ : _bedAFile(bedAFile)\n+ , _bedBFile(bedBFile)\n+ , _tieMode(tieMode)\n+ , _sameStrand(sameStrand)\n+ , _diffStrand(diffStrand)\n+ , _reportDistance(reportDistance)\n+ , _signDistance(signDistance)\n+ , _strandedDistMode(_strandedDistMode)\n+ , _ignoreOverlaps(ignoreOverlaps)\n+{\n+ _bedA = new BedFile(_bedAFile);\n+ _bedB = new BedFile(_bedBFile);\n+ FindClosestBed();\n+}\n+\n+\n+/*\n+ Destructor\n+*/\n+BedClosest::~BedClosest(void) {\n+}\n+\n+\n+void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {\n+\n+ int slop = 0; // start out just looking for overlaps\n+ // within the current bin (~128Kb)\n+\n+ // update the current feature\'s start and end\n+\n+ CHRPOS aFudgeStart = 0;\n+ CHRPOS aFudgeEnd;\n+ int numOverlaps = 0;\n+ vector<BED> closestB;\n+ CHRPOS minDistance = INT_MAX;\n+ int32_t curDistance = INT_MAX;\n+ vector<int32_t> distances;\n+\n+ // is there at least one feature in B on the same chrom\n+ // as the current A feature?\n+ if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {\n+\n+ while ((numOverlaps == 0) && (slop <= MAXSLOP)) {\n+\n+ // 
add some slop (starting at 0 bases) to a in hopes\n+ // of finding a hit in B\n+ if ((static_cast<int>(a.start) - slop) > 0)\n+ aFudgeStart = a.start - slop;\n+ else\n+ aFudgeStart = 0;\n+\n+ if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))\n+ aFudgeEnd = a.end + slop;\n+ else\n+ aFudgeEnd = 2 * MAXSLOP;\n+\n+ // THE HEAVY LIFTING\n+ // search for hits with the current slop added\n+ _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _sameStrand, _diffStrand);\n+\n+ vector<BED>::const_iterator h = hits.begin();\n+ vector<BED>::const_iterator hitsEnd = hits.end();\n+ for (; h != hitsEnd; ++h) {\n+\n+ // do the actual features overlap?\n+ int s = max(a.start, h->start);\n+ int e = min(a.end, h->end);\n+ int overlapBases = (e - s); // the number of overlapping bases b/w a and b\n+\n+ // make sure we allow overlapping features.\n+ if ((overlapBases > 0) && (_ignoreOverlaps == true))\n+ continue;\n+ else\n+ numOverlaps++;\n+\n+ // there is overlap. make sure we allow overlapping features ()\n+ if (overlapBases > 0) {\n+ closestB.push_back(*h);\n+ distances.push_back(0);\n+ }\n+ // the hit is to the "left" of A\n+ else if (h->end <= a.start) {\n+ curDistance = a.start - h->end;\n+ if (_signDistance) {\n+ if ((_strandedDistMode == "ref")\n+ || (_strandedDistMode == "a" && a.strand != "'..b'se if (abs(curDistance) == minDistance) {\n+ minDistance = abs(curDistance);\n+ closestB.push_back(*h);\n+ distances.push_back(curDistance);\n+ }\n+ }\n+ // the hit is to the "right" of A\n+ else if (h->start >= a.end) {\n+ curDistance = h->start - a.end;\n+ if (_signDistance) {\n+ if ((_strandedDistMode == "a" && a.strand == "-")\n+ || (_strandedDistMode == "b" && h->strand != "-")) {\n+ curDistance = -curDistance;\n+ }\n+ }\n+ if (abs(curDistance) < minDistance) {\n+ minDistance = abs(curDistance);\n+ closestB.clear();\n+ closestB.push_back(*h);\n+ distances.clear();\n+ distances.push_back(curDistance);\n+ }\n+ else if (abs(curDistance) == minDistance) 
{\n+ minDistance = abs(curDistance);\n+ closestB.push_back(*h);\n+ distances.push_back(curDistance);\n+ }\n+ }\n+ }\n+ // if no overlaps were found, we\'ll widen the range\n+ // by SLOPGROWTH in each direction and search again.\n+ slop += SLOPGROWTH;\n+ }\n+ }\n+ // there is no feature in B on the same chromosome as A\n+ else {\n+ _bedA->reportBedTab(a);\n+ if (_reportDistance == true) {\n+ _bedB->reportNullBedTab();\n+ cout << -1 << endl;\n+ }\n+ else\n+ _bedB->reportNullBedNewLine();\n+ }\n+\n+ // report the closest feature(s) in B to the current A feature.\n+ // obey the user\'s reporting request (_tieMode)\n+ if (numOverlaps > 0) {\n+ if (closestB.size() == 1 || _tieMode == "first") {\n+ _bedA->reportBedTab(a);\n+ if (_reportDistance == true) {\n+ _bedB->reportBedTab(closestB[0]);\n+ cout << distances[0] << endl;\n+ }\n+ else\n+ _bedB->reportBedNewLine(closestB[0]);\n+ }\n+ else {\n+ if (_tieMode == "all") {\n+ size_t i = 0;\n+ for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {\n+ _bedA->reportBedTab(a);\n+ if (_reportDistance == true) {\n+ _bedB->reportBedTab(*b);\n+ cout << distances[i++] <<endl;\n+ }\n+ else\n+ _bedB->reportBedNewLine(*b);\n+ }\n+ }\n+ else if (_tieMode == "last") {\n+ _bedA->reportBedTab(a);\n+ if (_reportDistance == true) {\n+ _bedB->reportBedTab(closestB[closestB.size()-1]);\n+ cout << distances[distances.size() - 1]<<endl;\n+ }\n+ else\n+ _bedB->reportBedNewLine(closestB[closestB.size()-1]);\n+ }\n+ }\n+ }\n+}\n+\n+\n+void BedClosest::FindClosestBed() {\n+\n+ // load the "B" bed file into a map so\n+ // that we can easily compare "A" to it for overlaps\n+ _bedB->loadBedFileIntoMap();\n+\n+ BED a, nullBed;\n+ int lineNum = 0; // current input line number\n+ vector<BED> hits; // vector of potential hits\n+ hits.reserve(100);\n+ BedLineStatus bedStatus;\n+\n+ _bedA->Open();\n+ // process each entry in A in search of the closest feature in B\n+ while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n+ 
if (bedStatus == BED_VALID) {\n+ FindWindowOverlaps(a, hits);\n+ hits.clear();\n+ a = nullBed;\n+ }\n+ }\n+ _bedA->Close();\n+}\n+// END ClosestBed\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/closestBed/closestBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  closestBed.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef CLOSESTBED_H
#define CLOSESTBED_H

#include "bedFile.h"
#include <vector>
#include <iostream>
#include <fstream>

using namespace std;

//************************************************
// Class methods and elements
//************************************************

// For each feature in file A, finds and reports the closest feature(s)
// in file B.
class BedClosest {

public:

    // constructor
    //   bedAFile / bedBFile : paths of the A (query) and B (database) files.
    //   sameStrand          : command-line flag -s.
    //   diffStrand          : command-line flag -S.
    //   tieMode             : how ties are reported: "all", "first" or "last".
    //   reportDistance      : command-line flag -d (also set by -D).
    //   signDistance        : set by -D.
    //   strandedDistMode    : the argument given to -D; the help text lists
    //                         "ref", "a" and "b".
    //   ignoreOverlaps      : command-line flag -io.
    BedClosest(string &bedAFile, string &bedBFile,
               bool sameStrand, bool diffStrand, string &tieMode,
               bool reportDistance, bool signDistance, string &strandedDistMode,
               bool ignoreOverlaps);

    // destructor
    ~BedClosest(void);

    // find the closest feature in B to A.
    // Loads B into a map, then walks A one record at a time and calls
    // FindWindowOverlaps() for every valid record.
    void FindClosestBed();

private:

    // data
    string _bedAFile;
    string _bedBFile;
    string _tieMode;            // "all", "first" or "last"
    bool _sameStrand;
    bool _diffStrand;
    bool _reportDistance;       // append the distance as an extra column
    bool _signDistance;
    string _strandedDistMode;
    bool _ignoreOverlaps;

    // readers for the A and B files
    BedFile *_bedA, *_bedB;

    // methods

    // NOTE(review): no definition of reportNullB() is visible in this part
    // of the changeset - confirm it is implemented or remove it.
    void reportNullB();

    // Reports the closest feature(s) in B for one A record, honouring
    // _tieMode. The vector is the caller-owned scratch list of candidate
    // hits, which FindClosestBed() clears after each call.
    void FindWindowOverlaps(BED &, vector<BED> &);

};
#endif /* CLOSESTBED_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,202 @@\n+/*****************************************************************************\n+ closestMain.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "closestBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "closestBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedAFile;\n+ string bedBFile;\n+ string tieMode = "all";\n+ string strandedDistMode = "";\n+\n+ bool haveBedA = false;\n+ bool haveBedB = false;\n+ bool haveTieMode = false;\n+ bool sameStrand = false;\n+ bool diffStrand = false;\n+ bool ignoreOverlaps = false;\n+ bool reportDistance = false;\n+ bool signDistance = false;\n+ bool haveStrandedDistMode = false;\n+\n+\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if( (PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+ 
if ((i+1) < argc) {\n+ haveBedB = true;\n+ bedBFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n+ sameStrand = true;\n+ }\n+ else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n+ diffStrand = true;\n+ }\n+ else if (PARAMETER_CHECK("-d", 2, parameterLength)) {\n+ reportDistance = true;\n+ }\n+ else if (PARAMETER_CHECK("-D", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ reportDistance = true;\n+ signDistance = true;\n+ haveStrandedDistMode = true;\n+ strandedDistMode = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if (PARAMETER_CHECK("-io", 3, parameterLength)) {\n+ ignoreOverlaps = true;\n+ }\n+ else if (PARAMETER_CHECK("-t", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveTieMode = true;\n+ tieMode = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else {\n+ cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+ showHelp = true;\n+ }\n+ }\n+\n+ // make sure we have both input files\n+ if (!haveBedA || !haveBedB) {\n+ cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+\n+ if (haveTieMode && (tieMode != "all") && (tieMode != "first")\n+ && (tieMode != "last")) {\n+ cerr << endl << "*****" << endl << "*****ERROR: Request \\"all\\" or \\"first\\" or \\"last\\" for Tie Mode (-t)" << endl << "*****" << endl;\n+ showHelp = true;\n+ }\n+ \n+ if (haveStrandedDi'..b'reOverlaps);\n+ delete bc;\n+ return 0;\n+ }\n+ else {\n+ ShowHelp();\n+ }\n+}\n+\n+void ShowHelp(void) {\n+\n+ cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+ cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+ cerr << "\\t Erik Arner, Riken" << endl << endl;\n+\n+ cerr << "Summary: For each feature in A, finds the closest " << endl;\n+ cerr << "\\t feature (upstream or downstream) in B." 
<< endl << endl;\n+\n+ cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+ cerr << "Options: " << endl;\n+ cerr << "\\t-s\\t" << "Require same strandedness. That is, find the closest feature in B" << endl;\n+ cerr << "\\t\\tthat overlaps A on the _same_ strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-S\\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl;\n+ cerr << "\\t\\tthat overlaps A on the _opposite_ strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-d\\t" << "In addition to the closest feature in B, " << endl;\n+ cerr << "\\t\\treport its distance to A as an extra column." << endl;\n+ cerr << "\\t\\t- The reported distance for overlapping features will be 0." << endl << endl;\n+ \n+ cerr << "\\t-D\\t" << "Like -d, report the closest feature in B, and its distance to A" << endl;\n+ cerr << "\\t\\tas an extra column. Unlike -d, use negative distances to report" << endl;\n+ cerr << "\\t\\tupstream features. You must specify which orientation defines \\"upstream\\"." << endl;\n+ cerr << "\\t\\tThe options are:" << endl;\n+ cerr << "\\t\\t- \\"ref\\" Report distance with respect to the reference genome. " << endl;\n+ cerr << "\\t\\t B features with a lower (start, stop) are upstream" << endl;\n+ cerr << "\\t\\t- \\"a\\" Report distance with respect to A." << endl;\n+ cerr << "\\t\\t When A is on the - strand, \\"upstream\\" means B has a higher (start,stop)." << endl;\n+ cerr << "\\t\\t- \\"b\\" Report distance with respect to B." << endl;\n+ cerr << "\\t\\t When B is on the - strand, \\"upstream\\" means A has a higher (start,stop)." << endl << endl;\n+\n+ cerr << "\\t-io\\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl;\n+ cerr << "\\t\\tnot touching features only." 
<< endl << endl;\n+\n+ cerr << "\\t-t\\t" << "How ties for closest feature are handled. This occurs when two" << endl;\n+ cerr << "\\t\\tfeatures in B have exactly the same \\"closeness\\" with A." << endl;\n+ cerr << "\\t\\tBy default, all such features in B are reported." << endl;\n+ cerr << "\\t\\tHere are all the options:" << endl;\n+ cerr << "\\t\\t- \\"all\\" Report all ties (default)." << endl;\n+ cerr << "\\t\\t- \\"first\\" Report the first tie that occurred in the B file." << endl;\n+ cerr << "\\t\\t- \\"last\\" Report the last tie that occurred in the B file." << endl << endl;\n+\n+ cerr << "Notes: " << endl;\n+ cerr << "\\tReports \\"none\\" for chrom and \\"-1\\" for all other fields when a feature" << endl;\n+ cerr << "\\tis not found in B on the same chromosome as the feature in A." << endl;\n+ cerr << "\\tE.g. none\\t-1\\t-1" << endl << endl;\n+\n+ // end the program here\n+ exit(1);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/complementBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Makefile for the complementBed tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR; both
# directories are shared with the other tools in the source tree.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ----------------------------------
# define our source and object files
# ----------------------------------
# OBJECTS       : this tool's own objects (one per entry in SOURCES).
# _EXT_OBJECTS  : objects built by the shared utility directories.
# EXT_OBJECTS / BUILT_OBJECTS : the same lists prefixed with OBJ_DIR.
SOURCES= complementMain.cpp complementBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= complementBed


all: $(PROGRAM)

.PHONY: all

# Link this tool's objects with the shared utility objects.
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Every object lists ALL sources as prerequisites, so touching any source
# recompiles every object. $(*F) is the target's file name without its
# directory and suffix, which selects the matching .cpp file.
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the shared utility objects by delegating to their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# Removes everything in OBJ_DIR and BIN_DIR, not only this tool's outputs.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,83 @@ +/***************************************************************************** + complementBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "complementBed.h" + +BedComplement::BedComplement(string &bedFile, string &genomeFile) { + + _bedFile = bedFile; + _genomeFile = genomeFile; + + _bed = new BedFile(bedFile); + _genome = new GenomeFile(genomeFile); + +} + + +BedComplement::~BedComplement(void) { +} + + +// +// Merge overlapping BED entries into a single entry +// +void BedComplement::ComplementBed() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + // get a list of the chroms in the user's genome + vector<string> chromList = _genome->getChromList(); + + // process each chrom in the genome + for (size_t c = 0; c < chromList.size(); ++c) { + string currChrom = chromList[c]; + + // create a "bit vector" for the chrom + CHRPOS currChromSize = _genome->getChromSize(currChrom); + vector<bool> chromMasks(currChromSize, 0); + + // mask the chrom for every feature in the BED file + bedVector::const_iterator bItr = _bed->bedMapNoBin[currChrom].begin(); + bedVector::const_iterator bEnd = _bed->bedMapNoBin[currChrom].end(); + for (; bItr != bEnd; ++bItr) { + if (bItr->end > currChromSize) { + cout << "Warninge: end of BED entry exceeds chromosome length. Please correct." << endl; + _bed->reportBedNewLine(*bItr); + exit(1); + } + + // mask all of the positions spanned by this BED entry. 
+ for (CHRPOS b = bItr->start; b < bItr->end; b++) + chromMasks[b] = 1; + } + + // report the unmasked, that is, complemented parts of the chrom + CHRPOS i = 0; + CHRPOS start; + while (i < chromMasks.size()) { + if (chromMasks[i] == 0) { + start = i; + while ((chromMasks[i] == 0) && (i < chromMasks.size())) + i++; + + if (start > 0) + cout << currChrom << "\t" << start << "\t" << i << endl; + else + cout << currChrom << "\t" << 0 << "\t" << i << endl; + } + i++; + } + } +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/complementBed/complementBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,47 @@ +/***************************************************************************** + complementBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include "genomeFile.h" + +#include <vector> +#include <bitset> +#include <algorithm> +#include <iostream> +#include <fstream> +#include <limits.h> +#include <stdlib.h> + +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class BedComplement { + +public: + + // constructor + BedComplement(string &bedFile, string &genomeFile); + + // destructor + ~BedComplement(void); + + void ComplementBed(); + +private: + + string _bedFile; + string _genomeFile; + BedFile *_bed; + GenomeFile *_genome; +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,116 @@ +/***************************************************************************** + complementBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "complementBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "complementBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + string genomeFile; + + bool haveBed = true; + bool haveGenome = false; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-g", 2, parameterLength)) { + if ((i+1) < argc) { + haveGenome = true; + genomeFile = argv[i + 1]; + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed || !haveGenome) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file and -g Genome file. 
" << endl << "*****" << endl; + showHelp = true; + } + if (!showHelp) { + BedComplement *bc = new BedComplement(bedFile, genomeFile); + bc->ComplementBed(); + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Returns the base pair complement of a feature file." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; + cerr << "\t <chromName><TAB><chromSize>" << endl << endl; + cerr << "\tFor example, Human (hg19):" << endl; + cerr << "\tchr1\t249250621" << endl; + cerr << "\tchr2\t243199373" << endl; + cerr << "\t..." << endl; + cerr << "\tchr18_gl000207_random\t4262" << endl << endl; + + cerr << "Tips: " << endl; + cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; + cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; + cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/coverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Makefile for the coverageBed tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR; both
# directories are shared with the other tools in the source tree.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary
# ----------------------------------
# define our source and object files
# ----------------------------------
# OBJECTS       : this tool's own objects (one per entry in SOURCES).
# _EXT_OBJECTS  : objects built by the shared utility directories.
# EXT_OBJECTS / BUILT_OBJECTS : the same lists prefixed with OBJ_DIR.
SOURCES= coverageMain.cpp coverageBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o BamAncillary.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= coverageBed


all: $(PROGRAM)

.PHONY: all

# Link this tool's objects with the shared utility objects and the
# BamTools library found under UTILITIES_DIR.
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# Every object lists ALL sources as prerequisites, so touching any source
# recompiles every object. $(*F) is the target's file name without its
# directory and suffix, which selects the matching .cpp file.
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the shared utility objects by delegating to their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# Removes everything in OBJ_DIR and BIN_DIR, not only this tool's outputs.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,293 @@\n+/*****************************************************************************\n+ coverageBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "coverageBed.h"\n+\n+// build\n+BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n+ bool writeHistogram, bool bamInput, bool obeySplits, \n+ bool eachBase, bool countsOnly) {\n+\n+ _bedAFile = bedAFile;\n+ _bedBFile = bedBFile;\n+\n+ _bedA = new BedFile(bedAFile);\n+ _bedB = new BedFile(bedBFile);\n+\n+ _sameStrand = sameStrand;\n+ _diffStrand = diffStrand;\n+ _obeySplits = obeySplits;\n+ _eachBase = eachBase;\n+ _writeHistogram = writeHistogram;\n+ _bamInput = bamInput;\n+ _countsOnly = countsOnly;\n+\n+\n+ if (_bamInput == false)\n+ CollectCoverageBed();\n+ else\n+ CollectCoverageBam(_bedA->bedFile);\n+}\n+\n+// destroy\n+BedCoverage::~BedCoverage(void) {\n+ delete _bedA;\n+ delete _bedB;\n+}\n+\n+\n+void BedCoverage::CollectCoverageBed() {\n+\n+ // load the "B" bed file into a map so\n+ // that we can easily compare "A" to it for overlaps\n+ _bedB->loadBedCovFileIntoMap();\n+\n+ int lineNum = 0; // current input line number\n+ BED a, nullBed;\n+ BedLineStatus bedStatus;\n+\n+ _bedA->Open();\n+ // process each entry in A\n+ while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ // process the BED entry as a single block\n+ if (_obeySplits == false)\n+ _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n+ // split the BED into discrete blocksand process each independently.\n+ else {\n+ bedVector bedBlocks;\n+ splitBedIntoBlocks(a, lineNum, bedBlocks);\n+\n+ // use countSplitHits to avoid over-counting 
each split chunk\n+ // as distinct read coverage.\n+ _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);\n+ }\n+ a = nullBed;\n+ }\n+ }\n+ _bedA->Close();\n+\n+ // report the coverage (summary or histogram) for BED B.\n+ if (_countsOnly == true)\n+ ReportCounts();\n+ else \n+ ReportCoverage();\n+}\n+\n+\n+void BedCoverage::CollectCoverageBam(string bamFile) {\n+\n+ // load the "B" bed file into a map so\n+ // that we can easily compare "A" to it for overlaps\n+ _bedB->loadBedCovFileIntoMap();\n+\n+ // open the BAM file\n+ BamReader reader;\n+ reader.Open(bamFile);\n+\n+ // get header & reference information\n+ string header = reader.GetHeaderText();\n+ RefVector refs = reader.GetReferenceData();\n+\n+ // convert each aligned BAM entry to BED\n+ // and compute coverage on B\n+ BamAlignment bam;\n+ while (reader.GetNextAlignment(bam)) {\n+ if (bam.IsMapped()) {\n+ // treat the BAM alignment as a single "block"\n+ if (_obeySplits == false) {\n+ // construct a new BED entry from the current BAM alignment.\n+ BED a;\n+ a.chrom = refs.at(bam.RefID).RefName;\n+ a.start = bam.Position;\n+ a.end = bam.GetEndPosition(false, false);\n+ a.strand = "+";\n+ if (bam.IsReverseStrand()) a.strand = "-";\n+\n+ _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n+ }\n+ // split the BAM alignment into discrete blocks and\n+ // look for overlaps only within each block.\n+ else {\n+ // vec to store the discrete BED "blocks"'..b' // update our histograms, assuming we are not reporting "per-base" coverage.\n+ if (_eachBase == false) {\n+ depthHist[depth]++;\n+ allDepthHist[depth]++;\n+ }\n+ else if ((_eachBase == true) && (bedItr->zeroLength == false))\n+ {\n+ _bedB->reportBedTab(*bedItr);\n+ printf("%d\\t%d\\n", pos-bedItr->start, depth);\n+ }\n+ }\n+ // decrement coverage if ends observed at this position.\n+ if (depthItr != bedItr->depthMap.end())\n+ depth = depth - depthItr->second.ends;\n+ }\n+\n+ // handle the special case where the user wants "per-base" 
depth\n+ // but the current feature is length = 0.\n+ if ((_eachBase == true) && (bedItr->zeroLength == true)) {\n+ _bedB->reportBedTab(*bedItr);\n+ printf("1\\t%d\\n",depth);\n+ }\n+ // Summarize the coverage for the current interval,\n+ // assuming the user has not requested "per-base" coverage.\n+ else if (_eachBase == false) \n+ {\n+ CHRPOS length = bedItr->end - bedItr->start;\n+ if (bedItr->zeroLength == true) {\n+ length = 0;\n+ }\n+ totalLength += length;\n+ int nonZeroBases = (length - zeroDepthCount);\n+ \n+ float fractCovered = 0.0;\n+ if (bedItr->zeroLength == false) {\n+ fractCovered = (float) nonZeroBases / length;\n+ }\n+ \n+ // print a summary of the coverage\n+ if (_writeHistogram == false) {\n+ _bedB->reportBedTab(*bedItr);\n+ printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, nonZeroBases, length, fractCovered);\n+ }\n+ // HISTOGRAM\n+ // report the number of bases with coverage == x\n+ else {\n+ // produce a histogram when not a zero length feature.\n+ if (bedItr->zeroLength == false) {\n+ map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();\n+ map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();\n+ for (; histItr != histEnd; ++histItr)\n+ {\n+ float fractAtThisDepth = (float) histItr->second / length;\n+ _bedB->reportBedTab(*bedItr);\n+ printf("%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, length, fractAtThisDepth);\n+ }\n+ }\n+ // special case when it is a zero length feauture.\n+ else {\n+ _bedB->reportBedTab(*bedItr);\n+ printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, 0, 0, 1.0000000);\n+ }\n+ }\n+ }\n+ }\n+ }\n+ }\n+ // report a histogram of coverage among _all_\n+ // features in B.\n+ if (_writeHistogram == true) {\n+ map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();\n+ map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();\n+ for (; histItr != histEnd; ++histItr) {\n+ float fractAtThisDepth = (float) histItr->second / 
totalLength;\n+ printf("all\\t%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);\n+ }\n+ }\n+}\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  coverageBed.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef COVERAGEBED_H
#define COVERAGEBED_H

#include "bedFile.h"

#include "api/BamReader.h"
#include "api/BamAux.h"
#include "BamAncillary.h"
using namespace BamTools;

#include <vector>
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <stdlib.h>

using namespace std;

//************************************************
// Class methods and elements
//************************************************

// Computes the coverage of the features in A over the intervals in B.
// All of the work is started from the constructor: after storing its
// settings it runs CollectCoverageBed() or, for BAM input,
// CollectCoverageBam(). There are no other public entry points.
class BedCoverage {

public:

    // constructor
    //   bedAFile       : path of the A file (BED/GFF/VCF, or BAM when
    //                    bamInput is true).
    //   bedBFile       : path of the B file.
    //   sameStrand     : command-line flag -s.
    //   diffStrand     : command-line flag -S.
    //   writeHistogram : command-line flag -hist.
    //   bamInput       : set when A was given with -abam.
    //   obeySplits     : command-line flag -split.
    //   eachBase       : command-line flag -d.
    //   countsOnly     : command-line flag -counts.
    BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, bool writeHistogram,
                bool bamInput, bool obeySplits, bool eachBase, bool countsOnly);

    // destructor: deletes the two BedFile readers.
    ~BedCoverage(void);

private:

    // input files.
    string _bedAFile;
    string _bedBFile;

    // instance of a bed file class.
    BedFile *_bedA, *_bedB;

    // do we care about same or opposite strandedness when counting coverage?
    bool _sameStrand;
    bool _diffStrand;

    // should we write a histogram for each feature in B?
    bool _writeHistogram;

    // are we dealing with BAM input for "A"?
    bool _bamInput;

    // should we split BED/BAM into discrete blocks?
    bool _obeySplits;

    // should discrete coverage be reported for each base in each feature?
    bool _eachBase;

    // should we just count overlaps and not try to describe the breadth?
    bool _countsOnly;

    // private function for reporting coverage information
    void ReportCoverage();

    // private function for reporting overlap counts
    void ReportCounts();

    // Load B into a map, tally each valid A record against it (as a whole
    // or block by block when _obeySplits is set), then call ReportCounts()
    // if _countsOnly is set and ReportCoverage() otherwise.
    void CollectCoverageBed();

    // Same as CollectCoverageBed(), but A is read from the BAM file at
    // bamFile and each mapped alignment is converted to a BED record first.
    void CollectCoverageBam(string bamFile);
};
#endif /* COVERAGEBED_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,182 @@ +/***************************************************************************** + coverageMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "coverageBed.h" +#include "version.h" + +using namespace std; + +// define the version +#define PROGRAM_NAME "coverageBed" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedAFile; + string bedBFile; + + // parm flags + bool sameStrand = false; + bool diffStrand = false; + bool writeHistogram = false; + bool eachBase = false; + bool obeySplits = false; + bool bamInput = false; + bool haveBedA = false; + bool haveBedB = false; + bool countsOnly = false; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-a", 2, parameterLength)) { + if ((i+1) < argc) { + haveBedA = true; + bedAFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-abam", 5, parameterLength)) { + if ((i+1) < argc) { + haveBedA = true; + bamInput = true; + bedAFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-b", 2, 
parameterLength)) { + if ((i+1) < argc) { + haveBedB = true; + bedBFile = argv[i + 1]; + i++; + } + } + else if (PARAMETER_CHECK("-s", 2, parameterLength)) { + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; + } + else if (PARAMETER_CHECK("-hist", 5, parameterLength)) { + writeHistogram = true; + } + else if(PARAMETER_CHECK("-d", 2, parameterLength)) { + eachBase = true; + } + else if (PARAMETER_CHECK("-split", 6, parameterLength)) { + obeySplits = true; + } + else if (PARAMETER_CHECK("-counts", 7, parameterLength)) { + countsOnly = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBedA || !haveBedB) { + cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; + showHelp = true; + } + + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, sameStrand, diffStrand, + writeHistogram, bamInput, obeySplits, eachBase, countsOnly); + delete bg; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl; + cerr << "\t on the intervals in B." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-abam\t" << "The A input file is in BAM format." << endl << endl; + + cerr << "\t-s\t" << "Require same strandedness. 
That is, only counts hits in A that" << endl; + cerr << "\t\toverlap B on the _same_ strand." << endl; + cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; + + cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in A that" << endl; + cerr << "\t\toverlap B on the _opposite_ strand." << endl; + cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; + + cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl; + cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl; + cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl; + cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl; + + cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl; + cerr << "\t\tPositions reported are one based. Each position" << endl; + cerr << "\t\tand depth follow the complete B feature." << endl << endl; + + cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl; + + cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl; + cerr << "\t\twhen computing coverage." << endl; + cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl; + cerr << "\t\tto infer the blocks for computing coverage." << endl; + cerr << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl; + cerr << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl; + + cerr << "Default Output: " << endl; + cerr << "\t" << " After each entry in B, reports: " << endl; + cerr << "\t 1) The number of features in A that overlapped the B interval." << endl; + cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl; + cerr << "\t 3) The length of the entry in B." 
<< endl; + cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl; + + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/cuffToTrans/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/cuffToTrans/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/sequenceUtilities/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= cuffToTransMain.cpp cuffToTrans.cpp Fasta.cpp split.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= cuffToTrans


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own .cpp, so editing
# one source no longer forces every object in this directory to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Objects owned by the shared utility directories are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: removes everything in the shared obj/ and bin/ directories,
# not only the files produced by this Makefile.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
# (no trailing backslash on the last entry: a dangling continuation would
#  silently swallow whatever line is placed directly after this list)
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/sequenceUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/Fasta/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= fastaFromBedMain.cpp fastaFromBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= fastaFromBed


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own .cpp, so editing
# one source no longer forces every object in this directory to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Objects owned by the shared utility directories are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/

# NOTE: removes everything in the shared obj/ and bin/ directories,
# not only the files produced by this Makefile.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,141 @@ +/***************************************************************************** + fastaFromBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "fastaFromBed.h" + + +Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, + const string &fastaOutFile, bool useFasta, bool useStrand) { + + _useName = useName; + _dbFile = dbFile; + _bedFile = bedFile; + _fastaOutFile = fastaOutFile; + _useFasta = useFasta; + _useStrand = useStrand; + + _bed = new BedFile(_bedFile); + + // Figure out what the output file should be. + if (fastaOutFile == "stdout") { + _faOut = &cout; + } + else { + // Make sure we can open the file. + ofstream fa(fastaOutFile.c_str(), ios::out); + if ( !fa ) { + cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + fa.close(); + _faOut = new ofstream(fastaOutFile.c_str(), ios::out); + } + } + + // Extract the requested intervals from the FASTA input file. + ExtractDNA(); +} + + +Bed2Fa::~Bed2Fa(void) { +} + + +//****************************************************************************** +// ReportDNA +//****************************************************************************** +void Bed2Fa::ReportDNA(const BED &bed, string &dna) { + + // revcomp if necessary. Thanks to Thomas Doktor. 
+ if ((_useStrand == true) && (bed.strand == "-")) + reverseComplement(dna); + + if (!(_useName)) { + if (_useFasta == true) { + if (_useStrand == true) + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; + else + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; + } + else { + if (_useStrand == true) + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; + else + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; + } + } + else { + if (_useFasta == true) + *_faOut << ">" << bed.name << endl << dna << endl; + else + *_faOut << bed.name << "\t" << dna << endl; + } +} + + + +//****************************************************************************** +// ExtractDNA +//****************************************************************************** +void Bed2Fa::ExtractDNA() { + + /* Make sure that we can oen all of the files successfully*/ + + // open the fasta database for reading + ifstream faDb(_dbFile.c_str(), ios::in); + if ( !faDb ) { + cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + + // open and memory-map genome file + FastaReference *fr = new FastaReference; + bool memmap = true; + fr->open(_dbFile, memmap); + + BED bed, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + string sequence; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + // make sure we are extracting >= 1 bp + if (bed.zeroLength == false) { + size_t seqLength = fr->sequenceLength(bed.chrom); + // make sure this feature will not exceed the end of the chromosome. 
+ if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) + { + int length = bed.end - bed.start; + sequence = fr->getSubSequence(bed.chrom, bed.start, length); + ReportDNA(bed, sequence); + } + else + { + cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " + << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; + } + } + // handle zeroLength + else { + cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; + } + bed = nullBed; + } + } + _bed->Close(); +} + + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,56 @@ +/***************************************************************************** + fastaFromBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef FASTAFROMBED_H +#define FASTAFROMBED_H + +#include "bedFile.h" +#include "sequenceUtils.h" +#include "Fasta.h" +#include <vector> +#include <iostream> +#include <fstream> + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class Bed2Fa { + +public: + + // constructor + Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile, + bool useFasta, bool useStrand); + + // destructor + ~Bed2Fa(void); + + void ExtractDNA(); + void ReportDNA(const BED &bed, string &dna); + + +private: + + bool _useName; + string _dbFile; + string _bedFile; + string _fastaOutFile; + bool _useFasta; + bool _useStrand; + + // instance of a bed file class. + BedFile *_bed; + ostream *_faOut; +}; + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,146 @@ +/***************************************************************************** + fastaFromBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "fastaFromBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "fastaFromBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string fastaDbFile; + string bedFile; + + // output files + string fastaOutFile; + + // checks for existence of parameters + bool haveFastaDb = false; + bool haveBed = false; + bool haveFastaOut = false; + bool useNameOnly = false; + bool useFasta = true; + bool useStrand = false; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-fi", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaDb = true; + fastaDbFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaOut = true; + fastaOutFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-bed", 4, 
parameterLength)) { + if ((i+1) < argc) { + haveBed = true; + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-name", 5, parameterLength)) { + useNameOnly = true; + } + else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { + useFasta = false; + } + else if(PARAMETER_CHECK("-s", 2, parameterLength)) { + useStrand = true; + } + else { + cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + if (!haveFastaDb || !haveFastaOut || !haveBed) { + showHelp = true; + } + + if (!showHelp) { + + Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); + delete b2f; + + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-fi\tInput FASTA file" << endl; + cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; + cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; + cerr << "\t-name\tUse the name field for the FASTA header" << endl; + + cerr << "\t-tab\tWrite output in TAB delimited format." << endl; + cerr << "\t\t- Default is FASTA format." << endl << endl; + + cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl; + cerr << "\t\tthe sequence will be reverse complemented." << endl; + cerr << "\t\t- By default, strand information is ignored." << endl << endl; + + + + // end the program here + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fjoin/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= fjoinMain.cpp fjoin.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= fjoin

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own .cpp, so editing
# one source no longer forces every object in this directory to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Objects owned by the shared utility directories are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: removes everything in the shared obj/ and bin/ directories,
# not only the files produced by this Makefile.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,350 @@\n+/*****************************************************************************\n+ intersectBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "fjoin.h"\n+#include <queue>\n+#include <set>\n+\n+bool leftOf(const BED &a, const BED &b);\n+\n+\n+bool BedIntersect::processHits(BED &a, vector<BED> &hits) {\n+ // how many overlaps are there b/w the bed and the set of hits?\n+ int s, e, overlapBases;\n+ int numOverlaps = 0;\n+ bool hitsFound = false;\n+ int aLength = (a.end - a.start); // the length of a in b.p.\n+\n+ // loop through the hits and report those that meet the user\'s criteria\n+ vector<BED>::const_iterator h = hits.begin();\n+ vector<BED>::const_iterator hitsEnd = hits.end();\n+ for (; h != hitsEnd; ++h) {\n+ s = max(a.start, h->start);\n+ e = min(a.end, h->end);\n+ overlapBases = (e - s); // the number of overlapping bases b/w a and b\n+\n+ // is there enough overlap relative to the user\'s request? 
(default ~ 1bp)\n+ if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+ // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n+ if (_reciprocal == false) {\n+ hitsFound = true;\n+ numOverlaps++;\n+ if (_printable == true)\n+ ReportOverlapDetail(overlapBases, a, *h, s, e);\n+ }\n+ // we require there to be sufficient __reciprocal__ overlap\n+ else {\n+ int bLength = (h->end - h->start);\n+ float bOverlap = ( (float) overlapBases / (float) bLength );\n+ if (bOverlap >= _overlapFraction) {\n+ hitsFound = true;\n+ numOverlaps++;\n+ if (_printable == true)\n+ ReportOverlapDetail(overlapBases, a, *h, s, e);\n+ }\n+ }\n+ }\n+ }\n+ // report the summary of the overlaps if requested.\n+ ReportOverlapSummary(a, numOverlaps);\n+ // were hits found for this BED feature?\n+ return hitsFound;\n+}\n+\n+/*\n+ Constructor\n+*/\n+BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n+ bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n+ float overlapFraction, bool noHit, bool writeCount, bool forceStrand,\n+ bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {\n+\n+ _bedAFile = bedAFile;\n+ _bedBFile = bedBFile;\n+ _anyHit = anyHit;\n+ _noHit = noHit;\n+ _writeA = writeA;\n+ _writeB = writeB;\n+ _writeOverlap = writeOverlap;\n+ _writeAllOverlap = writeAllOverlap;\n+ _writeCount = writeCount;\n+ _overlapFraction = overlapFraction;\n+ _forceStrand = forceStrand;\n+ _reciprocal = reciprocal;\n+ _obeySplits = obeySplits;\n+ _bamInput = bamInput;\n+ _bamOutput = bamOutput;\n+\n+ if (_anyHit || _noHit || _writeCount)\n+ _printable = false;\n+ else\n+ _printable = true;\n+\n+ // create new BED file objects for A and B\n+ _bedA = new BedFile(bedAFile);\n+ _bedB = new BedFile(bedBFile);\n+\n+ IntersectBed();\n+}\n+\n+\n+/*\n+ Destructor\n+*/\n+BedIntersect::~BedIntersect(void) {\n+}\n+\n+\n+bool leftOf(const BED &a, const BED &b) {\n+ return (a.end <= b.start);\n+}\n+\n+\n+void 
BedIntersect::ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b,\n+ const CHRPOS &s, const CHRPOS'..b'\n+ it = _windowA.find(chrom);\n+ if (it != _windowA.end()) {\n+ return & _windowA[chrom];\n+ }\n+ else {\n+ _windowA.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n+ return & _windowA[chrom];\n+ }\n+ }\n+ else {\n+ it = _windowB.find(chrom);\n+ if (it != _windowB.end()) {\n+ return & _windowB[chrom];\n+ }\n+ else {\n+ _windowB.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n+ return & _windowB[chrom];\n+ }\n+ }\n+}\n+\n+\n+void BedIntersect::ChromSwitch(const string &chrom) {\n+\n+ vector<BED*>::iterator windowAIter = _windowA[chrom].begin();\n+ vector<BED*>::iterator windowAEnd = _windowA[chrom].end();\n+ for (; windowAIter != windowAEnd; ++windowAIter)\n+ (*windowAIter)->finished = true;\n+\n+ vector<BED*>::iterator windowBIter = _windowB[chrom].begin();\n+ vector<BED*>::iterator windowBEnd = _windowB[chrom].end();\n+ for (; windowBIter != windowBEnd; ++windowBIter)\n+ (*windowBIter)->finished = true;\n+\n+ FlushOutputBuffer();\n+}\n+\n+\n+void BedIntersect::IntersectBed() {\n+\n+ int aLineNum = 0;\n+ int bLineNum = 0;\n+\n+ // current feature from each file\n+ BED *a, *b, *prevA, *prevB;\n+\n+ // status of the current lines\n+ BedLineStatus aStatus, bStatus;\n+\n+ // open the files; get the first line from each\n+ _bedA->Open();\n+ _bedB->Open();\n+\n+ prevA = NULL;\n+ prevB = NULL;\n+ a = new BED();\n+ b = new BED();\n+ aStatus = _bedA->GetNextBed(*a, aLineNum);\n+ bStatus = _bedB->GetNextBed(*b, bLineNum);\n+\n+ cout << a->chrom << " " << a->start << " " << a->chrom << " " << b->start << endl;\n+ while (aStatus != BED_INVALID || bStatus != BED_INVALID) {\n+ \n+ if ((a->start <= b->start) && (a->chrom == b->chrom)) {\n+ prevA = a;\n+ _lastPick = 0;\n+ Scan(a, GetWindow(a->chrom, true), aStatus,\n+ *b, GetWindow(a->chrom, false), bStatus);\n+\n+ a = new BED();\n+ aStatus = _bedA->GetNextBed(*a, aLineNum);\n+ 
}\n+ else if ((a->start > b->start) && (a->chrom == b->chrom)) {\n+ prevB = b;\n+ _lastPick = 1;\n+ Scan(b, GetWindow(b->chrom, false), bStatus,\n+ *a, GetWindow(b->chrom, true), aStatus);\n+\n+ b = new BED();\n+ bStatus = _bedB->GetNextBed(*b, bLineNum);\n+ }\n+ else if (a->chrom != b->chrom) {\n+ // A was most recently read\n+ if (_lastPick == 0) {\n+ prevB = b;\n+ while (b->chrom == prevA->chrom){\n+ _windowB[prevA->chrom].push_back(b);\n+ b = new BED();\n+ bStatus = _bedB->GetNextBed(*b, bLineNum);\n+ }\n+ Scan(prevA, GetWindow(prevA->chrom, true), aStatus,\n+ *prevB, GetWindow(prevA->chrom, false), bStatus);\n+ }\n+ // B was most recently read\n+ else {\n+ prevA = a;\n+ while (a->chrom == prevB->chrom) {\n+ _windowA[prevB->chrom].push_back(a);\n+ a = new BED();\n+ aStatus = _bedA->GetNextBed(*a, aLineNum);\n+ }\n+ Scan(prevB, GetWindow(prevB->chrom, false), bStatus,\n+ *prevA, GetWindow(prevB->chrom, true), aStatus);\n+ }\n+ FlushOutputBuffer(true);\n+ }\n+ if (prevA!=NULL&&prevB!=NULL)\n+ //cout << prevA->chrom << " " << a->chrom << " " << a->start << " "\n+ // << prevB->chrom << " " << b->chrom << " " << b->start << "\\n";\n+ if (aStatus == BED_INVALID) a->start = INT_MAX;\n+ if (bStatus == BED_INVALID) b->start = INT_MAX;\n+ }\n+\n+ // clear out the final bit of staged output\n+ FlushOutputBuffer(true);\n+\n+ // close the files\n+ _bedA->Close();\n+ _bedB->Close();\n+}\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoin.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fjoin/fjoin.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,114 @@ +/***************************************************************************** + intersectBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef INTERSECTBED_H +#define INTERSECTBED_H + +#include "bedFile.h" +// #include "BamReader.h" +// #include "BamWriter.h" +// #include "BamAncillary.h" +// #include "BamAux.h" +// using namespace BamTools; + + +#include <vector> +#include <queue> +#include <iostream> +#include <fstream> +#include <stdlib.h> +using namespace std; + + + +class BedIntersect { + +public: + + // constructor + BedIntersect(string bedAFile, string bedBFile, bool anyHit, + bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, + float overlapFraction, bool noHit, bool writeCount, bool forceStrand, + bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput); + + // destructor + ~BedIntersect(void); + +private: + + //------------------------------------------------ + // private attributes + //------------------------------------------------ + string _bedAFile; + string _bedBFile; + + bool _writeA; // should the original A feature be reported? + bool _writeB; // should the original B feature be reported? + bool _writeOverlap; + bool _writeAllOverlap; + + bool _forceStrand; + bool _reciprocal; + float _overlapFraction; + + bool _anyHit; + bool _noHit; + bool _writeCount; // do we want a count of the number of overlaps in B? + bool _obeySplits; + bool _bamInput; + bool _bamOutput; + + bool _printable; + + queue<BED*> _outputBuffer; + bool _lastPick; + + map<string, vector<BED*> > _windowA; + map<string, vector<BED*> > _windowB; + + // instance of a bed file class. 
+ BedFile *_bedA, *_bedB; + + //------------------------------------------------ + // private methods + //------------------------------------------------ + void IntersectBed(istream &bedInput); + + void Scan(BED *x, vector<BED *> *windowX, BedLineStatus xStatus, + const BED &y, vector<BED *> *windowY, BedLineStatus yStatus); + + void AddHits(BED *x, const BED &y); + + void FlushOutputBuffer(bool final = false); + + vector<BED*>* GetWindow(const string &chrom, bool isA); + + void ChromSwitch(const string &chrom); + + void IntersectBed(); + + void IntersectBam(string bamFile); + + bool processHits(BED &a, vector<BED> &hits); + + bool FindOverlaps(const BED &a, vector<BED> &hits); + + bool FindOneOrMoreOverlap(const BED &a); + + void ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b, + const CHRPOS &s, const CHRPOS &e); + void ReportOverlapSummary(const BED &a, const int &numOverlapsFound); + + void ReportHits(set<BED> &A, set<BED> &B); + +}; + +#endif /* INTERSECTBED_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,271 @@\n+/*****************************************************************************\n+ intersectMain.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "fjoin.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "fjoin"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedAFile;\n+ string bedBFile;\n+\n+ // input arguments\n+ float overlapFraction = 1E-9;\n+\n+ bool haveBedA = false;\n+ bool haveBedB = false;\n+ bool noHit = false;\n+ bool anyHit = false;\n+ bool writeA = false;\n+ bool writeB = false;\n+ bool writeCount = false;\n+ bool writeOverlap = false;\n+ bool writeAllOverlap = false;\n+ bool haveFraction = false;\n+ bool reciprocalFraction = false;\n+ bool forceStrand = false;\n+ bool obeySplits = false;\n+ bool inputIsBam = false;\n+ bool outputIsBam = true;\n+\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+ if 
((i+1) < argc) {\n+ haveBedA = true;\n+ outputIsBam = false;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ inputIsBam = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedB = true;\n+ bedBFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+ outputIsBam = false;\n+ }\n+ else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+ anyHit = true;\n+ }\n+ else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFraction = true;\n+ overlapFraction = atof(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n+ writeA = true;\n+ }\n+ else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n+ writeB = true;\n+ }\n+ else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n+ writeOverlap = true;\n+ }\n+ else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n+ writeAllOverlap = true;\n+ writeOverlap = true;\n+ }\n+ else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+ writeCount = true;\n+ }\n+ else if(PARAMETER_CHECK("-r", 2, parameterLength)) {\n+ reciprocalFraction = true;\n+ }\n+ else if (PARAMETER'..b'IsBam, outputIsBam);\n+ delete bi;\n+ return 0;\n+ }\n+ else {\n+ ShowHelp();\n+ }\n+}\n+\n+void ShowHelp(void) {\n+\n+ cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+ cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+\n+ cerr << "Summary: Report overlaps between two feature files." << endl << endl;\n+\n+ cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+ cerr << "Options: " << endl;\n+\n+ cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;\n+\n+ cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. 
The default" << endl;\n+ cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+ cerr << "\\t-wa\\t" << "Write the original entry in A for each overlap." << endl << endl;\n+\n+ cerr << "\\t-wb\\t" << "Write the original entry in B for each overlap." << endl;\n+ cerr << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-wo\\t" << "Write the original A and B entries plus the number of base" << endl;\n+ cerr << "\\t\\tpairs of overlap between the two features." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n+ cerr << "\\t\\t Only A features with overlap are reported." << endl << endl;\n+\n+ cerr << "\\t-wao\\t" << "Write the original A and B entries plus the number of base" << endl;\n+ cerr << "\\t\\tpairs of overlap between the two features." << endl;\n+ cerr << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n+ cerr << "\\t\\t However, A features w/o overlap are also reported" << endl;\n+ cerr << "\\t\\t with a NULL B feature and overlap = 0." << endl << endl;\n+\n+ cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+ cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n+ cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+ cerr << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n+\n+ cerr << "\\t-f\\t" << "Minimum overlap required as a fraction of A." << endl;\n+ cerr << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n+ cerr << "\\t\\t- FLOAT (e.g. 
0.50)" << endl << endl;\n+\n+ cerr << "\\t-r\\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;\n+ cerr << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n+ cerr << "\\t\\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n+\n+ cerr << "\\t-s\\t" << "Force strandedness. That is, only report hits in B that" << endl;\n+ cerr << "\\t\\toverlap A on the same strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-split\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n+\n+\n+ // end the program here\n+ exit(1);\n+\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/flankBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= flankBedMain.cpp flankBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= flankBed


all: $(PROGRAM)

.PHONY: all clean

# link the tool from its own objects plus the shared utility objects
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# static pattern rule: each object depends only on its own source file,
# so touching one .cpp no longer forces every object to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# the shared utility objects are built by their own sub-makes
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,163 @@ +/***************************************************************************** + flankBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "flankBed.h" + + +BedFlank::BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftFlank, float rightFlank, bool fractional) { + + _bedFile = bedFile; + _genomeFile = genomeFile; + _forceStrand = forceStrand; + _leftFlank = leftFlank; + _rightFlank = rightFlank; + _fractional = fractional; + + _bed = new BedFile(bedFile); + _genome = new GenomeFile(genomeFile); + + // get going, slop it up. + FlankBed(); +} + + +BedFlank::~BedFlank(void) { + +} + + +void BedFlank::FlankBed() { + + int lineNum = 0; + BED bedEntry, nullBed; // used to store the current BED line from the BED file. + BedLineStatus bedStatus; + + _bed->Open(); + bedStatus = _bed->GetNextBed(bedEntry, lineNum); + while (bedStatus != BED_INVALID) { + if (bedStatus == BED_VALID) { + + int leftFlank = _leftFlank; + int rightFlank = _rightFlank; + if (_fractional == true) { + leftFlank = (int) (_leftFlank * bedEntry.size()); + rightFlank = (int) (_rightFlank * bedEntry.size()); + } + + if ((_forceStrand == false) || (bedEntry.strand == "+")) + { + AddFlank(bedEntry, leftFlank, rightFlank); + } + else if ((_forceStrand == true) && (bedEntry.strand == "-" )) + { + AddStrandedFlank(bedEntry, leftFlank, rightFlank); + } + bedEntry = nullBed; + } + bedStatus = _bed->GetNextBed(bedEntry, lineNum); + } + _bed->Close(); +} + + +void BedFlank::AddFlank(BED &bed, int leftFlank, int rightFlank) { + + int chromSize = _genome->getChromSize(bed.chrom); + if (chromSize == -1) { + cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." 
<< endl; + exit(1); + } + + // init. our left and right flanks to the original BED entry. + // we'll create the flanks from these coordinates. + BED left = bed; + BED right = bed; + + // make the left flank (if necessary) + if (leftFlank > 0) { + if ( (static_cast<int>(left.start) - leftFlank) > 0) + { + left.end = left.start; + left.start -= leftFlank; + } + else + { + left.end = left.start; + left.start = 0; + } + // report the left flank + _bed->reportBedNewLine(left); + } + + // make the left flank (if necessary) + if (rightFlank > 0) { + if ( (static_cast<int>(right.end) + (rightFlank+1)) <= static_cast<int>(chromSize)) + { + right.start = right.end; + right.end += (rightFlank); + } + else { + right.start = right.end; + right.end += chromSize; + } + // report the right flank + _bed->reportBedNewLine(right); + } +} + + +void BedFlank::AddStrandedFlank(BED &bed, int leftFlank, int rightFlank) { + + int chromSize = _genome->getChromSize(bed.chrom); + if (chromSize == -1) { + cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." << endl; + exit(1); + } + + // init. our left and right flanks to the original BED entry. + // we'll create the flanks from these coordinates. + BED left = bed; + BED right = bed; + + // make the left flank (if necessary) + if (rightFlank > 0) { + if ( (static_cast<int>(left.start) - rightFlank) > 0) + { + left.end = left.start; + left.start -= rightFlank; + } + else + { + left.end = left.start; + left.start = 0; + } + // report the left flank + _bed->reportBedNewLine(left); + } + + // make the left flank (if necessary) + if (leftFlank > 0) { + if ( (static_cast<int>(right.end) + leftFlank) <= static_cast<int>(chromSize)) + { + right.start = right.end; + right.end += leftFlank; + } + else { + right.start = right.end; + right.end = chromSize; + } + // report the right flank + _bed->reportBedNewLine(right); + } +} + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/flankBed/flankBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,63 @@ +/***************************************************************************** + flankBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ + +#include "bedFile.h" +#include "genomeFile.h" + +#include <vector> +#include <iostream> +#include <fstream> +#include <map> +#include <cstdlib> +#include <ctime> +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class BedFlank { + +public: + + // constructor + BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional); + + // destructor + ~BedFlank(void); + + + +private: + + string _bedFile; + string _genomeFile; + + bool _forceStrand; + float _leftFlank; + float _rightFlank; + bool _fractional; + + BedFile *_bed; + GenomeFile *_genome; + + // methods + + void FlankBed(); + + // method to grab requested flank w.r.t. a single BED entry + void AddFlank(BED &bed, int leftSlop, int rightSlop); + + // method to grab requested flank w.r.t. a single BED entry, + // while choosing flanks based on strand + void AddStrandedFlank(BED &bed, int leftSlop, int rightSlop); +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,190 @@ +/***************************************************************************** + flankBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "flankBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "flankBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + string genomeFile; + + bool haveBed = true; + bool haveGenome = false; + bool haveLeft = false; + bool haveRight = false; + bool haveBoth = false; + + bool forceStrand = false; + float leftSlop = 0.0; + float rightSlop = 0.0; + bool fractional = false; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-g", 2, parameterLength)) { + if ((i+1) < argc) { + haveGenome = true; + genomeFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-l", 2, parameterLength)) { + if ((i+1) < argc) { + haveLeft = true; + leftSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-r", 2, 
parameterLength)) { + if ((i+1) < argc) { + haveRight = true; + rightSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-b", 2, parameterLength)) { + if ((i+1) < argc) { + haveBoth = true; + leftSlop = atof(argv[i + 1]); + rightSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-s", 2, parameterLength)) { + forceStrand = true; + } + else if(PARAMETER_CHECK("-pct", 4, parameterLength)) { + fractional = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed || !haveGenome) { + cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; + showHelp = true; + } + if (!haveLeft && !haveRight && !haveBoth) { + cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl; + showHelp = true; + } + if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) { + cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl; + showHelp = true; + } + if (forceStrand && (!(haveLeft) || !(haveRight))) { + cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedFlank *bc = new BedFlank(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional); + delete bc; + + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Creates flanking interval(s) for each BED/GFF/VCF feature." 
<< endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-b\t" << "Create flanking intervak using -b base pairs in each direction." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-l\t" << "The number of base pairs that a flank should start from orig. start coordinate." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-r\t" << "The number of base pairs that a flank should end from orig. end coordinate." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-s\t" << "Define -l and -r based on strand." << endl; + cerr << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl; + cerr << "\t\tit will start the flank 500 bp downstream. Default = false." << endl << endl; + + cerr << "\t-pct\t" << "Define -l and -r as a fraction of the feature's length." << endl; + cerr << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl; + cerr << "\t\twill add 500 bp \"upstream\". Default = false." << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) Starts will be set to 0 if options would force it below 0." << endl; + cerr << "\t(2) Ends will be set to the chromosome length if requested flank would" << endl; + cerr << "\tforce it above the max chrom length." << endl; + + cerr << "\t(3) The genome file should tab delimited and structured as follows:" << endl; + cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl; + cerr << "\tFor example, Human (hg19):" << endl; + cerr << "\tchr1\t249250621" << endl; + cerr << "\tchr2\t243199373" << endl; + cerr << "\t..." 
<< endl; + cerr << "\tchr18_gl000207_random\t4262" << endl << endl; + + + cerr << "Tips: " << endl; + cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; + cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; + cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + + + // end the program here + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= genomeCoverageMain.cpp genomeCoverageBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= genomeCoverageBed


all: $(PROGRAM)

.PHONY: all clean

# link the tool from its own objects, the shared utility objects and bamtools
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# static pattern rule: each object depends only on its own source file,
# so touching one .cpp no longer forces every object to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# the shared utility objects are built by their own sub-makes
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,396 @@\n+/*****************************************************************************\n+genomeCoverage.cpp\n+\n+(c) 2009 - Aaron Quinlan\n+Hall Laboratory\n+Department of Biochemistry and Molecular Genetics\n+University of Virginia\n+aaronquinlan@gmail.com\n+\n+Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "genomeCoverageBed.h"\n+\n+\n+BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,\n+ bool eachBase, bool startSites, \n+ bool bedGraph, bool bedGraphAll,\n+ int max, float scale,\n+ bool bamInput, bool obeySplits,\n+ bool filterByStrand, string requestedStrand,\n+ bool only_5p_end, bool only_3p_end,\n+ bool eachBaseZeroBased,\n+ bool add_gb_track_line, string gb_track_line_opts) {\n+\n+ _bedFile = bedFile;\n+ _genomeFile = genomeFile;\n+ _eachBase = eachBase;\n+ _eachBaseZeroBased = eachBaseZeroBased;\n+ _startSites = startSites;\n+ _bedGraph = bedGraph;\n+ _bedGraphAll = bedGraphAll;\n+ _max = max;\n+ _scale = scale;\n+ _bamInput = bamInput;\n+ _obeySplits = obeySplits;\n+ _filterByStrand = filterByStrand;\n+ _requestedStrand = requestedStrand;\n+ _only_3p_end = only_3p_end;\n+ _only_5p_end = only_5p_end;\n+ _add_gb_track_line = add_gb_track_line;\n+ _gb_track_line_opts = gb_track_line_opts;\n+ _currChromName = "";\n+ _currChromSize = 0 ;\n+\n+ \n+ if (_bamInput == false) {\n+ _genome = new GenomeFile(genomeFile);\n+ }\n+ \n+ PrintTrackDefinitionLine();\n+\n+ if (_bamInput == false) {\n+ _bed = new BedFile(bedFile);\n+ CoverageBed();\n+ }\n+ else {\n+ CoverageBam(_bedFile);\n+ }\n+}\n+\n+void BedGenomeCoverage::PrintTrackDefinitionLine()\n+{\n+ //Print Track Definition line (if requested)\n+ if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) {\n+ string line = "track type=bedGraph";\n+ if (!_gb_track_line_opts.empty()) {\n+ line += " " ;\n+ line += _gb_track_line_opts ;\n+ }\n+ cout << 
line << endl;\n+ }\n+\n+}\n+\n+\n+BedGenomeCoverage::~BedGenomeCoverage(void) {\n+ delete _bed;\n+ delete _genome;\n+}\n+\n+\n+void BedGenomeCoverage::ResetChromCoverage() {\n+ _currChromName = "";\n+ _currChromSize = 0 ;\n+ std::vector<DEPTH>().swap(_currChromCoverage);\n+}\n+\n+\n+void BedGenomeCoverage::StartNewChrom(const string& newChrom) {\n+ // If we\'ve moved beyond the first encountered chromosomes,\n+ // process the results of the previous chromosome.\n+ if (_currChromName.length() > 0) {\n+ ReportChromCoverage(_currChromCoverage, _currChromSize,\n+ _currChromName, _currChromDepthHist);\n+ }\n+\n+ // empty the previous chromosome and reserve new\n+ std::vector<DEPTH>().swap(_currChromCoverage);\n+\n+ if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {\n+ cerr << "Input error: Chromosome " << _currChromName\n+ << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;\n+\n+ }\n+ _visitedChromosomes.insert(newChrom);\n+\n+ _currChromName = newChrom;\n+\n+ // get the current chrom size and allocate space\n+ _currChromSize = _genome->getChromSize(_currChromName);\n+\n+ if (_currChromSize >= 0)\n+ _currChromCoverage.resize(_currChromSize);\n+ else {\n+ cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." 
<< endl;\n+ exit(1);\n+ }\n+}\n+\n+\n+void BedGenomeCoverage::AddCoverage(int start, int end) {\n+ // process the first line for this chromosome.\n+ // make sure the coordinates fit within'..b'pth - chromCov[pos].ends;\n+ }\n+ // report the histogram for each chromosome\n+ histMap::const_iterator depthIt = chromDepthHist[chrom].begin();\n+ histMap::const_iterator depthEnd = chromDepthHist[chrom].end();\n+ for (; depthIt != depthEnd; ++depthIt) {\n+ int depth = depthIt->first;\n+ unsigned int numBasesAtDepth = depthIt->second;\n+ cout << chrom << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n+ << chromSize << "\\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl;\n+ }\n+ }\n+}\n+\n+\n+\n+void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) {\n+\n+ // get the list of chromosome names in the genome\n+ vector<string> chromList = _genome->getChromList();\n+\n+ unsigned int genomeSize = 0;\n+ vector<string>::const_iterator chromItr = chromList.begin();\n+ vector<string>::const_iterator chromEnd = chromList.end();\n+ for (; chromItr != chromEnd; ++chromItr) {\n+ string chrom = *chromItr;\n+ genomeSize += _genome->getChromSize(chrom);\n+ // if there were no reads for a give chromosome, then\n+ // add the length of the chrom to the 0 bin.\n+ if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) {\n+ chromDepthHist[chrom][0] += _genome->getChromSize(chrom);\n+ }\n+ }\n+\n+ histMap genomeHist; // depth histogram for the entire genome\n+\n+ // loop through each chromosome and add the depth and number of bases at each depth\n+ // to the aggregate histogram for the entire genome\n+ for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) {\n+ string chrom = chromIt->first;\n+ for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) {\n+ int depth = depthIt->first;\n+ unsigned int numBasesAtDepth = depthIt->second;\n+ 
genomeHist[depth] += numBasesAtDepth;\n+ }\n+ }\n+\n+ // loop through the depths for the entire genome\n+ // and report the number and fraction of bases in\n+ // the entire genome that are at said depth.\n+ for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) {\n+ int depth = genomeDepthIt->first;\n+ unsigned int numBasesAtDepth = genomeDepthIt->second;\n+\n+ cout << "genome" << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n+ << genomeSize << "\\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl;\n+ }\n+}\n+\n+\n+void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) {\n+\n+ int depth = 0; // initialize the depth\n+ int lastStart = -1;\n+ int lastDepth = -1;\n+\n+ for (int pos = 0; pos < chromSize; pos++) {\n+ depth += chromCov[pos].starts;\n+\n+ if (depth != lastDepth) {\n+ // Coverage depth has changed, print the last interval coverage (if any)\n+ // Print if:\n+ // (1) depth>0 (the default running mode),\n+ // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll)\n+ if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n+ cout << chrom << "\\t" << lastStart << "\\t" << pos << "\\t" << lastDepth * _scale << endl;\n+ }\n+ //Set current position as the new interval start + depth\n+ lastDepth = depth;\n+ lastStart = pos;\n+ }\n+ // Default: the depth has not changed, so we will not print anything.\n+ // Proceed until the depth changes.\n+ // Update depth\n+ depth = depth - chromCov[pos].ends;\n+ }\n+ //Print information about the last position\n+ if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n+ cout << chrom << "\\t" << lastStart << "\\t" << chromSize << "\\t" << lastDepth * _scale << endl;\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,104 @@ +/***************************************************************************** +genomeCoverage.h + +(c) 2009 - Aaron Quinlan +Hall Laboratory +Department of Biochemistry and Molecular Genetics +University of Virginia +aaronquinlan@gmail.com + +Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include "genomeFile.h" + +#include "BamAncillary.h" +#include "api/BamReader.h" +#include "api/BamAux.h" +using namespace BamTools; + +#include <vector> +#include <set> +#include <iostream> +#include <fstream> +using namespace std; + + +//*********************************************** +// Typedefs +//*********************************************** +typedef map<int, DEPTH, less<int> > depthMap; +typedef map<string, depthMap, less<string> > chromDepthMap; + +typedef map<int, unsigned int, less<int> > histMap; +typedef map<string, histMap, less<string> > chromHistMap; + +//************************************************ +// Class methods and elements +//************************************************ +class BedGenomeCoverage { + +public: + + // constructor + BedGenomeCoverage(string bedFile, string genomeFile, + bool eachBase, bool startSites, + bool bedGraph, bool bedGraphAll, + int max, float scale, + bool bamInput, bool obeySplits, + bool filterByStrand, string requestedStrand, + bool only_5p_end, bool only_3p_end, + bool eachBaseZeroBased, + bool add_gb_track_line, string gb_track_line_opts); + + // destructor + ~BedGenomeCoverage(void); + +private: + + // data (parms) + string _bedFile; + string _genomeFile; + bool _bamInput; + bool _eachBase; + bool _eachBaseZeroBased; + bool _startSites; + bool _bedGraph; + bool _bedGraphAll; + int _max; + float _scale; + bool _obeySplits; + bool _filterByStrand; + bool _only_5p_end; + bool _only_3p_end; + bool _add_gb_track_line; + string _gb_track_line_opts; + string _requestedStrand; + + BedFile *_bed; 
+ GenomeFile *_genome; + + // data for internal processing + chromDepthMap _chromCov; + string _currChromName ; + vector<DEPTH> _currChromCoverage; + chromHistMap _currChromDepthHist; + int _currChromSize ; + set<string> _visitedChromosomes; + + + // methods + void CoverageBed(); + void CoverageBam(string bamFile); + void LoadBamHeaderIntoGenomeFile(const string &bamFile); + void ReportChromCoverage(const vector<DEPTH> &, const int &chromSize, const string &chrom, chromHistMap&); + void ReportGenomeCoverage(chromHistMap &chromDepthHist); + void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom); + void ResetChromCoverage(); + void StartNewChrom (const string& chrom); + void AddCoverage (int start, int end); + void AddBlockedCoverage(const vector<BED> &bedBlocks); + void PrintFinalCoverage(); + void PrintTrackDefinitionLine(); +}; + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,312 @@\n+/*****************************************************************************\n+genomeCoverageMain.cpp\n+\n+(c) 2009 - Aaron Quinlan\n+Hall Laboratory\n+Department of Biochemistry and Molecular Genetics\n+University of Virginia\n+aaronquinlan@gmail.com\n+\n+Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "genomeCoverageBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "genomeCoverageBed"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedFile;\n+ string genomeFile;\n+ int max = INT_MAX;\n+ float scale = 1.0;\n+\n+ bool haveBed = false;\n+ bool bamInput = false;\n+ bool haveGenome = false;\n+ bool startSites = false;\n+ bool bedGraph = false;\n+ bool bedGraphAll = false;\n+ bool eachBase = false;\n+ bool eachBaseZeroBased = false;\n+ bool obeySplits = false;\n+ bool haveScale = false;\n+ bool filterByStrand = false;\n+ bool only_5p_end = false;\n+ bool only_3p_end = false;\n+ bool add_gb_track_line = false;\n+ string gb_track_opts;\n+ string requestedStrand = "X";\n+\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ 
if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBed = true;\n+ bedFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-ibam", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBed = true;\n+ bamInput = true;\n+ bedFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveGenome = true;\n+ genomeFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-d", 2, parameterLength)) {\n+ eachBase = true;\n+ }\n+ else if(PARAMETER_CHECK("-dz", 3, parameterLength)) {\n+ eachBase = true;\n+ eachBaseZeroBased = true;\n+ }\n+ else if(PARAMETER_CHECK("-bg", 3, parameterLength)) {\n+ bedGraph = true;\n+ }\n+ else if(PARAMETER_CHECK("-bga", 4, parameterLength)) {\n+ bedGraphAll = true;\n+ }\n+ else if(PARAMETER_CHECK("-max", 4, parameterLength)) {\n+ if ((i+1) < argc) {\n+ max = atoi(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-scale", 6, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveScale = true;\n+ scale = atof(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n+ obeySplits = true;\n+ }\n+ else if(PARAMETER_CHECK("-strand", 7, parameterLength)) {\n+ if ((i+1) < argc) {\n+ filterByStrand = true;\n+ requestedStrand = argv[i+1][0];\n+ if (!(requestedStrand == "-" || requestedStrand == "+")) {\n+ '..b'\\tquickly extract all regions of a genome with 0 " << endl;\n+ cerr << "\\t\\t\\tcoverage by applying: \\"grep -w 0$\\" to the output." << endl << endl;\n+\n+ cerr << "\\t-split\\t\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl;\n+ cerr << "\\t\\t\\twhen computing coverage." << endl;\n+ cerr << "\\t\\t\\tFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations " << endl;\n+ cerr << "\\t\\t\\tto infer the blocks for computing coverage." 
<< endl;\n+ cerr << "\\t\\t\\tFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds" << endl;\n+ cerr << "\\t\\t\\tfields (i.e., columns 10,11,12)." << endl << endl;\n+\n+ cerr << "\\t-strand\\t\\t" << "Calculate coverage of intervals from a specific strand." << endl;\n+ cerr << "\\t\\t\\tWith BED files, requires at least 6 columns (strand is column 6). " << endl;\n+ cerr << "\\t\\t\\t- (STRING): can be + or -" << endl << endl;\n+\n+ cerr << "\\t-5\\t\\t" << "Calculate coverage of 5\\" positions (instead of entire interval)." << endl << endl;\n+\n+ cerr << "\\t-3\\t\\t" << "Calculate coverage of 3\\" positions (instead of entire interval)." << endl << endl;\n+\n+ cerr << "\\t-max\\t\\t" << "Combine all positions with a depth >= max into" << endl;\n+ cerr << "\\t\\t\\ta single bin in the histogram. Irrelevant" << endl;\n+ cerr << "\\t\\t\\tfor -d and -bedGraph" << endl;\n+ cerr << "\\t\\t\\t- (INTEGER)" << endl << endl;\n+\n+ cerr << "\\t-scale\\t\\t" << "Scale the coverage by a constant factor." << endl;\n+ cerr << "\\t\\t\\tEach coverage value is multiplied by this factor before being reported." << endl;\n+ cerr << "\\t\\t\\tUseful for normalizing coverage by, e.g., reads per million (RPM)." << endl;\n+ cerr << "\\t\\t\\t- Default is 1.0; i.e., unscaled." << endl;\n+ cerr << "\\t\\t\\t- (FLOAT)" << endl << endl;\n+\n+ cerr << "\\t-trackline\\t" << "Adds a UCSC/Genome-Browser track line definition in the first line of the output." << endl;\n+ cerr <<"\\t\\t\\t- See here for more details about track line definition:" << endl;\n+ cerr <<"\\t\\t\\t http://genome.ucsc.edu/goldenPath/help/bedgraph.html" << endl;\n+ cerr <<"\\t\\t\\t- NOTE: When adding a trackline definition, the output BedGraph can be easily" << endl;\n+ cerr <<"\\t\\t\\t uploaded to the Genome Browser as a custom track," << endl;\n+ cerr <<"\\t\\t\\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." 
<< endl << endl;\n+\n+ cerr << "\\t-trackopts\\t"<<"Writes additional track line definition parameters in the first line." << endl;\n+ cerr <<"\\t\\t\\t- Example:" << endl;\n+ cerr <<"\\t\\t\\t -trackopts \'name=\\"My Track\\" visibility=2 color=255,30,30\'" << endl;\n+ cerr <<"\\t\\t\\t Note the use of single-quotes if you have spaces in your parameters." << endl;\n+ cerr <<"\\t\\t\\t- (TEXT)" << endl << endl;\n+\n+ cerr << "Notes: " << endl;\n+ cerr << "\\t(1) The genome file should tab delimited and structured as follows:" << endl;\n+ cerr << "\\t <chromName><TAB><chromSize>" << endl << endl;\n+ cerr << "\\tFor example, Human (hg19):" << endl;\n+ cerr << "\\tchr1\\t249250621" << endl;\n+ cerr << "\\tchr2\\t243199373" << endl;\n+ cerr << "\\t..." << endl;\n+ cerr << "\\tchr18_gl000207_random\\t4262" << endl << endl;\n+\n+ cerr << "\\t(2) The input BED (-i) file must be grouped by chromosome." << endl;\n+ cerr << "\\t A simple \\"sort -k 1,1 <BED> > <BED>.sorted\\" will suffice."<< endl << endl;\n+\n+ cerr << "\\t(3) The input BAM (-ibam) file must be sorted by position." << endl;\n+ cerr << "\\t A \\"samtools sort <BAM>\\" should suffice."<< endl << endl;\n+\n+ cerr << "Tips: " << endl;\n+ cerr << "\\tOne can use the UCSC Genome Browser\'s MySQL database to extract" << endl;\n+ cerr << "\\tchromosome sizes. For example, H. sapiens:" << endl << endl;\n+ cerr << "\\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\\\" << endl;\n+ cerr << "\\t\\"select chrom, size from hg19.chromInfo\\" > hg19.genome" << endl << endl;\n+\n+\n+ // end the program here\n+ exit(1);\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/intersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds the intersectBed program from its two sources plus the shared
# utility objects, writing objects to OBJ_DIR and the binary to BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary \
           -I$(UTILITIES_DIR)/chromsweep

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= intersectMain.cpp intersectBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o chromsweep.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= intersectBed

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# editing one .cpp no longer forces every object to be recompiled.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/chromsweep/

# NOTE: this empties the whole OBJ_DIR and BIN_DIR, not just this program's files.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,367 @@\n+/*****************************************************************************\n+ intersectBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "intersectBed.h"\n+\n+/************************************\n+Helper functions\n+************************************/\n+bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {\n+\n+ // how many overlaps are there b/w the bed and the set of hits?\n+ CHRPOS s, e;\n+ int overlapBases;\n+ int numOverlaps = 0;\n+ bool hitsFound = false;\n+ int aLength = (a.end - a.start); // the length of a in b.p.\n+\n+ // loop through the hits and report those that meet the user\'s criteria\n+ vector<BED>::const_iterator h = hits.begin();\n+ vector<BED>::const_iterator hitsEnd = hits.end();\n+ for (; h != hitsEnd; ++h) {\n+ s = max(a.start, h->start);\n+ e = min(a.end, h->end);\n+ overlapBases = (e - s); // the number of overlapping bases b/w a and b\n+\n+ // is there enough overlap relative to the user\'s request? 
(default ~ 1bp)\n+ if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+ // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n+ if (_reciprocal == false) {\n+ hitsFound = true;\n+ numOverlaps++;\n+ if (_printable == true)\n+ ReportOverlapDetail(overlapBases, a, *h, s, e);\n+ }\n+ // we require there to be sufficient __reciprocal__ overlap\n+ else {\n+ int bLength = (h->end - h->start);\n+ float bOverlap = ( (float) overlapBases / (float) bLength );\n+ if (bOverlap >= _overlapFraction) {\n+ hitsFound = true;\n+ numOverlaps++;\n+ if (_printable == true)\n+ ReportOverlapDetail(overlapBases, a, *h, s, e);\n+ }\n+ }\n+ }\n+ }\n+ // report the summary of the overlaps if requested.\n+ ReportOverlapSummary(a, numOverlaps);\n+ // were hits found for this BED feature?\n+ return hitsFound;\n+}\n+\n+\n+/*\n+ Constructor\n+*/\n+BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n+ bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n+ float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,\n+ bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,\n+ bool sortedInput) {\n+\n+ _bedAFile = bedAFile;\n+ _bedBFile = bedBFile;\n+ _anyHit = anyHit;\n+ _noHit = noHit;\n+ _writeA = writeA;\n+ _writeB = writeB;\n+ _writeOverlap = writeOverlap;\n+ _writeAllOverlap = writeAllOverlap;\n+ _writeCount = writeCount;\n+ _overlapFraction = overlapFraction;\n+ _sameStrand = sameStrand;\n+ _diffStrand = diffStrand;\n+ _reciprocal = reciprocal;\n+ _obeySplits = obeySplits;\n+ _bamInput = bamInput;\n+ _bamOutput = bamOutput;\n+ _isUncompressedBam = isUncompressedBam;\n+ _sortedInput = sortedInput;\n+\n+ // should we print each overlap, or does the user want summary information?\n+ _printable = true;\n+ if (_anyHit || _noHit || _writeCount)\n+ _printable = false;\n+ \n+ if (_bamInput == false)\n+ IntersectBed();\n+ else\n+ 
IntersectBam(bedAFile);\n+}\n+\n+\n+/*\n+ Destructor'..b'der.Open(bamFile);\n+\n+ // get header & reference information\n+ string bamHeader = reader.GetHeaderText();\n+ RefVector refs = reader.GetReferenceData();\n+\n+ // open a BAM output to stdout if we are writing BAM\n+ if (_bamOutput == true) {\n+ // set compression mode\n+ BamWriter::CompressionMode compressionMode = BamWriter::Compressed;\n+ if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;\n+ writer.SetCompressionMode(compressionMode);\n+ // open our BAM writer\n+ writer.Open("stdout", bamHeader, refs);\n+ }\n+\n+ vector<BED> hits;\n+ // reserve some space\n+ hits.reserve(100);\n+\n+ \n+ BamAlignment bam; \n+ // get each set of alignments for each pair.\n+ while (reader.GetNextAlignment(bam)) {\n+\n+ if (bam.IsMapped()) {\n+ BED a;\n+ a.chrom = refs.at(bam.RefID).RefName;\n+ a.start = bam.Position;\n+ a.end = bam.GetEndPosition(false, false);\n+\n+ // build the name field from the BAM alignment.\n+ a.name = bam.Name;\n+ if (bam.IsFirstMate()) a.name += "/1";\n+ if (bam.IsSecondMate()) a.name += "/2";\n+\n+ a.score = ToString(bam.MapQuality);\n+\n+ a.strand = "+";\n+ if (bam.IsReverseStrand()) a.strand = "-";\n+\n+ if (_bamOutput == true) {\n+ bool overlapsFound = false;\n+ // treat the BAM alignment as a single "block"\n+ if (_obeySplits == false) {\n+ overlapsFound = FindOneOrMoreOverlap(a);\n+ }\n+ // split the BAM alignment into discrete blocks and\n+ // look for overlaps only within each block.\n+ else {\n+ bool overlapFoundForBlock;\n+ bedVector bedBlocks; // vec to store the discrete BED "blocks" from a\n+ // we don\'t want to split on "D" ops, hence the "false"\n+ getBamBlocks(bam, refs, bedBlocks, false);\n+\n+ vector<BED>::const_iterator bedItr = bedBlocks.begin();\n+ vector<BED>::const_iterator bedEnd = bedBlocks.end();\n+ for (; bedItr != bedEnd; ++bedItr) {\n+ overlapFoundForBlock = FindOneOrMoreOverlap(*bedItr);\n+ if (overlapFoundForBlock == true)\n+ overlapsFound = true;\n+ 
}\n+ }\n+ if (overlapsFound == true) {\n+ if (_noHit == false)\n+ writer.SaveAlignment(bam);\n+ }\n+ else {\n+ if (_noHit == true) {\n+ writer.SaveAlignment(bam);\n+ }\n+ }\n+ }\n+ else {\n+ // treat the BAM alignment as a single BED "block"\n+ if (_obeySplits == false) {\n+ FindOverlaps(a, hits);\n+ hits.clear();\n+ }\n+ // split the BAM alignment into discrete BED blocks and\n+ // look for overlaps only within each block.\n+ else {\n+ bedVector bedBlocks; // vec to store the discrete BED "blocks" from a\n+ getBamBlocks(bam, refs, bedBlocks, false);\n+\n+ vector<BED>::const_iterator bedItr = bedBlocks.begin();\n+ vector<BED>::const_iterator bedEnd = bedBlocks.end();\n+ for (; bedItr != bedEnd; ++bedItr) {\n+ FindOverlaps(*bedItr, hits);\n+ hits.clear();\n+ }\n+ }\n+ }\n+ }\n+ // BAM IsMapped() is false\n+ else if (_noHit == true) {\n+ writer.SaveAlignment(bam);\n+ }\n+ }\n+\n+ // close the relevant BAM files.\n+ reader.Close();\n+ if (_bamOutput == true) {\n+ writer.Close();\n+ }\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,98 @@ +/***************************************************************************** + intersectBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef INTERSECTBED_H +#define INTERSECTBED_H + +#include "bedFile.h" +#include "chromsweep.h" +#include "api/BamReader.h" +#include "api/BamWriter.h" +#include "api/BamAux.h" +#include "BamAncillary.h" +using namespace BamTools; + + +#include <vector> +#include <iostream> +#include <fstream> +#include <stdlib.h> +using namespace std; + + + +class BedIntersect { + +public: + + // constructor + BedIntersect(string bedAFile, string bedBFile, bool anyHit, + bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, + float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, + bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam, + bool sortedInput); + + // destructor + ~BedIntersect(void); + +private: + + //------------------------------------------------ + // private attributes + //------------------------------------------------ + string _bedAFile; + string _bedBFile; + + bool _writeA; // should the original A feature be reported? + bool _writeB; // should the original B feature be reported? + bool _writeOverlap; + bool _writeAllOverlap; + + bool _sameStrand; + bool _diffStrand; + bool _reciprocal; + float _overlapFraction; + + bool _anyHit; + bool _noHit; + bool _writeCount; // do we want a count of the number of overlaps in B? + bool _obeySplits; + bool _bamInput; + bool _bamOutput; + bool _isUncompressedBam; + bool _sortedInput; + bool _printable; + + // instance of a bed file class. 
+ BedFile *_bedA, *_bedB; + + //------------------------------------------------ + // private methods + //------------------------------------------------ + void IntersectBed(istream &bedInput); + + void IntersectBed(); + + void IntersectBam(string bamFile); + + bool processHits(const BED &a, const vector<BED> &hits); + + bool FindOverlaps(const BED &a, vector<BED> &hits); + + bool FindOneOrMoreOverlap(const BED &a); + + void ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e); + + void ReportOverlapSummary(const BED &a, const int &numOverlapsFound); + +}; + +#endif /* INTERSECTBED_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+ intersectMain.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "intersectBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "intersectBed"\n+\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedAFile;\n+ string bedBFile;\n+\n+ // input arguments\n+ float overlapFraction = 1E-9;\n+\n+ bool haveBedA = false;\n+ bool haveBedB = false;\n+ bool noHit = false;\n+ bool anyHit = false;\n+ bool writeA = false;\n+ bool writeB = false;\n+ bool writeCount = false;\n+ bool writeOverlap = false;\n+ bool writeAllOverlap = false;\n+ bool haveFraction = false;\n+ bool reciprocalFraction = false;\n+ bool sameStrand = false;\n+ bool diffStrand = false;\n+ bool obeySplits = false;\n+ bool inputIsBam = false;\n+ bool outputIsBam = true;\n+ bool uncompressedBam = false;\n+ bool sortedInput = false;\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int 
parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ outputIsBam = false;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ inputIsBam = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedB = true;\n+ bedBFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+ outputIsBam = false;\n+ }\n+ else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+ anyHit = true;\n+ }\n+ else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFraction = true;\n+ overlapFraction = atof(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n+ writeA = true;\n+ }\n+ else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n+ writeB = true;\n+ }\n+ else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n+ writeOverlap = true;\n+ }\n+ else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n+ writeAllOverlap = true;\n+ writeOverlap = true;\n+ }\n+ else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+ writeCount = true;\n+ }\n+ else '..b'BAM output. Default is to write compressed BAM." << endl << endl;\n+\n+ cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. The default" << endl;\n+ cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+ cerr << "\\t-wa\\t" << "Write the original entry in A for each overlap." << endl << endl;\n+\n+ cerr << "\\t-wb\\t" << "Write the original entry in B for each overlap." << endl;\n+ cerr << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-wo\\t" << "Write the original A and B entries plus the number of base" << endl;\n+ cerr << "\\t\\tpairs of overlap between the two features." 
<< endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n+ cerr << "\\t\\t Only A features with overlap are reported." << endl << endl;\n+\n+ cerr << "\\t-wao\\t" << "Write the original A and B entries plus the number of base" << endl;\n+ cerr << "\\t\\tpairs of overlap between the two features." << endl;\n+ cerr << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n+ cerr << "\\t\\t However, A features w/o overlap are also reported" << endl;\n+ cerr << "\\t\\t with a NULL B feature and overlap = 0." << endl << endl;\n+\n+ cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+ cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n+ cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n+\n+ cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+ cerr << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n+\n+ cerr << "\\t-f\\t" << "Minimum overlap required as a fraction of A." << endl;\n+ cerr << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n+ cerr << "\\t\\t- FLOAT (e.g. 0.50)" << endl << endl;\n+\n+ cerr << "\\t-r\\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;\n+ cerr << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n+ cerr << "\\t\\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n+\n+ cerr << "\\t-s\\t" << "Require same strandedness. That is, only report hits in B that" << endl;\n+ cerr << "\\t\\toverlap A on the _same_ strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." 
<< endl << endl;\n+\n+ cerr << "\\t-S\\t" << "Require different strandedness. That is, only report hits in B that" << endl;\n+ cerr << "\\t\\toverlap A on the _opposite_ strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-split\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n+\n+ cerr << "\\t-sorted\\t" << "Use the \\"chromsweep\\" algorithm for sorted (-k1,1 -k2,2n) input" << endl;\n+ cerr << "\\t\\tNOTE: this will trust, but not enforce that data is sorted. Caveat emptor." << endl << endl;\n+\n+ // end the program here\n+ exit(1);\n+\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/linksBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds the linksBed program from its two sources plus the shared utility
# objects, writing objects to OBJ_DIR and the binary to BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= linksMain.cpp linksBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= linksBed


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# editing one .cpp no longer forces every object to be recompiled.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo "  * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: this empties the whole OBJ_DIR and BIN_DIR, not just this program's files.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,122 @@ +/***************************************************************************** + linksBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "linksBed.h" + +// +// Constructor +// +BedLinks::BedLinks(string &bedFile, string &base, string &org, string &db) { + _bedFile = bedFile; + _bed = new BedFile(bedFile); + + _base = base; + _org = org; + _db = db; + + CreateLinks(); +} + +// +// Destructor +// +BedLinks::~BedLinks(void) { +} + + +void BedLinks::WriteURL(BED &bed, string &base) { + + string position = bed.chrom; + std::stringstream posStream; + posStream << ":" << bed.start << "-" << bed.end; + position.append(posStream.str()); + + cout << "<tr>" << endl; + cout << "\t<td>" << endl; + cout << "\t\t<a href=" << base << position << ">"; + cout << bed.chrom << ":" << bed.start << "-" << bed.end; + cout << "</a>" << endl; + cout << "\t</td>" << endl; + + if (_bed->bedType == 4) { + cout << "\t<td>" << endl; + cout << bed.name << endl; + cout << "\t</td>" << endl; + } + else if (_bed->bedType == 5) { + cout << "\t<td>" << endl; + cout << bed.name << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.score << endl; + cout << "\t</td>" << endl; + } + else if ((_bed->bedType == 6) || (_bed->bedType == 9) || (_bed->bedType == 12)) { + cout << "\t<td>" << endl; + cout << bed.name << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.score << endl; + cout << "\t</td>" << endl; + + cout << "\t<td>" << endl; + cout << bed.strand << endl; + cout << "\t</td>" << endl; + } + cout << "</tr>" << endl; +} + + +void BedLinks::CreateLinks() { + + + // construct the html base. 
+ string org = _org; + string db = _db; + string base = _base; + base.append("/cgi-bin/hgTracks?org="); + base.append(org); + base.append("&db="); + base.append(db); + base.append("&position="); + + // create the HTML header + cout << "<html>" << endl <<"\t<body>" << endl; + cout << "<title>" << _bedFile << "</title>" << endl; + + // start the table of entries + cout << "<br>Firefox users: Press and hold the \"apple\" or \"alt\" key and click link to open in new tab." << endl; + cout << "<p style=\"font-family:courier\">" << endl; + cout << "<table border=\"0\" align=\"justify\"" << endl; + cout << "<h3>BED Entries from: stdin </h3>" << endl; + + int lineNum = 0; + BED bedEntry, nullBed; + BedLineStatus bedStatus; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + WriteURL(bedEntry, base); + bedEntry = nullBed; + } + } + _bed->Close(); + + cout << "</table>" << endl; + cout << "</p>" << endl; + cout << "\t</body>" << endl <<"</html>" << endl; +} + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/linksBed/linksBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,44 @@ +/***************************************************************************** + linksBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include <vector> +#include <algorithm> +#include <iostream> +#include <fstream> + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class BedLinks { + +public: + + // constructor + BedLinks(string &bedFile, string &base, string &org, string &db); + + // destructor + ~BedLinks(void); + +private: + string _bedFile; + string _base; + string _org; + string _db; + + // instance of a bed file class. + BedFile *_bed; + + void WriteURL(BED &bed, string &base); + void CreateLinks(); // the default. sorts by chrom (asc.) then by start (asc.) +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,129 @@ +/***************************************************************************** + linksBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "linksBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "linksBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + bool haveBed = true; + + /* Defaults for everyone else */ + string org = "human"; + string db = "hg18"; + string base = "http://genome.ucsc.edu"; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-base", 5, parameterLength)) { + if ((i+1) < argc) { + base = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-org", 4, parameterLength)) { + if ((i+1) < argc) { + org = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-db", 3, parameterLength)) { + if ((i+1) < argc) { + db = argv[i + 1]; + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " 
*****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedLinks *bl = new BedLinks(bedFile, base, org, db); + delete bl; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Creates HTML links to an UCSC Genome Browser from a feature file." << endl << endl; + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> > out.html" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-base\t" << "The browser basename. Default: http://genome.ucsc.edu " << endl; + cerr << "\t-org\t" << "The organism. Default: human" << endl; + cerr << "\t-db\t" << "The build. Default: hg18" << endl << endl; + + cerr << "Example: " << endl; + cerr << "\t" << "By default, the links created will point to human (hg18) UCSC browser." << endl; + cerr << "\tIf you have a local mirror, you can override this behavior by supplying" << endl; + cerr << "\tthe -base, -org, and -db options." << endl << endl; + cerr << "\t" << "For example, if the URL of your local mirror for mouse MM9 is called: " << endl; + cerr << "\thttp://mymirror.myuniversity.edu, then you would use the following:" << endl; + cerr << "\t" << "-base http://mymirror.myuniversity.edu" << endl; + cerr << "\t" << "-org mouse" << endl; + cerr << "\t" << "-db mm9" << endl; + + + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the maskFastaFromBed tool.
# Objects are written to the shared OBJ_DIR and the binary to the shared BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= maskFastaFromBedMain.cpp maskFastaFromBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= maskFastaFromBed

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own .cpp, so editing
# one source file no longer forces every object of this tool to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Objects owned by the utility libraries are built by their own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: OBJ_DIR and BIN_DIR are shared, so this removes every tool's output.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,155 @@ +/***************************************************************************** + maskFastaFromBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "maskFastaFromBed.h" + + +MaskFastaFromBed::MaskFastaFromBed(const string &fastaInFile, const string &bedFile, + const string &fastaOutFile, bool softMask, char maskChar) { + _softMask = softMask; + _fastaInFile = fastaInFile; + _bedFile = bedFile; + _fastaOutFile = fastaOutFile; + _maskChar = maskChar; + _bed = new BedFile(_bedFile); + + _bed->loadBedFileIntoMapNoBin(); + // start masking. + MaskFasta(); +} + + +MaskFastaFromBed::~MaskFastaFromBed(void) { +} + + +//****************************************************************************** +// Mask the Fasta file based on the coordinates in the BED file. +//****************************************************************************** +void MaskFastaFromBed::MaskFasta() { + + /* Make sure that we can open all of the files successfully*/ + + // open the fasta database for reading + ifstream fa(_fastaInFile.c_str(), ios::in); + if ( !fa ) { + cerr << "Error: The requested fasta file (" << _fastaInFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + + // open the fasta database for reading + ofstream faOut(_fastaOutFile.c_str(), ios::out); + if ( !faOut ) { + cerr << "Error: The requested fasta output file (" << _fastaOutFile << ") could not be opened. Exiting!" 
<< endl; + exit (1); + } + + + /* Read the fastaDb chromosome by chromosome*/ + string fastaInLine; + string currChrom; + string currDNA = ""; + currDNA.reserve(500000000); + int fastaWidth = -1; + bool widthSet = false; + int start, end, length; + string replacement; + + while (getline(fa,fastaInLine)) { + + if (fastaInLine.find(">",0) != 0 ) { + if (widthSet == false) { + fastaWidth = fastaInLine.size(); + widthSet = true; + } + currDNA += fastaInLine; + } + else { + if (currDNA.size() > 0) { + + vector<BED> bedList = _bed->bedMapNoBin[currChrom]; + + /* + loop through each BED entry for this chrom and + mask the requested sequence in the FASTA file. + */ + for (unsigned int i = 0; i < bedList.size(); i++) { + start = bedList[i].start; + end = bedList[i].end; + length = end - start; + + /* + (1) if soft masking, extract the sequence, lowercase it, + then put it back + (2) otherwise replace with Ns + */ + if (_softMask) { + replacement = currDNA.substr(start, length); + toLowerCase(replacement); + currDNA.replace(start, length, replacement); + } + else { + string hardmask(length, _maskChar); + currDNA.replace(start, length, hardmask); + } + } + // write the masked chrom to the output file + PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth); + } + + // reset for the next chromosome. + currChrom = fastaInLine.substr(1, fastaInLine.find_first_of(" ")-1); + currDNA = ""; + } + } + + // process the last chromosome. + // exact same logic as in the main loop. 
+ if (currDNA.size() > 0) { + + vector<BED> bedList = _bed->bedMapNoBin[currChrom]; + + for (unsigned int i = 0; i < bedList.size(); i++) { + start = bedList[i].start; + end = bedList[i].end; + length = end - start; + + if (_softMask) { + replacement = currDNA.substr(start, length); + toLowerCase(replacement); + currDNA.replace(start, length, replacement); + } + else { + string hardmask(length, _maskChar); + currDNA.replace(start, length, hardmask); + } + } + PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth); + } + + // closed for business. + fa.close(); + faOut.close(); +} + + +void MaskFastaFromBed::PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width) { + + int seqLength = sequence.size(); + + out << ">" << chrom << endl; + for(int i = 0; i < seqLength; i += width) { + if (i + width < seqLength) out << sequence.substr(i, width) << endl; + else out << sequence.substr(i, seqLength-i) << endl; + } +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,56 @@ +/***************************************************************************** + maskFastaFromBed.h + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef MASKFASTAFROMBED_H +#define MASKFASTAFROMBED_H + +#include "bedFile.h" +#include "sequenceUtils.h" +#include <vector> +#include <iostream> +#include <fstream> +#include <cctype> /* for tolower */ + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class MaskFastaFromBed { + +public: + + // constructor + MaskFastaFromBed(const string &fastaInFile, const string &bedFile, + const string &fastaOutFile, bool softMask, char maskChar); + + // destructor + ~MaskFastaFromBed(void); + + +private: + + bool _softMask; + + string _fastaInFile; + string _bedFile; + string _fastaOutFile; + char _maskChar; // typically "N", but user's can choose something else, e.g., "X" + + // instance of a bed file class. + BedFile *_bed; + + void MaskFasta(); + + void PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width); + +}; + +#endif /* MASKFASTAFROMBED */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,146 @@ +/***************************************************************************** + maskFastaFromBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "maskFastaFromBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "maskFastaFromBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string fastaInFile; + string bedFile; + + // output files + string fastaOutFile; + + // defaults for parameters + bool haveFastaIn = false; + bool haveBed = false; + bool haveFastaOut = false; + bool softMask = false; + char maskChar = 'N'; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-fi", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaIn = true; + fastaInFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaOut = true; + fastaOutFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { + if ((i+1) < argc) 
{ + haveBed = true; + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-soft", 5, parameterLength)) { + softMask = true; + } + else if(PARAMETER_CHECK("-mc", 3, parameterLength)) { + if ((i+1) < argc) { + string mask = argv[i + 1]; + if (mask.size() > 1) { + cerr << "*****ERROR: The mask character (-mc) should be a single character.*****" << endl << endl; + showHelp = true; + } + else { + maskChar = mask[0]; + } + i++; + } + } + else { + cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + if (!haveFastaIn || !haveFastaOut || !haveBed) { + showHelp = true; + } + + if (!showHelp) { + + MaskFastaFromBed *maskFasta = new MaskFastaFromBed(fastaInFile, bedFile, fastaOutFile, softMask, maskChar); + delete maskFasta; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Mask a fasta file based on feature coordinates." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -out <fasta> -bed <bed/gff/vcf>" << endl << endl; + + cerr << "Options:" << endl; + cerr << "\t-fi\tInput FASTA file" << endl; + cerr << "\t-bed\tBED/GFF/VCF file of ranges to mask in -fi" << endl; + cerr << "\t-fo\tOutput FASTA file" << endl; + cerr << "\t-soft\tEnforce \"soft\" masking. That is, instead of masking with Ns," << endl; + cerr << "\t\tmask with lower-case bases." << endl; + cerr << "\t-mc\tReplace masking character. That is, instead of masking with Ns, use another character." << endl; + + // end the program here + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/mergeBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the mergeBed tool.
# Objects are written to the shared OBJ_DIR and the binary to the shared BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= mergeMain.cpp mergeBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= mergeBed


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own .cpp, so editing
# one source file no longer forces every object of this tool to be rebuilt.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Objects owned by the utility libraries are built by their own Makefiles.
# (fileType used to be invoked twice here; once is enough.)
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: OBJ_DIR and BIN_DIR are shared, so this removes every tool's output.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,364 @@\n+/*****************************************************************************\n+ mergeBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "mergeBed.h"\n+\n+\n+\n+void BedMerge::ReportMergedNames(const vector<string> &names) {\n+ if (names.size() > 0) {\n+ printf("\\t");\n+ vector<string>::const_iterator nameItr = names.begin();\n+ vector<string>::const_iterator nameEnd = names.end();\n+ for (; nameItr != nameEnd; ++nameItr) {\n+ if (nameItr < (nameEnd - 1))\n+ cout << *nameItr << ";";\n+ else\n+ cout << *nameItr;\n+ }\n+ }\n+ else {\n+ cerr << endl \n+ << "*****" << endl \n+ << "*****ERROR: No names found to report for the -names option. Exiting." << endl \n+ << "*****" << endl;\n+ exit(1);\n+ }\n+}\n+\n+\n+void BedMerge::ReportMergedScores(const vector<string> &scores) {\n+ if (scores.size() > 0) {\n+ printf("\\t");\n+\n+ // convert the scores to floats\n+ vector<float> data;\n+ for (size_t i = 0 ; i < scores.size() ; i++) {\n+ data.push_back(atof(scores[i].c_str()));\n+ } \n+\n+ if (_scoreOp == "sum") {\n+ printf("%.3f", accumulate(data.begin(), data.end(), 0.0));\n+ }\n+ else if (_scoreOp == "min") {\n+ printf("%.3f", *min_element( data.begin(), data.end() ));\n+ }\n+ else if (_scoreOp == "max") {\n+ printf("%.3f", *max_element( data.begin(), data.end() ));\n+ }\n+ else if (_scoreOp == "mean") {\n+ double total = accumulate(data.begin(), data.end(), 0.0);\n+ double mean = total / data.size();\n+ printf("%.3f", mean);\n+ }\n+ else if (_scoreOp == "median") {\n+ double median = 0.0;\n+ sort(data.begin(), data.end());\n+ int totalLines = data.size();\n+ if ((totalLines % 2) > 0) {\n+ long mid;\n+ mid = totalLines / 2;\n+ median = data[mid];\n+ 
}\n+ else {\n+ long midLow, midHigh;\n+ midLow = (totalLines / 2) - 1;\n+ midHigh = (totalLines / 2);\n+ median = (data[midLow] + data[midHigh]) / 2.0;\n+ }\n+ printf("%.3f", median);\n+ }\n+ else if ((_scoreOp == "mode") || (_scoreOp == "antimode")) {\n+ // compute the frequency of each unique value\n+ map<string, int> freqs;\n+ vector<string>::const_iterator dIt = scores.begin();\n+ vector<string>::const_iterator dEnd = scores.end();\n+ for (; dIt != dEnd; ++dIt) {\n+ freqs[*dIt]++;\n+ }\n+\n+ // grab the mode and the anti mode\n+ string mode, antiMode;\n+ int count = 0;\n+ int minCount = INT_MAX;\n+ for(map<string,int>::const_iterator iter = freqs.begin(); iter != freqs.end(); ++iter) {\n+ if (iter->second > count) {\n+ mode = iter->first;\n+ count = iter->second;\n+ }\n+ if (iter->second < minCount) {\n+ antiMode = iter->first;\n+ minCount = iter->second;\n+ }\n+ }\n+ // report\n+ if (_scoreOp == "mode") {\n+ printf("%s", mode.c_str());\n+ }\n+ else if (_scoreOp == "antimode") {\n+ printf("%s", antiMode.c_str());\n+ }\n+ }\n+ else if (_scoreOp == "collapse") { \n+ vector<string>::const_iterator scoreItr = scores.begin();\n+'..b'lock, no overlap\n+ if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {\n+ if (start >= 0) {\n+ Report(chrom, start, end, names, scores, mergeCount);\n+ // reset\n+ mergeCount = 1;\n+ names.clear();\n+ scores.clear();\n+ }\n+ start = bedItr->start;\n+ end = bedItr->end;\n+ if (!bedItr->name.empty()) names.push_back(bedItr->name);\n+ if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+ }\n+ // same block, overlaps\n+ else {\n+ if ((int) bedItr-> end > end) end = bedItr->end;\n+ mergeCount++;\n+ if (!bedItr->name.empty()) names.push_back(bedItr->name);\n+ if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+ }\n+ }\n+ if (start >= 0) {\n+ Report(chrom, start, end, names, scores, mergeCount);\n+ }\n+ }\n+}\n+\n+\n+// ==================================================================================\n+// = 
Merge overlapping BED entries into a single entry, accounting for strandedness =\n+// ==================================================================================\n+void BedMerge::MergeBedStranded() {\n+\n+ // load the "B" bed file into a map so\n+ // that we can easily compare "A" to it for overlaps\n+ _bed->loadBedFileIntoMapNoBin();\n+\n+ // loop through each chromosome and merge their BED entries\n+ masterBedMapNoBin::const_iterator m = _bed->bedMapNoBin.begin();\n+ masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();\n+ for (; m != mEnd; ++m) {\n+ \n+ // bedList is already sorted by start position.\n+ string chrom = m->first;\n+ vector<BED> bedList = m->second;\n+\n+ // make a list of the two strands to merge separately.\n+ vector<string> strands(2);\n+ strands[0] = "+";\n+ strands[1] = "-";\n+\n+ // do two passes, one for each strand.\n+ for (unsigned int s = 0; s < strands.size(); s++) {\n+\n+ int mergeCount = 1;\n+ int numOnStrand = 0;\n+ vector<string> names;\n+ vector<string> scores;\n+\n+ // merge overlapping features for this chromosome.\n+ int start = -1;\n+ int end = -1;\n+ vector<BED>::const_iterator bedItr = bedList.begin();\n+ vector<BED>::const_iterator bedEnd = bedList.end();\n+ for (; bedItr != bedEnd; ++bedItr) {\n+\n+ // if forcing strandedness, move on if the hit\n+ // is not on the current strand.\n+ if (bedItr->strand != strands[s]) { continue; }\n+ else { numOnStrand++; }\n+ \n+ if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {\n+ if (start >= 0) {\n+ ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n+ // reset\n+ mergeCount = 1;\n+ names.clear();\n+ scores.clear();\n+ }\n+ start = bedItr->start;\n+ end = bedItr->end;\n+ if (!bedItr->name.empty()) names.push_back(bedItr->name);\n+ if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+ }\n+ else {\n+ if ((int) bedItr-> end > end) end = bedItr->end;\n+ mergeCount++;\n+ if (!bedItr->name.empty()) 
names.push_back(bedItr->name);\n+ if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n+ }\n+ }\n+ if (start >= 0) {\n+ ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n+ }\n+ }\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,59 @@ +/***************************************************************************** + mergeBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include <vector> +#include <algorithm> +#include <numeric> +#include <iostream> +#include <fstream> +#include <limits.h> +#include <stdlib.h> + +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class BedMerge { + +public: + + // constructor + BedMerge(string &bedFile, bool numEntries, + int maxDistance, bool forceStrand, + bool reportNames, bool reportScores, const string &scoreOp); + + // destructor + ~BedMerge(void); + + void MergeBed(); + void MergeBedStranded(); + +private: + + string _bedFile; + bool _numEntries; + bool _forceStrand; + bool _reportNames; + bool _reportScores; + string _scoreOp; + int _maxDistance; + // instance of a bed file class. + BedFile *_bed; + + void Report(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount); + void ReportStranded(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount, string strand); + void ReportMergedNames(const vector<string> &names); + void ReportMergedScores(const vector<string> &scores); + +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,163 @@ +/***************************************************************************** + mergeMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "mergeBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "mergeBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + int maxDistance = 0; + string scoreOp = ""; + + // input arguments + bool haveBed = true; + bool numEntries = false; + bool haveMaxDistance = false; + bool forceStrand = false; + bool reportNames = false; + bool reportScores = false; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-n", 2, parameterLength)) { + numEntries = true; + } + else if(PARAMETER_CHECK("-d", 2, parameterLength)) { + if ((i+1) < argc) { + haveMaxDistance = true; + maxDistance = atoi(argv[i + 1]); + i++; + } + } + else if (PARAMETER_CHECK("-s", 2, parameterLength)) { + forceStrand = true; + } + else if 
(PARAMETER_CHECK("-nms", 4, parameterLength)) { + reportNames = true; + } + else if (PARAMETER_CHECK("-scores", 7, parameterLength)) { + reportScores = true; + if ((i+1) < argc) { + scoreOp = argv[i + 1]; + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; + showHelp = true; + } + if (reportNames && numEntries) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -n OR -nms, not both." << endl << "*****" << endl; + showHelp = true; + } + if ((reportScores == true) && (scoreOp != "sum") && (scoreOp != "max") && (scoreOp != "min") && (scoreOp != "mean") && + (scoreOp != "mode") && (scoreOp != "median") && (scoreOp != "antimode") && (scoreOp != "collapse")) + { + cerr << endl << "*****" << endl << "*****ERROR: Invalid scoreOp selection \"" << scoreOp << endl << "\" *****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedMerge *bm = new BedMerge(bedFile, numEntries, maxDistance, forceStrand, reportNames, reportScores, scoreOp); + delete bm; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Merges overlapping BED/GFF/VCF entries into a single interval." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl; + cerr << "\t\tthat are the same strand." << endl; + cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; + + cerr << "\t-n\t" << "Report the number of BED entries that were merged." 
<< endl; + cerr << "\t\t- Note: \"1\" is reported if no merging occurred." << endl << endl; + + + cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; + cerr << "\t\tto be merged." << endl; + cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl; + cerr << "\t\t- (INTEGER)" << endl << endl; + + cerr << "\t-nms\t" << "Report the names of the merged features separated by semicolons." << endl << endl; + + cerr << "\t-scores\t" << "Report the scores of the merged features. Specify one of " << endl; + cerr << "\t\tthe following options for reporting scores:" << endl; + cerr << "\t\t sum, min, max," << endl; + cerr << "\t\t mean, median, mode, antimode," << endl; + cerr << "\t\t collapse (i.e., print a semicolon-separated list)," << endl; + cerr << "\t\t- (INTEGER)" << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) All output, regardless of input type (e.g., GFF or VCF)" << endl; + cerr << "\t will in BED format with zero-based starts" << endl << endl; + + + // end the program here + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiBamCov/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the multiBamCov tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# header search paths
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ---------------------------------------------------
# sources of this tool and the utility objects it uses
# ---------------------------------------------------
PROGRAM = multiBamCov
SOURCES = multiBamCovMain.cpp multiBamCov.cpp
OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES))
_EXT_OBJECTS = bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))

# default goal
all: $(PROGRAM)

.PHONY: all clean

# link this tool's objects with the shared utility objects and bamtools
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# $(*F) is the file-within-directory part of the target stem, i.e. the
# source base name matching the object being built
$(BUILT_OBJECTS): $(SOURCES)
	@echo "  * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# utility objects are produced by the Makefiles of their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything in the shared object and binary directories
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,134 @@ +/***************************************************************************** + multiBamCov.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "multiBamCov.h" +#include "api/BamMultiReader.h" + + +/* + Constructor +*/ +MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, + int minQual, bool properOnly, + bool keepDuplicates, bool keepFailedQC) +: +_bam_files(bam_files), +_bed_file(bed_file), +_minQual(minQual), +_properOnly(properOnly), +_keepDuplicates(keepDuplicates), +_keepFailedQC(keepFailedQC) +{ + _bed = new BedFile(_bed_file); + LoadBamFileMap(); +} + + +/* + Destructor +*/ +MultiCovBam::~MultiCovBam(void) +{} + + + +void MultiCovBam::CollectCoverage() +{ + BamMultiReader reader; + + if ( !reader.Open(_bam_files) ) + { + cerr << "Could not open input BAM files." << endl; + exit(1); + } + else + { + // attempt to find index files + reader.LocateIndexes(); + + // if index data available for all BAM files, we can use SetRegion + if ( reader.HasIndexes() ) { + BED bed, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + + _bed->Open(); + // loop through each BED entry, jump to it, + // and collect coverage from each BAM + while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) + { + if (bedStatus == BED_VALID) + { + // initialize counts for each file to 0 + vector<int> counts(_bam_files.size(), 0); + // get the BAM refId for this chrom. 
+ int refId = reader.GetReferenceID(bed.chrom); + // set up a BamRegion to which to attempt to jump + BamRegion region(refId, (int)bed.start, refId, (int)bed.end); + + // everything checks out, just iterate through specified region, counting alignments + if ( (refId != -1) && (reader.SetRegion(region)) ) { + BamAlignment al; + while ( reader.GetNextAlignment(al) ) + { + bool duplicate = al.IsDuplicate(); + bool failedQC = al.IsFailedQC(); + if (_keepDuplicates) duplicate = false; + if (_keepFailedQC) failedQC = false; + // map qual must exceed minimum + if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) { + // ignore if not properly paired and we actually care. + if (_properOnly && !al.IsProperPair()) + continue; + + // lookup the offset of the file name and tabulate + //coverage for the appropriate file + counts[bamFileMap[al.Filename]]++; + } + } + } + // report the cov at this interval for each file and reset + _bed->reportBedTab(bed); + ReportCounts(counts); + bed = nullBed; + } + } + _bed->Close(); + } + else { + cerr << "Could not find indexes." << endl; + reader.Close(); + exit(1); + } + } +} + + +void MultiCovBam::LoadBamFileMap(void) +{ + for (size_t i = 0; i < _bam_files.size(); ++i) + { + bamFileMap[_bam_files[i]] = i; + } +} + +void MultiCovBam::ReportCounts(const vector<int> &counts) +{ + for (size_t i = 0; i < counts.size(); ++i) + { + if (i < counts.size() - 1) + cout << counts[i] << "\t"; + else + cout << counts[i]; + } + cout << endl; +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,64 @@ +/***************************************************************************** + multiBamCov.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef MULTICOVBAM_H +#define MULTICOVBAM_H + +#include "bedFile.h" +#include "api/BamMultiReader.h" +using namespace BamTools; + + +#include <vector> +#include <iostream> +#include <fstream> +#include <stdlib.h> +using namespace std; + + + +class MultiCovBam { + +public: + + // constructor + MultiCovBam(const vector<string> &bam_files, const string bed_file, + int minQual, bool properOnly, + bool keepDuplicates, bool keepFailedQC); + + // destructor + ~MultiCovBam(void); + + void CollectCoverage(); + +private: + + //------------------------------------------------ + // private attributes + //------------------------------------------------ + vector<string> _bam_files; + string _bed_file; + BedFile *_bed; + + // attributes to control what is counted + int _minQual; + bool _properOnly; + bool _keepDuplicates; + bool _keepFailedQC; + + + map<string, int> bamFileMap; + + void LoadBamFileMap(void); + void ReportCounts(const vector<int> &counts); +}; + +#endif /* MULTIBAMCOV_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,145 @@ +/***************************************************************************** + multiBamCovMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "multiBamCov.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "multiBamCov" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile; + vector<string> bamFiles; + int minQual = 0; + + // input arguments + bool haveBed = false; + bool haveBams = false; + bool properOnly = false; + bool keepDuplicates = false; + bool keepFailedQC = false; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-bed", 4, parameterLength)) { + if ((i+1) < argc) { + haveBed = true; + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-bams", 5, parameterLength)) { + if ((i+1) < argc) { + haveBams = true; + i = i+1; + string file = argv[i]; + while (file[0] != '-' && i < argc) { + bamFiles.push_back(file); + i++; + if (i < argc) + file = argv[i]; + } + i--; + } + } + 
else if(PARAMETER_CHECK("-q", 2, parameterLength)) { + if ((i+1) < argc) { + minQual = atoi(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-p", 2, parameterLength)) { + properOnly = true; + } + else if(PARAMETER_CHECK("-D", 2, parameterLength)) { + keepDuplicates = true; + } + + else if(PARAMETER_CHECK("-F", 2, parameterLength)) { + keepFailedQC = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + if (!showHelp) { + MultiCovBam *mc = new MultiCovBam(bamFiles, bedFile, minQual, properOnly, keepDuplicates, keepFailedQC); + mc->CollectCoverage(); + delete mc; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Counts sequence coverage for multiple bams at specific loci." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -bams aln.1.bam aln.2.bam ... aln.n.bam -bed <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-bams\t" << "The bam files." << endl << endl; + + cerr << "\t-bed\t" << "The bed file." << endl << endl; + + cerr << "\t-q\t" << "Minimum mapping quality allowed. Default is 0." << endl << endl; + + cerr << "\t-D\t" << "Include duplicate-marked reads. Default is to count non-duplicates only" << endl << endl; + + cerr << "\t-F\t" << "Include failed-QC reads. Default is to count pass-QC reads only" << endl << endl; + + cerr << "\t-p\t" << "Only count proper pairs. Default is to count all alignments with MAPQ" << endl; + cerr << "\t\t" << "greater than the -q argument, regardless of the BAM FLAG field." << endl << endl; + + // end the program here + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the multiIntersectBed tool.
# Objects are written to OBJ_DIR and the linked program to BIN_DIR.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= multiIntersectBed.cpp multiIntersectBedMain.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= multiIntersectBed

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo "  * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# $(*F) is the file-within-directory part of the target stem, i.e. the
# source base name matching the object being built
$(BUILT_OBJECTS): $(SOURCES)
	@echo "  * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Utility objects are produced by the Makefiles of their own directories.
# bedFile.o is listed in _EXT_OBJECTS, so the bedFile utility has to be
# built here as well; otherwise a stand-alone build of this directory has
# no way to produce it.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything in the shared object and binary directories
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,64 @@ +/***************************************************************************** + intervalItem.h + + (c) 2010 - Assaf Gordon + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef INTERVALITEM_H +#define INTERVALITEM_H + +#include <string> +#include <queue> + +enum COORDINATE_TYPE { + START, + END +}; + +/* + An interval item in the priority queue. + + An IntervalItem can mark either a START position or an END position. + */ +class IntervalItem +{ + + +public: + int source_index; // which source BedGraph file this came from + COORDINATE_TYPE coord_type; // is this the start or the end position? + CHRPOS coord; + + IntervalItem () : + source_index(-1), + coord_type(START), + coord(0) + {} + + IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord) : + source_index(_index), + coord_type(_type), + coord(_coord) + {} + + IntervalItem(const IntervalItem &other) : + source_index(other.source_index), + coord_type(other.coord_type), + coord(other.coord) + {} + + bool operator< ( const IntervalItem& other ) const + { + return this->coord > other.coord; + } +}; + +// our priority queue +typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE; + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,289 @@\n+/*****************************************************************************\n+ unionBedGraphs.cpp\n+\n+ (c) 2010 - Assaf Gordon, CSHL\n+ - Aaron Quinlan, UVA\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <cassert>\n+#include <cstring>\n+#include <cstdlib>\n+#include <iostream>\n+#include <algorithm>\n+\n+#include "bedFile.h"\n+#include "multiIntersectBed.h"\n+\n+using namespace std;\n+\n+\n+MultiIntersectBed::MultiIntersectBed(std::ostream& _output,\n+ const vector<string>& _filenames,\n+ const vector<string>& _titles,\n+ bool _print_empty_regions,\n+ const std::string& _genome_size_filename,\n+ const std::string& _no_coverage_value ) :\n+ filenames(_filenames),\n+ titles(_titles),\n+ output(_output),\n+ current_non_zero_inputs(0),\n+ print_empty_regions(_print_empty_regions),\n+ haveTitles(false),\n+ genome_sizes(NULL),\n+ no_coverage_value(_no_coverage_value)\n+{\n+ if (print_empty_regions) {\n+ assert(!_genome_size_filename.empty());\n+\n+ genome_sizes = new GenomeFile(_genome_size_filename);\n+ }\n+ \n+ if (titles.size() > 0) {\n+ haveTitles = true;\n+ }\n+}\n+\n+\n+MultiIntersectBed::~MultiIntersectBed() {\n+ CloseFiles();\n+ if (genome_sizes) {\n+ delete genome_sizes;\n+ genome_sizes = NULL ;\n+ }\n+}\n+\n+\n+void MultiIntersectBed::MultiIntersect() {\n+ OpenFiles();\n+\n+ // Add the first interval from each file\n+ for(size_t i = 0;i < input_files.size(); ++i)\n+ LoadNextItem(i);\n+\n+ // Chromosome loop - once per chromosome\n+ do {\n+ // Find the first chromosome to use\n+ current_chrom = DetermineNextChrom();\n+\n+ // Populate the queue with initial values from all files\n+ // (if they belong to the correct chromosome)\n+ for(size_t i = 0; i < input_files.size(); ++i)\n+ AddInterval(i);\n+\n+ CHRPOS 
current_start = ConsumeNextCoordinate();\n+\n+ // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage\n+ if (print_empty_regions && current_start > 0)\n+ PrintEmptyCoverage(0,current_start);\n+\n+ // Intervals loop - until all intervals (of current chromosome) from all files are used.\n+ do {\n+ CHRPOS current_end = queue.top().coord;\n+ PrintCoverage(current_start, current_end);\n+ current_start = ConsumeNextCoordinate();\n+ } while (!queue.empty());\n+\n+ // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome\n+ // print a dummy empty coverage\n+ if (print_empty_regions) {\n+ CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);\n+ if (current_start < chrom_size)\n+ PrintEmptyCoverage(current_start, chrom_size);\n+ }\n+\n+ } while (!AllFilesDone());\n+}\n+\n+\n+CHRPOS MultiIntersectBed::ConsumeNextCoordinate() {\n+ assert(!queue.empty());\n+\n+ CHRPOS new_position = queue.top().coord;\n+ do {\n+ IntervalItem item = queue.top();\n+ UpdateInformation(item);\n+ queue.pop();\n+ } while (!queue.empty() && queue.top().coord == new_position);\n+\n+ return new_position;\n+}\n+\n+\n+void MultiIntersectBed::UpdateInformation(const IntervalItem &item) {\n+ // Update the depth coverage for this file\n+\n+ // Which coordinate is it - start or end?\n+ switch (item.coord_type)\n+ {\n+ case START:\n+ current_depth[item.source_index] = 1;\n+ current_non_zero_inputs++;\n+ files_with_coverage[item.source_index] = true;\n+ break;\n+ case END:\n+ //Read the next interval from thi'..b'rval(int index) {\n+ assert(static_cast<unsigned int>(index) < input_files.size());\n+\n+ //This file has no more intervals\n+ if (current_item[index].chrom.empty())\n+ return;\n+\n+ //If the next interval belongs to a different chrom, don\'t add it\n+ if (current_item[index].chrom!=current_chrom)\n+ return;\n+\n+ const BED &bed(current_item[index]);\n+\n+ IntervalItem start_item(index, START, bed.start);\n+ 
IntervalItem end_item(index, END, bed.end);\n+\n+ queue.push(start_item);\n+ queue.push(end_item);\n+\n+ LoadNextItem(index);\n+}\n+\n+\n+void MultiIntersectBed::PrintHeader() {\n+ output << "chrom\\tstart\\tend\\tnum\\tlist" ;\n+ for (size_t i=0;i<titles.size();++i)\n+ output << "\\t" <<titles[i];\n+ output << endl;\n+}\n+\n+\n+void MultiIntersectBed::PrintCoverage(CHRPOS start, CHRPOS end) {\n+ if ( current_non_zero_inputs == 0 && ! print_empty_regions )\n+ return ;\n+\n+ output << current_chrom << "\\t"\n+ << start << "\\t"\n+ << end << "\\t"\n+ << current_non_zero_inputs << "\\t";\n+ \n+ ostringstream file_list_string;\n+ ostringstream file_bool_string;\n+ int depth_count = 0;\n+ for (size_t i = 0; i < current_depth.size(); ++i)\n+ {\n+ if (current_depth[i] > 0) {\n+ if (depth_count < current_non_zero_inputs - 1) {\n+ if (!haveTitles)\n+ file_list_string << i+1 << ",";\n+ else \n+ file_list_string << titles[i] << ",";\n+ }\n+ else {\n+ if (!haveTitles)\n+ file_list_string << i+1;\n+ else \n+ file_list_string << titles[i];\n+ }\n+ depth_count++;\n+ }\n+ file_bool_string << "\\t" << current_depth[i];\n+ }\n+ if (current_non_zero_inputs > 0) {\n+ cout << file_list_string.str() << file_bool_string.str() << endl;\n+ }\n+ else {\n+ cout << "none" << file_bool_string.str() << endl;\n+ }\n+}\n+\n+\n+void MultiIntersectBed::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {\n+ output << current_chrom << "\\t"\n+ << start << "\\t"\n+ << end << "\\t"\n+ << "0" << "\\t" << "none";\n+ \n+ for (size_t i=0;i<current_depth.size();++i)\n+ output << "\\t0";\n+\n+ output << endl;\n+}\n+\n+\n+void MultiIntersectBed::LoadNextItem(int index) {\n+ assert(static_cast<unsigned int>(index) < input_files.size());\n+\n+ current_item[index].chrom="";\n+\n+ BedFile *file = input_files[index];\n+ BED merged_bed;\n+ int lineNum = 0;\n+ //\n+ // TO DO: Do the mergeing on the fly. 
How best to do this?\n+ // \n+ // IDEA: Implement a Merge class with GetNextMerge element.\n+ //\n+\n+ while (file->GetNextMergedBed(merged_bed, lineNum))\n+ {\n+ current_item[index] = merged_bed;\n+ break;\n+ }\n+}\n+\n+\n+bool MultiIntersectBed::AllFilesDone() {\n+ for (size_t i=0;i<current_item.size();++i)\n+ if (!current_item[i].chrom.empty())\n+ return false;\n+ return true;\n+}\n+\n+\n+string MultiIntersectBed::DetermineNextChrom() {\n+ string next_chrom;\n+ for (size_t i=0;i<current_item.size();++i) {\n+ if (current_item[i].chrom.empty())\n+ continue;\n+\n+ if (next_chrom.empty())\n+ next_chrom = current_item[i].chrom;\n+ else\n+ if (current_item[i].chrom < next_chrom)\n+ next_chrom = current_item[i].chrom ;\n+ }\n+ return next_chrom;\n+}\n+\n+\n+void MultiIntersectBed::OpenFiles() {\n+ for (size_t i = 0; i < filenames.size(); ++i) {\n+ BedFile *file = new BedFile(filenames[i]);\n+ file->Open();\n+ input_files.push_back(file);\n+ current_depth.push_back(0);\n+ }\n+ current_item.resize(filenames.size());\n+}\n+\n+\n+void MultiIntersectBed::CloseFiles() {\n+ for (size_t i=0; i < input_files.size(); ++i) {\n+ BedFile *file = input_files[i];\n+ delete file;\n+ input_files[i] = NULL ;\n+ }\n+ input_files.clear();\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,125 @@ +/***************************************************************************** + multiIntersectBed.h + + (c) 2010 - Aaron Quinlan, UVA + - Assaf Gordon, CSHL + Quinlan Laboratory + Department of Public Health Sciences + Center for Public Health Genomics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef MULTIINTERSECTBED_H +#define MULTIINTERSECTBED_H + +#include <vector> +#include <string> +#include "bedFile.h" +#include "genomeFile.h" +#include "intervalItem.h" + +class MultiIntersectBed +{ +private: + + vector<string> filenames; + vector<string> titles; + + vector<BedFile*> input_files; + vector<int> current_depth; + vector<BED> current_item; + + std::ostream &output; + + INTERVALS_PRIORITY_QUEUE queue; + std::string current_chrom; + map<int, bool> files_with_coverage; + int current_non_zero_inputs; + bool print_empty_regions; + bool haveTitles; + + GenomeFile* genome_sizes; + + std::string no_coverage_value; + +public: + MultiIntersectBed(std::ostream& _output, + const vector<string>& _filenames, + const vector<string>& _titles, + bool _print_empty_regions, + const std::string& _genomeFileName, + const std::string& _no_coverage_value); + + virtual ~MultiIntersectBed(); + + // Combines all interval files + void MultiIntersect(); + + // Print the header line: chrom/start/end + name of each bedgraph file. + void PrintHeader(); + + +private: + + // Open all input files, initialize "current_XXX" vectors + void OpenFiles(); + + // Close the input files. + void CloseFiles(); + + /* + Add an interval from BedGraph file 'index' into the queue. + will only be added if it belongs to the current chromosome. 
+ + If the interval was added (=consumed), the next interval will be read from the file + using 'LoadNextItem' + */ + void AddInterval(int index); + + /* + Loads the next interval from Bed file 'index'. + Stores it in 'current_bed_item' vector. + */ + void LoadNextItem(int index); + + /* + Scans the 'current_bedgraph_item' vector, + find the 'first' chromosome to use (different BedGraph files can start with different chromosomes). + */ + std::string DetermineNextChrom(); + + /* + Returns 'true' if ALL intervals from ALL BedGraph files were used + */ + bool AllFilesDone(); + + /* + Extract the next coordinate from the queue, and updates the current coverage information. + If multiple interval share the same coordinate values, all of them are handled. + If an END coordinate is consumed, the next interval (from the corresponding file) is read. + */ + CHRPOS ConsumeNextCoordinate(); + + /* + Updates the coverage information based on the given item. + Item can be a START coordinate or an END coordiante. + */ + void UpdateInformation(const IntervalItem &item); + + /* + prints chrom/start/end and the current depth coverage values of all the files. + */ + void PrintCoverage(CHRPOS start, CHRPOS end); + + /* + prints chrom/start/end and the ZERO depth coverage values of all the files. + */ + void PrintEmptyCoverage(CHRPOS start, CHRPOS end); + + void DebugPrintQueue(); +}; + + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+ unionBedGraphsMain.cpp\n+\n+ (c) 2010 - Assaf Gordon, CSHL\n+ - Aaron Quinlan, UVA\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <climits>\n+#include <cstring>\n+#include <cstdlib>\n+#include <vector>\n+#include <string>\n+#include <iostream>\n+#include <getopt.h>\n+#include <libgen.h> //for basename()\n+#include "version.h"\n+\n+#include "genomeFile.h"\n+#include "multiIntersectBed.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "multiIntersectBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+//STLized version of basename()\n+// (because POSIX basename() modifies the input string pointer)\n+// Additionally: removes any extension the basename might have.\n+std::string stl_basename(const std::string& path);\n+\n+// function declarations\n+void ShowHelp(void);\n+void ShowExamples(void);\n+\n+\n+int main(int argc, char* argv[])\n+{\n+ bool haveFiles = false;\n+ bool haveTitles = false;\n+ bool haveGenome = false;\n+ bool haveFiller = true;\n+ bool printHeader = false;\n+ bool printEmptyRegions = false;\n+ bool showHelp = false;\n+ string genomeFile;\n+ string basePath;\n+ string noCoverageValue = "0";\n+ vector<string> inputFiles;\n+ vector<string> inputTitles;\n+\n+ //Parse command line options\n+ if(argc <= 1)\n+ ShowHelp();\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ 
if(showHelp == true) {\n+ ShowHelp();\n+ exit(1);\n+ }\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFiles = true;\n+ i = i+1;\n+ string file = argv[i];\n+ while (file[0] != \'-\' && i < argc) {\n+ inputFiles.push_back(file);\n+ i++;\n+ if (i < argc)\n+ file = argv[i];\n+ }\n+ i--;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveTitles = true;\n+ i = i+1;\n+ string title = argv[i];\n+ while (title[0] != \'-\' && i < argc) {\n+ inputTitles.push_back(title);\n+ i++;\n+ if (i < argc)\n+ title = argv[i];\n+ }\n+ i--;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveGenome = true;\n+ genomeFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFiller = true;\n+ noCoverageValue = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n+ printHeader = true;\n+ }\n+ else if(PARAMETER_CHECK("-empty", 6, parameterLength)) {\n+ printEmptyRegions = true;\n+ }\n+ else if(PARAMETER_CHECK("-examples", 9, parameterLengt'..b'+\n+ cerr << "\\t-names\\t\\t" << "A list of names (one / file) to describe each file in -i." << endl;\n+ cerr << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n+\n+ cerr << "\\t-g\\t\\t" << "Use genome file to calculate empty regions." << endl;\n+ cerr << "\\t\\t\\t- STRING." << endl << endl;\n+\n+ cerr << "\\t-empty\\t\\t" << "Report empty regions (i.e., start/end intervals w/o" << endl;\n+ cerr << "\\t\\t\\tvalues in all files)." << endl;\n+ cerr << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n+\n+ cerr << "\\t-filler TEXT\\t" << "Use TEXT when representing intervals having no value." 
<< endl;\n+ cerr << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n+\n+ cerr << "\\t-examples\\t" << "Show detailed usage examples." << endl << endl;\n+}\n+\n+\n+\n+void ShowExamples()\n+{\n+ cerr << "Example usage:\\n\\n" \\\n+"== Input files: ==\\n" \\\n+"\\n" \\\n+" $ cat 1.bg\\n" \\\n+" chr1 1000 1500 10\\n" \\\n+" chr1 2000 2100 20\\n" \\\n+"\\n" \\\n+" $ cat 2.bg\\n" \\\n+" chr1 900 1600 60\\n" \\\n+" chr1 1700 2050 50\\n" \\\n+"\\n" \\\n+" $ cat 3.bg\\n" \\\n+" chr1 1980 2070 80\\n" \\\n+" chr1 2090 2100 20\\n" \\\n+"\\n" \\\n+" $ cat sizes.txt\\n" \\\n+" chr1 5000\\n" \\\n+"\\n" \\\n+"== Union/combine the files: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start end 1 2 3\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line and custom names: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n+" chrom start end WT-1 WT-2 KO-1\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" 
chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start end 1 2 3\\n" \\\n+" chr1 0 900 0 0 0\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1600 1700 0 0 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+" chr1 2100 5000 0 0 0\\n" \\\n+"\\n" \\\n+;\n+}\n+\n+std::string stl_basename(const std::string& path)\n+{\n+ string result;\n+\n+ char* path_dup = strdup(path.c_str());\n+ char* basename_part = basename(path_dup);\n+ result = basename_part;\n+ free(path_dup);\n+\n+ size_t pos = result.find_last_of(\'.\');\n+ if (pos != string::npos )\n+ result = result.substr(0,pos);\n+\n+ return result;\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400 |
b |
#pragma once

/*
 * 64-bit file offset helpers.
 *
 * ftell64()/fseek64() map onto the platform's large-file variants and
 * off_type is the matching offset type.
 *
 * _FILE_OFFSET_BITS only takes effect if it is defined before the first
 * system header is included, so include this header first. It is not
 * redefined here when the build system has already set it.
 */
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif

/* _WIN32 is predefined by Windows compilers; WIN32 is kept for builds that
 * set it by hand. */
#if defined(_WIN32) || defined(WIN32)
#define ftell64(a)     _ftelli64(a)
#define fseek64(a,b,c) _fseeki64(a,b,c)
/* _ftelli64/_fseeki64 work with __int64; __int64_t is not an MSVC type. */
typedef __int64 off_type;
#else
#define ftell64(a)     ftello(a)
#define fseek64(a,b,c) fseeko(a,b,c)
typedef off_t off_type;
#endif
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/nucBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds the nucBed program from its own sources plus the shared utility
# objects found in $(OBJ_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/sequenceUtilities/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/Fasta/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= nucBedMain.cpp nucBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= nucBed


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# touching one .cpp no longer rebuilds every object.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are produced by the utilities' own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/


# Removes everything in the object and binary directories named above.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,158 @@ +/***************************************************************************** + nucBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "nucBed.h" + + +NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq, + bool hasPattern, const string &pattern, bool forceStrand) { + + _dbFile = dbFile; + _bedFile = bedFile; + _printSeq = printSeq; + _hasPattern = hasPattern; + _pattern = pattern; + _forceStrand = forceStrand; + + _bed = new BedFile(_bedFile); + + // Compute the DNA content in each BED/GFF/VCF interval + ProfileDNA(); +} + + +NucBed::~NucBed(void) +{} + + +void NucBed::ReportDnaProfile(const BED& bed, const string &sequence, int seqLength) +{ + int a,c,g,t,n,other,userPatternCount; + a = c = g = t = n = other = userPatternCount = 0; + + getDnaContent(sequence,a,c,g,t,n,other); + + if (_hasPattern) + userPatternCount = countPattern(sequence, _pattern); + + + // report the original interval + _bed->reportBedTab(bed); + // report AT and GC content + printf("%f\t%f\t",(float)(a+t)/seqLength, (float)(c+g)/seqLength); + // report raw nucleotide counts + printf("%d\t%d\t%d\t%d\t%d\t%d\t%d",a,c,g,t,n,other,seqLength); + // add the original sequence if requested. 
+ + if (_printSeq) + printf("\t%s",sequence.c_str()); + if (_hasPattern) + printf("\t%d",userPatternCount); + printf("\n"); + +} + + +void NucBed::PrintHeader(void) { + printf("#"); + + int numOrigColumns = (int) _bed->bedType; + for (int i = 1; i <= numOrigColumns; ++i) { + printf("%d_usercol\t", i); + } + printf("%d_pct_at\t", numOrigColumns + 1); + printf("%d_pct_gc\t", numOrigColumns + 2); + printf("%d_num_A\t", numOrigColumns + 3); + printf("%d_num_C\t", numOrigColumns + 4); + printf("%d_num_G\t", numOrigColumns + 5); + printf("%d_num_T\t", numOrigColumns + 6); + printf("%d_num_N\t", numOrigColumns + 7); + printf("%d_num_oth\t", numOrigColumns + 8); + printf("%d_seq_len\t", numOrigColumns + 9); + + if (_printSeq) + printf("%d_seq", numOrigColumns + 10); + if (_hasPattern && !_printSeq) + printf("%d_user_patt_count", numOrigColumns + 10); + else if (_hasPattern && _printSeq) + printf("\t%d_user_patt_count", numOrigColumns + 11); + printf("\n"); + +} + + +//****************************************************************************** +// ExtractDNA +//****************************************************************************** +void NucBed::ProfileDNA() { + + /* Make sure that we can oen all of the files successfully*/ + + // open the fasta database for reading + ifstream faDb(_dbFile.c_str(), ios::in); + if ( !faDb ) { + cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" 
<< endl; + exit (1); + } + + // open and memory-map genome file + FastaReference fr; + bool memmap = true; + fr.open(_dbFile, memmap); + + bool headerReported = false; + BED bed, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + string sequence; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + if (headerReported == false) { + PrintHeader(); + headerReported = true; + } + // make sure we are extracting >= 1 bp + if (bed.zeroLength == false) { + size_t seqLength = fr.sequenceLength(bed.chrom); + // make sure this feature will not exceed the end of the chromosome. + if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) + { + // grab the dna at this interval + int length = bed.end - bed.start; + // report the sequence's content + string dna = fr.getSubSequence(bed.chrom, bed.start, length); + // rev comp si necessaire + if ((_forceStrand == true) && (bed.strand == "-")) + reverseComplement(dna); + ReportDnaProfile(bed, dna, length); + bed = nullBed; + } + else + { + cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " + << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; + } + } + // handle zeroLength + else { + cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; + } + bed = nullBed; + } + } + _bed->Close(); +} + + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/nucBed/nucBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  nucBed.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef NUCBED_H
#define NUCBED_H

#include "bedFile.h"
#include "sequenceUtils.h"
#include "Fasta.h"
#include <vector>
#include <iostream>
#include <fstream>

using namespace std;

//************************************************
// Class methods and elements
//************************************************
// Profiles the nucleotide content of the intervals of a BED/GFF/VCF file
// against a FASTA file.
class NucBed {

public:

    // constructor
    //   dbFile      - FASTA file holding the sequences
    //   bedFile     - BED/GFF/VCF file with the intervals to profile
    //   printSeq    - also report the extracted sequence
    //   hasPattern  - also report how often 'pattern' occurs
    //   pattern     - the user-defined pattern to count
    //   forceStrand - profile the sequence according to strand
    NucBed(string &dbFile, string &bedFile, bool printSeq,
           bool hasPattern, const string &pattern,
           bool forceStrand);
    // destructor
    ~NucBed(void);

    // Profiles every interval of the input file and reports the result.
    void ProfileDNA();


private:
    string _dbFile;        // path of the FASTA file
    string _bedFile;       // path of the BED/GFF/VCF file
    bool   _printSeq;      // report the extracted sequence?
    bool   _hasPattern;    // report the pattern count?
    string _pattern;       // pattern counted when _hasPattern is set
    bool   _forceStrand;   // profile according to strand?

    // instance of a bed file class.
    BedFile *_bed;
    // Prints the header line describing the reported columns.
    void PrintHeader(void);
    // Reports the nucleotide profile of 'sequence' for the interval 'bed'.
    void ReportDnaProfile(const BED& bed, const string &sequence, int seqLength);
};

#endif
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,147 @@ +/***************************************************************************** + nucBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "nucBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "nucBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string fastaDbFile; + string bedFile; + string pattern; + + // checks for existence of parameters + bool haveFastaDb = false; + bool haveBed = false; + bool printSeq = false; + bool hasPattern = false; + bool forceStrand = false; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-fi", 3, parameterLength)) { + if ((i+1) < argc) { + haveFastaDb = true; + fastaDbFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { + if ((i+1) < argc) { + haveBed = true; + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-seq", 4, parameterLength)) { + printSeq = true; + } + else if(PARAMETER_CHECK("-s", 2, 
parameterLength)) { + forceStrand = true; + } + else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) { + if ((i+1) < argc) { + hasPattern = true; + pattern = argv[i + 1]; + i++; + } + } + else { + cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + if (!haveFastaDb || !haveBed) { + showHelp = true; + } + + if (!showHelp) { + + NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq, hasPattern, pattern, forceStrand); + delete nuc; + + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Profiles the nucleotide content of intervals in a fasta file." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-fi\tInput FASTA file" << endl << endl; + cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl; + cerr << "\t-s\tProfile the sequence according to strand." << endl << endl; + cerr << "\t-seq\tPrint the extracted sequence" << endl << endl; + cerr << "\t-pattern\tReport the number of times a user-defined sequence is observed (case-insensitive)." << endl << endl; + + + cerr << "Output format: " << endl; + cerr << "\tThe following information will be reported after each original BED entry:" << endl; + cerr << "\t 1) %AT content" << endl; + cerr << "\t 2) %GC content" << endl; + cerr << "\t 3) Number of As observed" << endl; + cerr << "\t 4) Number of Cs observed" << endl; + cerr << "\t 5) Number of Gs observed" << endl; + cerr << "\t 6) Number of Ts observed" << endl; + cerr << "\t 7) Number of Ns observed" << endl; + cerr << "\t 8) Number of other bases observed" << endl; + cerr << "\t 9) The length of the explored sequence/interval." 
<< endl; + cerr << "\t 10) The sequence extracted from the FASTA file. (optional, if -seq is used)" << endl; + cerr << "\t 11) The number of times a user defined pattern was observed. (optional, if -pattern is used.)" << endl; + + // end the program here + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/overlap/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/overlap/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds the overlap program from its own source plus the shared utility
# objects found in $(OBJ_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/


# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= overlap.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= overlap


all: $(PROGRAM)

.PHONY: all


$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own source file.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are produced by the utilities' own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/



# Removes everything in the object and binary directories named above.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/overlap/overlap.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/overlap/overlap.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,202 @@ +/***************************************************************************** + overlap.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include <vector> +#include <iostream> +#include <fstream> +#include <stdlib.h> + +#include "version.h" +#include "lineFileUtilities.h" +#include "bedFile.h" +using namespace std; + + +// define our program name +#define PROGRAM_NAME "overlap" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + + +// function declarations +void ShowHelp(void); +void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col); +void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col); + +int main(int argc, char* argv[]) { + + // input files + string inFile = "stdin"; + string columns; + + // our configuration variables + bool showHelp = false; + bool haveInFile = true; + bool haveColumns = false; + + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + inFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-cols", 5, parameterLength)) { + haveColumns = true; + columns = argv[i + 1]; + i++; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" 
<< endl << endl; + showHelp = true; + } + } + + // make sure we have an input files + if (!haveInFile ) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + + // Split the column string sent by the user into discrete column numbers + // A comma separated string is expected. + vector<string> posColumns; + Tokenize(columns, posColumns, ","); + + if (posColumns.size() != 4) { + cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; + ShowHelp(); + } + else { + short s1, e1, s2, e2; + s1 = atoi(posColumns[0].c_str()); + e1 = atoi(posColumns[1].c_str()); + s2 = atoi(posColumns[2].c_str()); + e2 = atoi(posColumns[3].c_str()); + + DetermineInput(inFile, s1, e1, s2, e2); + } + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Computes the amount of overlap (positive values)" << endl; + cerr << "\t or distance (negative values) between genome features" << endl; + cerr << "\t and reports the result at the end of the same line." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl; + + cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl; + cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." 
<< endl; + cerr << "\t\tThe columns must be listed in the following order: " << endl << endl; + cerr << "\t\tstart1,end1,start2,end2" << endl << endl; + + cerr << "Example: " << endl; + cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl; + cerr << "\tchr1 10 20 A chr1 15 25 B" << endl; + cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl; + cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl; + cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl; + cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl; + + // end the program here + exit(1); + +} + + +void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { + + + if (inFile != "stdin") { // process a file + + ifstream in(inFile.c_str(), ios::in); + if ( !in ) { + cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col); + } + else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col); +} + + +void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { + + int lineNum = 0; + string inLine; + vector<string> inFields; + + int overlap; + + char *s1End, *e1End, *s2End, *e2End; + long s1, e1, s2, e2; + + while (getline(input, inLine)) { + lineNum++; + Tokenize(inLine, inFields); + + if (inFields.size() > 1) { + + // test if columns 2 and 3 are integers. If so, assume BED. + s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10); + e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10); + s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10); + e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10); + + // strtol will set pointers to the start of the string if non-integral, base 10 + // if they all check out, we have valid numeric columns. Otherwise, complain. 
+ if (s1End != inFields[s1Col-1].c_str() && + e1End != inFields[e1Col-1].c_str() && + s2End != inFields[s2Col-1].c_str() && + e2End != inFields[e2Col-1].c_str()) { + + overlap = overlaps(s1, e1, s2, e2); + printf("%s\t%d\n", inLine.c_str(), overlap); + } + else { + cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl; + exit(1); + } + } + inFields.clear(); + } +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,52 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ \ + -I$(UTILITIES_DIR)/bedFile/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/version/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/BamTools/include \ + -I$(UTILITIES_DIR)/BamTools-Ancillary + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= pairToBedMain.cpp pairToBed.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= pairToBed + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,525 @@\n+/*****************************************************************************\n+ pairToBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "pairToBed.h"\n+\n+\n+bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n+\n+ if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n+ return true;\n+ }\n+ else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n+ return true;\n+ }\n+ else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n+ return true;\n+ }\n+ else return false;\n+}\n+\n+\n+/*\n+ Constructor\n+*/\n+\n+\n+BedIntersectPE::BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,\n+ string searchType, bool sameStrand, bool diffStrand, bool bamInput,\n+ bool bamOutput, bool uncompressedBam, bool useEditDistance) {\n+\n+ _bedAFilePE = bedAFilePE;\n+ _bedBFile = bedBFile;\n+ _overlapFraction = overlapFraction;\n+ _sameStrand = sameStrand;\n+ _diffStrand = diffStrand;\n+ _useEditDistance = useEditDistance;\n+ _searchType = searchType;\n+ _bamInput = bamInput;\n+ _bamOutput = bamOutput;\n+ _isUncompressedBam = uncompressedBam;\n+\n+ _bedA = new BedFilePE(bedAFilePE);\n+ _bedB = new BedFile(bedBFile);\n+\n+ if (_bamInput == false)\n+ IntersectBedPE();\n+ else\n+ IntersectBamPE(_bedAFilePE);\n+}\n+\n+\n+/*\n+ Destructor\n+*/\n+\n+BedIntersectPE::~BedIntersectPE(void) {\n+}\n+\n+\n+\n+void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) {\n+\n+ // list of hits on each end of BEDPE\n+ // that exceed the requested overlap fraction\n+ vector<BED> qualityHits1;\n+ vector<BED> qualityHits2;\n+\n+ // count of hits on each end of 
BEDPE\n+ // that exceed the requested overlap fraction\n+ int numOverlapsEnd1 = 0;\n+ int numOverlapsEnd2 = 0;\n+\n+ // make sure we have a valid chromosome before we search\n+ if (a.chrom1 != ".") {\n+ // Find the quality hits between ***end1*** of the BEDPE and the B BED file\n+ _bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, _sameStrand, _diffStrand);\n+\n+ vector<BED>::const_iterator h = hits1.begin();\n+ vector<BED>::const_iterator hitsEnd = hits1.end();\n+ for (; h != hitsEnd; ++h) {\n+\n+ int s = max(a.start1, h->start);\n+ int e = min(a.end1, h->end);\n+ int overlapBases = (e - s); // the number of overlapping bases b/w a and b\n+ int aLength = (a.end1 - a.start1); // the length of a in b.p.\n+\n+ // is there enough overlap relative to the user\'s request? (default ~ 1bp)\n+ if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n+ numOverlapsEnd1++;\n+\n+ if (type == "either") {\n+ _bedA->reportBedPETab(a);\n+ _bedB->reportBedNewLine(*h);\n+ }\n+ else {\n+ qualityHits1.push_back(*h);\n+ }\n+ }\n+ }\n+ }\n+\n+\n+ // make sure we have a valid chromosome before we search\n+ if (a.chrom2 != ".") {\n+ // Now find the quality hits between ***end2*** of the BEDPE and the B BED file\n+ _bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, _sameStrand, _diffStrand);\n+\n+ vector<BED>::const_iterator h = hits2.begin();\n+ vector<BED>::const_iterator hitsEnd = hits2.end();\n+ for (; h != hitsEnd; ++h) {\n+\n+ int s = max(a.start2, h->start);\n+ int '..b'\n+ reader.Close();\n+ if (_bamOutput == true) {\n+ writer.Close();\n+ }\n+}\n+\n+\n+void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,\n+ const RefVector &refs, BamWriter &writer) {\n+\n+ vector<BED> hits, hits1, hits2; // vector of potential hits\n+ hits.reserve(1000); // reserve some space\n+ hits1.reserve(1000);\n+ hits2.reserve(1000);\n+\n+ bool overlapsFound; // flag to indicate if overlaps were found\n+\n+ if ( 
(_searchType == "either") || (_searchType == "xor") ||\n+ (_searchType == "both") || (_searchType == "notboth") ||\n+ (_searchType == "neither") ) {\n+\n+ // create a new BEDPE feature from the BAM alignments.\n+ BEDPE a;\n+ ConvertBamToBedPE(bam1, bam2, refs, a);\n+ if (_bamOutput == true) { // BAM output\n+ // write to BAM if correct hits found\n+ overlapsFound = FindOneOrMoreOverlaps(a, _searchType);\n+ if (overlapsFound == true) {\n+ writer.SaveAlignment(bam1);\n+ writer.SaveAlignment(bam2);\n+ }\n+ }\n+ else { // BEDPE output\n+ FindOverlaps(a, hits1, hits2, _searchType);\n+ hits1.clear();\n+ hits2.clear();\n+ }\n+ }\n+ else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {\n+ // only look for ispan and ospan when both ends are mapped.\n+ if (bam1.IsMapped() && bam2.IsMapped()) {\n+ // only do an inspan or outspan check if the alignment is intrachromosomal\n+ if (bam1.RefID == bam2.RefID) {\n+ // create a new BEDPE feature from the BAM alignments.\n+ BEDPE a;\n+ ConvertBamToBedPE(bam1, bam2, refs, a);\n+ if (_bamOutput == true) { // BAM output\n+ // look for overlaps, and write to BAM if >=1 were found\n+ overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n+ if (overlapsFound == true) {\n+ writer.SaveAlignment(bam1);\n+ writer.SaveAlignment(bam2);\n+ }\n+ }\n+ else { // BEDPE output\n+ FindSpanningOverlaps(a, hits, _searchType);\n+ hits.clear();\n+ }\n+ }\n+ }\n+ }\n+ else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {\n+ // only look for notispan and notospan when both ends are mapped.\n+ if (bam1.IsMapped() && bam2.IsMapped()) {\n+ // only do an inspan or outspan check if the alignment is intrachromosomal\n+ if (bam1.RefID == bam2.RefID) {\n+ // create a new BEDPE feature from the BAM alignments.\n+ BEDPE a;\n+ ConvertBamToBedPE(bam1, bam2, refs, a);\n+ if (_bamOutput == true) { // BAM output\n+ // write to BAM if there were no overlaps\n+ overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n+ if 
(overlapsFound == false) {\n+ writer.SaveAlignment(bam1);\n+ writer.SaveAlignment(bam2);\n+ }\n+ }\n+ else { // BEDPE output\n+ FindSpanningOverlaps(a, hits, _searchType);\n+ hits.clear();\n+ }\n+ }\n+ // if inter-chromosomal or orphaned, we know it\'s not ispan and not ospan\n+ else if (_bamOutput == true) {\n+ writer.SaveAlignment(bam1);\n+ writer.SaveAlignment(bam2);\n+ }\n+ }\n+ // if both ends aren\'t mapped, we know that it\'s notispan and not ospan\n+ else if (_bamOutput == true) {\n+ writer.SaveAlignment(bam1);\n+ writer.SaveAlignment(bam2);\n+ }\n+ }\n+}\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,161 @@ +/***************************************************************************** + pairToBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef INTERSECTBED_H +#define INTERSECTBED_H + +#include "api/BamReader.h" +#include "api/BamWriter.h" +#include "api/BamAux.h" +using namespace BamTools; + +#include "bedFile.h" +#include "bedFilePE.h" +#include <vector> +#include <iostream> +#include <fstream> + +using namespace std; + + + +/************************************************** +Helper function protoypes +**************************************************/ +void IsCorrectMappingForBEDPE (const BamAlignment &bam, const RefVector &refs, BEDPE &a); + + + +//************************************************ +// Class methods and elements +//************************************************ +class BedIntersectPE { + +public: + + // constructor + BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction, + string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance); + // destructor + ~BedIntersectPE(void); + + void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type); + + bool FindOneOrMoreOverlaps(const BEDPE &, const string &type); + + void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type); + bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type); + + void IntersectBedPE(); + void IntersectBamPE(string bamFile); + + void DetermineBedPEInput(); + +private: + + string _bedAFilePE; + string _bedBFile; + float _overlapFraction; + string _searchType; + bool _sameStrand; + bool _diffStrand; + bool _useEditDistance; + bool _bamInput; + bool _bamOutput; + 
bool _isUncompressedBam; + + // instance of a paired-end bed file class. + BedFilePE *_bedA; + + // instance of a bed file class. + BedFile *_bedB; + + inline + void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) { + + // initialize BEDPE variables + a.start1 = a.start2 = a.end1 = a.end2 = -1; + a.chrom1 = a.chrom2 = "."; + a.strand1 = a.strand2 = '.'; + uint32_t editDistance1, editDistance2; + editDistance1 = editDistance2 = 0; + + // take the qname from end 1. + a.name = bam1.Name; + + // end 1 + if (bam1.IsMapped()) { + a.chrom1 = refs.at(bam1.RefID).RefName; + a.start1 = bam1.Position; + a.end1 = bam1.GetEndPosition(false, false); + a.strand1 = "+"; + if (bam1.IsReverseStrand()) a.strand1 = "-"; + + // extract the edit distance from the NM tag + // if possible. otherwise, complain. + if (_useEditDistance == true) { + if (bam1.GetTag("NM", editDistance1) == false) { + cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; + exit(1); + } + } + } + + // end 2 + if (bam2.IsMapped()) { + a.chrom2 = refs.at(bam2.RefID).RefName; + a.start2 = bam2.Position; + a.end2 = bam2.GetEndPosition(false, false); + a.strand2 = "+"; + if (bam2.IsReverseStrand()) a.strand2 = "-"; + + // extract the edit distance from the NM tag + // if possible. otherwise, complain. + if (_useEditDistance == true) { + if (bam2.GetTag("NM", editDistance2) == false) { + cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; + exit(1); + } + } + } + + // swap the ends if necessary + if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) { + swap(a.chrom1, a.chrom2); + swap(a.start1, a.start2); + swap(a.end1, a.end2); + swap(a.strand1, a.strand2); + } + + // compute the minimum mapping quality b/w the two ends of the pair. 
+ a.score = "0"; + if (_useEditDistance == false) { + if (bam1.IsMapped() == true && bam2.IsMapped() == true) + a.score = ToString(min(bam1.MapQuality, bam2.MapQuality)); + } + // BEDPE using edit distance + else { + if (bam1.IsMapped() == true && bam2.IsMapped() == true) + a.score = ToString((int) (editDistance1 + editDistance2)); + else if (bam1.IsMapped() == true) + a.score = ToString((int) editDistance1); + else if (bam2.IsMapped() == true) + a.score = ToString((int) editDistance2); + } + }; + + inline + void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2, + const RefVector &refs, + BamWriter &writer); +}; + +#endif /* PEINTERSECTBED_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,237 @@\n+/*****************************************************************************\n+ pairToBedMain.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "pairToBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "pairToBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedAFile;\n+ string bedBFile;\n+\n+ // input arguments\n+ float overlapFraction = 1E-9;\n+ string searchType = "either";\n+\n+ // flags to track parameters\n+ bool haveBedA = false;\n+ bool haveBedB = false;\n+ bool haveSearchType = false;\n+ bool haveFraction = false;\n+ bool sameStrand = false;\n+ bool diffStrand = false;\n+ bool useEditDistance = false;\n+ bool inputIsBam = false;\n+ bool outputIsBam = true;\n+ bool uncompressedBam = false;\n+\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ outputIsBam = 
false;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ inputIsBam = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedB = true;\n+ bedBFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n+ outputIsBam = false;\n+ }\n+ else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n+ useEditDistance = true;\n+ }\n+ else if(PARAMETER_CHECK("-type", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveSearchType = true;\n+ searchType = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFraction = true;\n+ overlapFraction = atof(argv[i + 1]);\n+ i++;\n+ }\n+ }\n+ else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n+ sameStrand = true;\n+ }\n+ else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n+ diffStrand = true;\n+ }\n+ else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n+ uncompressedBam = true;\n+ }\n+ else {\n+ cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n+ showHelp = true;\n+ }\n+ }\n+\n+\n+ // make sure we have both input files\n+ if (!haveBedA || !haveBedB)'..b' << endl << endl;\n+\n+ cerr << "Options: " << endl;\n+\n+ cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl;\n+ cerr << "\\t\\t- Requires BAM to be grouped or sorted by query." << endl << endl;\n+\n+ cerr << "\\t-ubam\\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n+ cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+ cerr << "\\t-bedpe\\t" << "When using BAM input (-abam), write output as BEDPE. The default" << endl;\n+ cerr << "\\t\\tis to write output in BAM when using -abam." 
<< endl << endl;\n+\n+ cerr << "\\t-ed\\t" << "Use BAM total edit distance (NM tag) for BEDPE score." << endl;\n+ cerr << "\\t\\t- Default for BEDPE is to use the minimum of" << endl;\n+ cerr << "\\t\\t of the two mapping qualities for the pair." << endl;\n+ cerr << "\\t\\t- When -ed is used the total edit distance" << endl;\n+ cerr << "\\t\\t from the two mates is reported as the score." << endl << endl;\n+\n+ cerr << "\\t-f\\t" << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl;\n+ cerr << "\\t\\tDefault is 1E-9 (effectively 1bp)." << endl << endl;\n+\n+ cerr << "\\t-s\\t" << "Require same strandedness when finding overlaps." << endl;\n+ cerr << "\\t\\tDefault is to ignore stand." << endl;\n+ cerr << "\\t\\tNot applicable with -type inspan or -type outspan." << endl << endl;\n+\n+ cerr << "\\t-S\\t" << "Require different strandedness when finding overlaps." << endl;\n+ cerr << "\\t\\tDefault is to ignore stand." << endl;\n+ cerr << "\\t\\tNot applicable with -type inspan or -type outspan." << endl << endl;\n+\n+ cerr << "\\t-type \\t" << "Approach to reporting overlaps between BEDPE and BED." << endl << endl;\n+ cerr << "\\t\\teither\\tReport overlaps if either end of A overlaps B." << endl;\n+ cerr << "\\t\\t\\t- Default." << endl;\n+\n+ cerr << "\\t\\tneither\\tReport A if neither end of A overlaps B." << endl;\n+\n+ cerr << "\\t\\tboth\\tReport overlaps if both ends of A overlap B." << endl;\n+\n+ cerr << "\\t\\txor\\tReport overlaps if one and only one end of A overlaps B." << endl;\n+\n+ cerr << "\\t\\tnotboth\\tReport overlaps if neither end or one and only one " << endl;\n+ cerr << "\\t\\t\\tend of A overlap B. That is, xor + neither." << endl << endl;\n+\n+ cerr << "\\t\\tispan\\tReport overlaps between [end1, start2] of A and B." << endl;\n+ cerr << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+ cerr << "\\t\\tospan\\tReport overlaps between [start1, end2] of A and B." 
<< endl;\n+ cerr << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+ cerr << "\\t\\tnotispan\\tReport A if ispan of A doesn\'t overlap B." << endl;\n+ cerr << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+ cerr << "\\t\\tnotospan\\tReport A if ospan of A doesn\'t overlap B." << endl;\n+ cerr << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n+\n+ cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl;\n+\n+ exit(1);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToPair/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,44 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= pairToPairMain.cpp pairToPair.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= pairToPair + + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,216 @@\n+/*****************************************************************************\n+ pairToPair.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "pairToPair.h"\n+\n+\n+/*\n+ Constructor\n+*/\n+PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,\n+ string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) {\n+\n+ _bedAFilePE = bedAFilePE;\n+ _bedBFilePE = bedBFilePE;\n+ _overlapFraction = overlapFraction;\n+ _searchType = searchType;\n+ _ignoreStrand = ignoreStrand;\n+ _reqDiffNames = reqDiffNames;\n+ _slop = slop;\n+ _strandedSlop = strandedSlop;\n+\n+ _bedA = new BedFilePE(bedAFilePE);\n+ _bedB = new BedFilePE(bedBFilePE);\n+\n+ IntersectPairs();\n+}\n+\n+\n+/*\n+ Destructor\n+*/\n+PairToPair::~PairToPair(void) {\n+}\n+\n+\n+\n+void PairToPair::IntersectPairs() {\n+\n+ // load the "B" bed file into a map so\n+ // that we can easily compare "A" to it for overlaps\n+ _bedB->loadBedPEFileIntoMap();\n+\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+ BEDPE a, nullBedPE;\n+\n+ _bedA->Open();\n+ while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ // identify overlaps b/w the pairs\n+ FindOverlaps(a);\n+ a = nullBedPE;\n+ }\n+ }\n+ _bedA->Close();\n+}\n+// END IntersectPE\n+\n+\n+\n+void PairToPair::FindOverlaps(const BEDPE &a) {\n+ //\n+ vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2;\n+\n+ // add the appropriate slop to the starts and ends\n+ int start1 = a.start1;\n+ int end1 = a.end1;\n+ int start2 = a.start2;\n+ int end2 = a.end2;\n+\n+ if (_strandedSlop == true) {\n+ if (a.strand1 == "+")\n+ end1 += _slop;\n+ else\n+ start1 -= 
_slop;\n+ if (a.strand2 == "+")\n+ end2 += _slop;\n+ else\n+ start2 -= _slop;\n+ }\n+ else {\n+ (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0;\n+ (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0;\n+ end1 += _slop;\n+ end2 += _slop;\n+ }\n+\n+ // Find the _potential_ hits between each end of A and B\n+ _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1\n+ _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1\n+ _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2\n+ _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2\n+\n+ unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size());\n+ unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size());\n+\n+ \n+ // report the fact that no hits were found iff _searchType is neither.\n+ if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {\n+ _bedA->reportBedPENewLine(a);\n+ }\n+ else if (_searchType == "both") {\n+ bool found1 = false;\n+ bool found2 = false;\n+ if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0))\n+ found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2);\n+ if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0))\n+ found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2);\n+ }\n+ else if (_searchType == "notboth") {\n+ bool found1 = false;\n+ bool found2 = false'..b'End(a, hitsA2B1, hitsA1B2);\n+ }\n+}\n+\n+\n+bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n+ const vector<MATE> &qualityHitsEnd2) {\n+\n+ map<unsigned int, vector<MATE>, less<int> > hitsMap;\n+\n+ for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != 
qualityHitsEnd1.end(); ++h) {\n+ hitsMap[h->lineNum].push_back(*h);\n+ }\n+ for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n+ hitsMap[h->lineNum].push_back(*h);\n+ }\n+\n+\n+ bool bothFound = false;\n+ for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n+ \n+ // hits on both sides\n+ if (m->second.size() >= 2) {\n+ bothFound = true;\n+ MATE b1 = m->second[0];\n+ MATE b2 = m->second[1];\n+\n+ if (_searchType == "both") {\n+ _bedA->reportBedPETab(a);\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+ b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n+ b1.bed.name.c_str(), b1.bed.score.c_str(),\n+ b1.bed.strand.c_str(), b2.bed.strand.c_str());\n+ for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+ printf("\\t%s", b1.bed.otherFields[i].c_str());\n+ printf("\\n");\n+ }\n+ }\n+ }\n+ return bothFound;\n+}\n+\n+\n+void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n+ const vector<MATE> &qualityHitsEnd2) {\n+\n+ map<unsigned int, vector<MATE>, less<int> > hitsMap;\n+\n+ for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {\n+ hitsMap[h->lineNum].push_back(*h);\n+ }\n+ for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n+ hitsMap[h->lineNum].push_back(*h);\n+ }\n+\n+ for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n+ if (m->second.size() >= 1) {\n+\n+ if ((m->second.size()) == 2) {\n+ MATE b1 = m->second[0];\n+ MATE b2 = m->second[1];\n+\n+ _bedA->reportBedPETab(a);\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+ b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n+ b1.bed.name.c_str(), b1.bed.score.c_str(),\n+ b1.bed.strand.c_str(), 
b2.bed.strand.c_str());\n+ for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+ printf("\\t%s", b1.bed.otherFields[i].c_str());\n+ printf("\\n");\n+ }\n+ else {\n+ MATE b1 = m->second[0];\n+\n+ _bedA->reportBedPETab(a);\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n+ b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end,\n+ b1.bed.name.c_str(), b1.bed.score.c_str(),\n+ b1.bed.strand.c_str(), b1.mate->bed.strand.c_str());\n+ for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n+ printf("\\t%s", b1.bed.otherFields[i].c_str());\n+ printf("\\n");\n+ }\n+ }\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,76 @@ +/***************************************************************************** + pairToPair.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef PAIRTOPAIR_H +#define PAIRTOPAIR_H + +#include "bedFile.h" +#include "bedFilePE.h" +#include <vector> +#include <iostream> +#include <fstream> + +using namespace std; + + + +//************************************************ +// Class methods and elements +//************************************************ +class PairToPair { + +public: + + // constructor + PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction, + string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop); + + // destructor + ~PairToPair(void); + + void IntersectPairs(); + + +private: + + string _bedAFilePE; + string _bedBFilePE; + + float _overlapFraction; + string _searchType; + bool _ignoreStrand; + bool _reqDiffNames; + int _slop; + bool _strandedSlop; + + // instance of a paired-end bed file class. + BedFilePE *_bedA; + + // instance of a bed file class. + BedFilePE *_bedB; + + // methods + // void FindOverlaps(const BEDPE &a, vector<MATE> &hitsA1B1, vector<MATE> &hitsA1B2, + // vector<MATE> &hitsA2B1, vector<MATE> &hitsA2B2); + void FindOverlaps(const BEDPE &a); + + void FindQualityHitsBetweenEnds(CHRPOS start, CHRPOS end, + const vector<MATE> &hits, vector<MATE> &qualityHits, int &numOverlaps); + + bool FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, + const vector<MATE> &qualityHitsEnd2); + + void FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, + const vector<MATE> &qualityHitsEnd2); + +}; + +#endif /* PAIRTOPAIR_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,188 @@ +/***************************************************************************** + pairToPairMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "pairToPair.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "pairToPair" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedAFile; + string bedBFile; + + // input arguments + float overlapFraction = 1E-9; + int slop = 0; + string searchType = "both"; + + // flags to track parameters + bool haveBedA = false; + bool haveBedB = false; + bool haveSearchType = false; + bool haveFraction = false; + bool ignoreStrand = false; + bool requireDifferentNames = false; + bool haveSlop = false; + bool strandedSlop = false; + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-a", 2, parameterLength)) { + if ((i+1) < argc) { + haveBedA = true; + bedAFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-b", 2, parameterLength)) { + if ((i+1) < argc) { + haveBedB = 
true; + bedBFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-type", 5, parameterLength)) { + if ((i+1) < argc) { + haveSearchType = true; + searchType = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + if ((i+1) < argc) { + haveFraction = true; + overlapFraction = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-slop", 5, parameterLength)) { + if ((i+1) < argc) { + haveSlop = true; + slop = atoi(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-ss", 3, parameterLength)) { + strandedSlop = true; + } + else if(PARAMETER_CHECK("-rdn", 4, parameterLength)) { + requireDifferentNames = true; + } + else if(PARAMETER_CHECK("-is", 3, parameterLength)) { + ignoreStrand = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + + // make sure we have both input files + if (!haveBedA || !haveBedB) { + cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; + showHelp = true; + } + + if (haveSearchType && (searchType != "neither") && (searchType != "both") && (searchType != "either") && (searchType != "notboth")) { + cerr << endl << "*****" << endl << "*****ERROR: Request \"both\",\"neither\",\"either\",or \"notboth\"" << endl << "*****" << endl; + showHelp = true; + } + + if (strandedSlop == true && haveSlop == false) { + cerr << endl << "*****" << endl << "*****ERROR: Need a -slop value if requesting -ss." 
<< endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + + PairToPair *bi = new PairToPair(bedAFile, bedBFile, overlapFraction, searchType, + ignoreStrand, requireDifferentNames, slop, strandedSlop); + delete bi; + return 0; + } + else { + ShowHelp(); + } +} + + +void ShowHelp(void) { + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Report overlaps between two paired-end BED files (BEDPE)." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <BEDPE> -b <BEDPE>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-f\t" << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl; + cerr << "\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; + + cerr << "\t-type \t" << "Approach to reporting overlaps between A and B." << endl << endl; + cerr << "\t\tneither\tReport overlaps if neither end of A overlaps B." << endl; + cerr << "\t\teither\tReport overlaps if either ends of A overlap B." << endl; + cerr << "\t\tboth\tReport overlaps if both ends of A overlap B." << endl; + cerr << "\t\tnotboth\tReport overlaps if one or neither of ends of A overlap B." << endl; + + cerr << "\t\t- Default = both." << endl << endl; + + cerr << "\t-slop \t" << "The amount of slop (in b.p.). to be added to each footprint." << endl; + cerr << "\t\t*Note*: Slop is subtracted from start1 and start2 and added to end1 and end2." << endl << endl; + + cerr << "\t-ss\t" << "Add slop based to each BEDPE footprint based on strand." << endl; + cerr << "\t\t- If strand is \"+\", slop is only added to the end coordinates." << endl; + cerr << "\t\t- If strand is \"-\", slop is only added to the start coordinates." << endl; + cerr << "\t\t- By default, slop is added in both directions." << endl << endl; + + cerr << "\t-is\t" << "Ignore strands when searching for overlaps." 
<< endl; + cerr << "\t\t- By default, strands are enforced." << endl << endl; + + cerr << "\t-rdn\t" << "Require the hits to have different names (i.e. avoid self-hits)." << endl; + cerr << "\t\t- By default, same names are allowed." << endl << endl; + + + cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl; + + // end the program here + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/shuffleBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the shuffleBed program.
# Objects are written to $(OBJ_DIR) and the linked binary to $(BIN_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# ----------------------------------------------------
# header search path: one -I entry per utility library
# ----------------------------------------------------
INCLUDES = $(addprefix -I$(UTILITIES_DIR)/, \
             bedFile/ \
             genomeFile/ \
             lineFileUtilities/ \
             version/ \
             gzstream/ \
             fileType/ \
             BamTools/include)

# ------------------------------------------------------------
# this tool's sources, and the utility objects it links against
# ------------------------------------------------------------
PROGRAM = shuffleBed
SOURCES = shuffleBedMain.cpp shuffleBed.cpp
OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES))
_EXT_OBJECTS = bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))

.PHONY: all clean

all: $(PROGRAM)

# link this tool's objects with the shared utility objects
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# compile each source; $(*F) is the file-name part of the target's stem
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# the utility objects are produced by the makefiles of their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything in the object and binary directories
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,244 @@\n+/*****************************************************************************\n+ shuffleBed.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "lineFileUtilities.h"\n+#include "shuffleBed.h"\n+\n+\n+BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, \n+ bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, \n+ float overlapFraction, int seed) {\n+\n+ _bedFile = bedFile;\n+ _genomeFile = genomeFile;\n+ _excludeFile = excludeFile;\n+ _includeFile = includeFile;\n+ _sameChrom = sameChrom;\n+ _haveExclude = haveExclude;\n+ _haveInclude = haveInclude;\n+ _overlapFraction = overlapFraction;\n+ _haveSeed = haveSeed;\n+\n+\n+ // use the supplied seed for the random\n+ // number generation if given. 
else,\n+ // roll our own.\n+ if (_haveSeed) {\n+ _seed = seed;\n+ srand(seed);\n+ }\n+ else {\n+ // thanks to Rob Long for the tip.\n+ _seed = (unsigned)time(0)+(unsigned)getpid();\n+ srand(_seed);\n+ }\n+\n+ _bed = new BedFile(bedFile);\n+ _genome = new GenomeFile(genomeFile);\n+ _chroms = _genome->getChromList();\n+ _numChroms = _genome->getNumberOfChroms();\n+\n+ if (_haveExclude) {\n+ _exclude = new BedFile(excludeFile);\n+ _exclude->loadBedFileIntoMap();\n+ }\n+ \n+ if (_haveInclude) {\n+ _include = new BedFile(includeFile);\n+ _include->loadBedFileIntoMapNoBin();\n+ \n+ _numIncludeChroms = 0;\n+ masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); \n+ masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end();\n+ for(; it != itEnd; ++it) {\n+ _includeChroms.push_back(it->first);\n+ _numIncludeChroms++;\n+ }\n+ }\n+\n+ if (_haveExclude == true && _haveInclude == false)\n+ ShuffleWithExclusions();\n+ else if (_haveExclude == false && _haveInclude == true)\n+ ShuffleWithInclusions();\n+ else\n+ Shuffle();\n+}\n+\n+\n+BedShuffle::~BedShuffle(void) {\n+\n+}\n+\n+\n+void BedShuffle::Shuffle() {\n+\n+ int lineNum = 0;\n+ BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n+ BedLineStatus bedStatus;\n+\n+ _bed->Open();\n+ while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ ChooseLocus(bedEntry);\n+ _bed->reportBedNewLine(bedEntry);\n+ bedEntry = nullBed;\n+ }\n+ }\n+ _bed->Close();\n+}\n+\n+\n+\n+void BedShuffle::ShuffleWithExclusions() {\n+\n+ int lineNum = 0;\n+ BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n+ BedLineStatus bedStatus;\n+\n+ _bed->Open();\n+ while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ // keep looking as long as the chosen\n+ // locus happens to overlap with regions\n+ // that the user wishes to exclude.\n+ int tries = 0;\n+ bool 
haveOverlap = false;\n+ do \n+ {\n+ // choose a new locus\n+ ChooseLocus(bedEntry);\n+ haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end,\n+ bedEntry.strand, false, _overlapFraction);\n+ tries++;\n+ } while ((haveOverlap == true) && (tries <= MAX_TRIES));\n+ \n+\n+ if (tries > MAX_TRIES) {\n+ cerr << "Error, line " << lineNum << ":'..b't avoid excluded regions. Ignoring entry and moving on." << endl;\n+ }\n+ else {\n+ _bed->reportBedNewLine(bedEntry);\n+ }\n+ }\n+ bedEntry = nullBed;\n+ }\n+ _bed->Close();\n+}\n+\n+\n+void BedShuffle::ShuffleWithInclusions() {\n+\n+ int lineNum = 0;\n+ BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n+ BedLineStatus bedStatus;\n+\n+ _bed->Open();\n+ while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ // choose a new locus\n+ ChooseLocusFromInclusionFile(bedEntry);\n+ _bed->reportBedNewLine(bedEntry);\n+ }\n+ bedEntry = nullBed;\n+ }\n+ _bed->Close();\n+}\n+\n+\n+void BedShuffle::ChooseLocus(BED &bedEntry) {\n+\n+ string chrom = bedEntry.chrom;\n+ CHRPOS start = bedEntry.start;\n+ CHRPOS end = bedEntry.end;\n+ CHRPOS length = end - start;\n+\n+ string randomChrom;\n+ CHRPOS randomStart;\n+ CHRPOS chromSize;\n+\n+ if (_sameChrom == false) {\n+ randomChrom = _chroms[rand() % _numChroms];\n+ chromSize = _genome->getChromSize(randomChrom);\n+ randomStart = rand() % chromSize;\n+ bedEntry.chrom = randomChrom;\n+ bedEntry.start = randomStart;\n+ bedEntry.end = randomStart + length;\n+ }\n+ else {\n+ chromSize = _genome->getChromSize(chrom);\n+ randomStart = rand() % chromSize;\n+ bedEntry.start = randomStart;\n+ bedEntry.end = randomStart + length;\n+ }\n+\n+ // ensure that the chosen location doesn\'t go past\n+ // the length of the chromosome. 
if so, keep looking\n+ // for a new spot.\n+ while (bedEntry.end > chromSize) {\n+ if (_sameChrom == false) {\n+ randomChrom = _chroms[rand() % _numChroms];\n+ chromSize = _genome->getChromSize(randomChrom);\n+ randomStart = rand() % chromSize;\n+ bedEntry.chrom = randomChrom;\n+ bedEntry.start = randomStart;\n+ bedEntry.end = randomStart + length;\n+ }\n+ else {\n+ chromSize = _genome->getChromSize(chrom);\n+ randomStart = rand() % chromSize;\n+ bedEntry.start = randomStart;\n+ bedEntry.end = randomStart + length;\n+ }\n+ }\n+}\n+\n+\n+void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {\n+\n+ string chrom = bedEntry.chrom;\n+ CHRPOS length = bedEntry.end - bedEntry.start;\n+\n+ string randomChrom;\n+ CHRPOS randomStart;\n+ BED includeInterval;\n+ \n+ if (_sameChrom == false) {\n+\n+ // grab a random chromosome from the inclusion file.\n+ randomChrom = _includeChroms[rand() % _numIncludeChroms];\n+ // get the number of inclusion intervals for that chrom\n+ size_t size = _include->bedMapNoBin[randomChrom].size();\n+ // grab a random interval on the chosen chromosome.\n+ size_t interval = rand() % size;\n+ // retreive a ranom -incl interval on the selected chrom\n+ includeInterval = _include->bedMapNoBin[randomChrom][interval];\n+\n+ bedEntry.chrom = randomChrom; \n+ }\n+ else {\n+ // get the number of inclusion intervals for the original chrom\n+ size_t size = _include->bedMapNoBin[chrom].size();\n+ // grab a random interval on the chosen chromosome.\n+ includeInterval = _include->bedMapNoBin[chrom][rand() % size];\n+ }\n+ \n+ randomStart = includeInterval.start + rand() % (includeInterval.size());\n+ bedEntry.start = randomStart;\n+ bedEntry.end = randomStart + length;\n+ \n+ // use recursion to ensure that the chosen location \n+ // doesn\'t go past the end of the chrom\n+ if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) {\n+ //bedEntry.end = _genome->getChromSize(chrom);\n+ ChooseLocusFromInclusionFile(bedEntry);\n+ }\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,76 @@ +/***************************************************************************** + shuffleBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include "genomeFile.h" + +#include <vector> +#include <iostream> +#include <fstream> +#include <map> +#include <cstdlib> +#include <ctime> +#include <sys/time.h> +#include <unistd.h> +#include <sys/types.h> +using namespace std; + +const int MAX_TRIES = 1000000; + +//************************************************ +// Class methods and elements +//************************************************ +class BedShuffle { + +public: + + // constructor + BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, + bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, + float overlapFraction, int seed); + + // destructor + ~BedShuffle(void); + +private: + + string _bedFile; + string _genomeFile; + string _excludeFile; + string _includeFile; + float _overlapFraction; + int _seed; + bool _sameChrom; + bool _haveExclude; + bool _haveInclude; + bool _haveSeed; + + + // The BED file from which to compute coverage. + BedFile *_bed; + BedFile *_exclude; + BedFile *_include; + + GenomeFile *_genome; + + vector<string> _chroms; + int _numChroms; + vector<string> _includeChroms; + int _numIncludeChroms; + + // methods + void Shuffle(); + void ShuffleWithExclusions(); + void ShuffleWithInclusions(); + + void ChooseLocus(BED &); + void ChooseLocusFromInclusionFile(BED &); +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,188 @@ +/***************************************************************************** + shuffleBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "shuffleBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "shuffleBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + string excludeFile; + string includeFile; + string genomeFile; + + bool haveBed = true; + bool haveGenome = false; + bool haveExclude = false; + bool haveInclude = false; + bool haveSeed = false; + float overlapFraction = 0.0; + int seed = -1; + bool sameChrom = false; + + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-g", 2, parameterLength)) { + if ((i+1) < argc) { + haveGenome = true; + genomeFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-excl", 5, parameterLength)) { + if ((i+1) < argc) { + haveExclude = true; + excludeFile = argv[i + 1]; + i++; + } + } + else 
if(PARAMETER_CHECK("-incl", 5, parameterLength)) { + if ((i+1) < argc) { + haveInclude = true; + includeFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-seed", 5, parameterLength)) { + if ((i+1) < argc) { + haveSeed = true; + seed = atoi(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-chrom", 6, parameterLength)) { + sameChrom = true; + } + else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + if ((i+1) < argc) { + overlapFraction = atof(argv[i + 1]); + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed || !haveGenome) { + cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; + showHelp = true; + } + + if (haveInclude && haveExclude) { + cerr << endl << "*****" << endl << "*****ERROR: Cannot use -incl and -excl together." << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedShuffle *bc = new BedShuffle(bedFile, genomeFile, excludeFile, includeFile, + haveSeed, haveExclude, haveInclude, sameChrom, + overlapFraction, seed); + delete bc; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Randomly permute the locations of a feature file among a genome." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-excl\t" << "A BED/GFF/VCF file of coordinates in which features in -i" << endl; + cerr << "\t\tshould not be placed (e.g. gaps.bed)." 
<< endl << endl; + + cerr << "\t-incl\t" << "Instead of randomly placing features in a genome, the -incl" << endl; + cerr << "\t\toptions defines a BED/GFF/VCF file of coordinates in which " << endl; + cerr << "\t\tfeatures in -i should be randomly placed (e.g. genes.bed). " << endl << endl; + + cerr << "\t-chrom\t" << "Keep features in -i on the same chromosome."<< endl; + cerr << "\t\t- By default, the chrom and position are randomly chosen." << endl << endl; + + cerr << "\t-seed\t" << "Supply an integer seed for the shuffling." << endl; + cerr << "\t\t- By default, the seed is chosen automatically." << endl; + cerr << "\t\t- (INTEGER)" << endl << endl; + + cerr << "\t-f\t" << "Maximum overlap (as a fraction of the -i feature) with an -excl" << endl; + cerr << "\t\tfeature that is tolerated before searching for a new, " << endl; + cerr << "\t\trandomized locus. For example, -f 0.10 allows up to 10%" << endl; + cerr << "\t\tof a randomized feature to overlap with a given feature" << endl; + cerr << "\t\tin the -excl file. **Cannot be used with -incl file.**" << endl; + cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; + cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; + cerr << "\t <chromName><TAB><chromSize>" << endl << endl; + cerr << "\tFor example, Human (hg19):" << endl; + cerr << "\tchr1\t249250621" << endl; + cerr << "\tchr2\t243199373" << endl; + cerr << "\t..." << endl; + cerr << "\tchr18_gl000207_random\t4262" << endl << endl; + + + cerr << "Tips: " << endl; + cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; + cerr << "\tchromosome sizes. For example, H. 
sapiens:" << endl << endl; + cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + + + // end the program here + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/slopBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the slopBed program.
# Objects are written to $(OBJ_DIR) and the linked binary to $(BIN_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# ----------------------------------------------------
# header search path: one -I entry per utility library
# ----------------------------------------------------
INCLUDES = $(addprefix -I$(UTILITIES_DIR)/, \
             bedFile/ \
             genomeFile/ \
             lineFileUtilities/ \
             version/ \
             gzstream/ \
             fileType/ \
             BamTools/include)

# ------------------------------------------------------------
# this tool's sources, and the utility objects it links against
# ------------------------------------------------------------
PROGRAM = slopBed
SOURCES = slopBedMain.cpp slopBed.cpp
OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES))
_EXT_OBJECTS = bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(_EXT_OBJECTS))
BUILT_OBJECTS = $(addprefix $(OBJ_DIR)/,$(OBJECTS))

.PHONY: all clean

all: $(PROGRAM)

# link this tool's objects with the shared utility objects
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# compile each source; $(*F) is the file-name part of the target's stem
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# the utility objects are produced by the makefiles of their own directories
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything in the object and binary directories
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,91 @@ +/***************************************************************************** + slopBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "slopBed.h" + + +BedSlop::BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional) { + + _bedFile = bedFile; + _genomeFile = genomeFile; + _forceStrand = forceStrand; + _leftSlop = leftSlop; + _rightSlop = rightSlop; + _fractional = fractional; + + _bed = new BedFile(bedFile); + _genome = new GenomeFile(genomeFile); + + // get going, slop it up. + SlopBed(); +} + + +BedSlop::~BedSlop(void) { + +} + + +void BedSlop::SlopBed() { + + int lineNum = 0; + BED bedEntry, nullBed; // used to store the current BED line from the BED file. + BedLineStatus bedStatus; + + _bed->Open(); + bedStatus = _bed->GetNextBed(bedEntry, lineNum); + while (bedStatus != BED_INVALID) { + if (bedStatus == BED_VALID) { + if (_fractional == false) { + AddSlop(bedEntry, (int) _leftSlop, (int) _rightSlop); + } + else { + int leftSlop = (int) (_leftSlop * bedEntry.size()); + int rightSlop = (int) (_rightSlop * bedEntry.size()); + AddSlop(bedEntry, leftSlop, rightSlop); + } + _bed->reportBedNewLine(bedEntry); + bedEntry = nullBed; + } + bedStatus = _bed->GetNextBed(bedEntry, lineNum); + } + _bed->Close(); +} + + +void BedSlop::AddSlop(BED &bed, int leftSlop, int rightSlop) { + + // special handling if the BED entry is on the negative + // strand and the user cares about strandedness. 
+ CHRPOS chromSize = _genome->getChromSize(bed.chrom); + + if ( (_forceStrand) && (bed.strand == "-") ) { + // inspect the start + if ( (static_cast<int>(bed.start) - rightSlop) > 0 ) bed.start -= rightSlop; + else bed.start = 0; + + // inspect the start + if ( (static_cast<int>(bed.end) + leftSlop) <= static_cast<int>(chromSize)) bed.end += leftSlop; + else bed.end = chromSize; + } + else { + // inspect the start + if ( (static_cast<int>(bed.start) - leftSlop) > 0) bed.start -= leftSlop; + else bed.start = 0; + + // inspect the end + if ( (static_cast<int>(bed.end) + rightSlop) <= static_cast<int>(chromSize)) bed.end += rightSlop; + else bed.end = chromSize; + } +} + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/slopBed/slopBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,59 @@ +/***************************************************************************** + slopBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ + +#include "bedFile.h" +#include "genomeFile.h" + +#include <vector> +#include <iostream> +#include <fstream> +#include <map> +#include <cstdlib> +#include <ctime> +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class BedSlop { + +public: + + // constructor + BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional); + + // destructor + ~BedSlop(void); + + + +private: + + string _bedFile; + string _genomeFile; + + bool _forceStrand; + float _leftSlop; + float _rightSlop; + bool _fractional; + + BedFile *_bed; + GenomeFile *_genome; + + // methods + + void SlopBed(); + + // method to add requested "slop" to a single BED entry + void AddSlop(BED &bed, int leftSlop, int rightSlop); +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,190 @@ +/***************************************************************************** + slopBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "slopBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "slopBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + string genomeFile; + + bool haveBed = true; + bool haveGenome = false; + bool haveLeft = false; + bool haveRight = false; + bool haveBoth = false; + + bool forceStrand = false; + float leftSlop = 0.0; + float rightSlop = 0.0; + bool fractional = false; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-g", 2, parameterLength)) { + if ((i+1) < argc) { + haveGenome = true; + genomeFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-l", 2, parameterLength)) { + if ((i+1) < argc) { + haveLeft = true; + leftSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-r", 2, 
parameterLength)) { + if ((i+1) < argc) { + haveRight = true; + rightSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-b", 2, parameterLength)) { + if ((i+1) < argc) { + haveBoth = true; + leftSlop = atof(argv[i + 1]); + rightSlop = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-s", 2, parameterLength)) { + forceStrand = true; + } + else if(PARAMETER_CHECK("-pct", 4, parameterLength)) { + fractional = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed || !haveGenome) { + cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; + showHelp = true; + } + if (!haveLeft && !haveRight && !haveBoth) { + cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl; + showHelp = true; + } + if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) { + cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl; + showHelp = true; + } + if (forceStrand && (!(haveLeft) || !(haveRight))) { + cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + BedSlop *bc = new BedSlop(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional); + delete bc; + + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Add requested base pairs of \"slop\" to each feature." 
<< endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-b\t" << "Increase the BED/GFF/VCF entry by -b base pairs in each direction." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-l\t" << "The number of base pairs to subtract from the start coordinate." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-r\t" << "The number of base pairs to add to the end coordinate." << endl; + cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; + + cerr << "\t-s\t" << "Define -l and -r based on strand." << endl; + cerr << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl; + cerr << "\t\tit will add 500 bp downstream. Default = false." << endl << endl; + + cerr << "\t-pct\t" << "Define -l and -r as a fraction of the feature's length." << endl; + cerr << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl; + cerr << "\t\twill add 500 bp \"upstream\". Default = false." << endl << endl; + + cerr << "Notes: " << endl; + cerr << "\t(1) Starts will be set to 0 if options would force it below 0." << endl; + cerr << "\t(2) Ends will be set to the chromosome length if requested slop would" << endl; + cerr << "\tforce it above the max chrom length." << endl; + + cerr << "\t(3) The genome file should tab delimited and structured as follows:" << endl; + cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl; + cerr << "\tFor example, Human (hg19):" << endl; + cerr << "\tchr1\t249250621" << endl; + cerr << "\tchr2\t243199373" << endl; + cerr << "\t..." << endl; + cerr << "\tchr18_gl000207_random\t4262" << endl << endl; + + + cerr << "Tips: " << endl; + cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; + cerr << "\tchromosome sizes. For example, H. 
sapiens:" << endl << endl; + cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; + cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; + + + // end the program here + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/sortBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,43 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= sortMain.cpp sortBed.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= sortBed + + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,201 @@ +/***************************************************************************** + sortBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "sortBed.h" + +// +// Constructor +// +BedSort::BedSort(string &bedFile) { + _bedFile = bedFile; + _bed = new BedFile(bedFile); +} + +// +// Destructor +// +BedSort::~BedSort(void) { +} + + +void BedSort::SortBed() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + + for (unsigned int i = 0; i < bedList.size(); ++i) { + _bed->reportBedNewLine(bedList[i]); + } + } +} + + +void BedSort::SortBedBySizeAsc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + vector<BED> masterList; + masterList.reserve(1000000); + + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + + // add the entries from this chromosome to the current list + for (unsigned int i = 0; i < m->second.size(); ++i) { + masterList.push_back(m->second[i]); + } + } + + // sort the master list by size (asc.) 
+ sort(masterList.begin(), masterList.end(), sortBySizeAsc); + + // report the entries in ascending order + for (unsigned int i = 0; i < masterList.size(); ++i) { + _bed->reportBedNewLine(masterList[i]); + } +} + + +void BedSort::SortBedBySizeDesc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + vector<BED> masterList; + masterList.reserve(1000000); + + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + + // add the entries from this chromosome to the current list + for (unsigned int i = 0; i < m->second.size(); ++i) { + masterList.push_back(m->second[i]); + } + } + + // sort the master list by size (asc.) + sort(masterList.begin(), masterList.end(), sortBySizeDesc); + + // report the entries in ascending order + for (unsigned int i = 0; i < masterList.size(); ++i) { + _bed->reportBedNewLine(masterList[i]); + } +} + +void BedSort::SortBedByChromThenSizeAsc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. 
+ vector<BED> bedList = m->second; + sort(bedList.begin(), bedList.end(), sortBySizeAsc); + + for (unsigned int i = 0; i < bedList.size(); ++i) { + _bed->reportBedNewLine(bedList[i]); + } + } +} + + +void BedSort::SortBedByChromThenSizeDesc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + + sort(bedList.begin(), bedList.end(), sortBySizeDesc); + + for (unsigned int i = 0; i < bedList.size(); ++i) { + _bed->reportBedNewLine(bedList[i]); + } + } +} + + +void BedSort::SortBedByChromThenScoreAsc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + if (_bed->bedType >= 5) { + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + sort(bedList.begin(), bedList.end(), sortByScoreAsc); + + for (unsigned int i = 0; i < bedList.size(); ++i) { + _bed->reportBedNewLine(bedList[i]); + } + } + } + else { + cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." 
<< endl; + exit(1); + } +} + + +void BedSort::SortBedByChromThenScoreDesc() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bed->loadBedFileIntoMapNoBin(); + + if (_bed->bedType >= 5) { + // loop through each chromosome and merge their BED entries + for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { + + // bedList is already sorted by start position. + vector<BED> bedList = m->second; + sort(bedList.begin(), bedList.end(), sortByScoreDesc); + + for (unsigned int i = 0; i < bedList.size(); ++i) { + _bed->reportBedNewLine(bedList[i]); + } + } + } + else { + cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl; + exit(1); + } +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/sortBed/sortBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,50 @@ +/***************************************************************************** + sortBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include <vector> +#include <algorithm> +#include <iostream> +#include <fstream> + +using namespace std; + + +//************************************************ +// Class methods and elements +//************************************************ +class BedSort { + +public: + + // constructor + BedSort(string &); + + // destructor + ~BedSort(void); + + void SortBed(); // the default. sorts by chrom (asc.) then by start (asc.) + void SortBedBySizeAsc(); + void SortBedBySizeDesc(); + void SortBedByChromThenSizeAsc(); + void SortBedByChromThenSizeDesc(); + void SortBedByChromThenScoreAsc(); + void SortBedByChromThenScoreDesc(); + +private: + string _bedFile; + + // instance of a bed file class. + BedFile *_bed; + + // methods + +}; |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,157 @@ +/***************************************************************************** + sortBedMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "sortBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "sortBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedFile = "stdin"; + bool haveBed = true; + int sortChoices = 0; + + bool sortBySizeAsc = false; + bool sortBySizeDesc = false; + bool sortByChromThenSizeAsc = false; + bool sortByChromThenSizeDesc = false; + bool sortByChromThenScoreAsc = false; + bool sortByChromThenScoreDesc = false; + + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + bedFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-sizeA", 6, parameterLength)) { + sortBySizeAsc = true; + sortChoices++; + } + else if(PARAMETER_CHECK("-sizeD", 6, parameterLength)) { + sortBySizeDesc = true; + sortChoices++; + } + else if(PARAMETER_CHECK("-chrThenSizeA", 13, parameterLength)) { + sortByChromThenSizeAsc = 
true; + sortChoices++; + } + else if(PARAMETER_CHECK("-chrThenSizeD", 13, parameterLength)) { + sortByChromThenSizeDesc = true; + sortChoices++; + } + else if(PARAMETER_CHECK("-chrThenScoreA", 14, parameterLength)) { + sortByChromThenScoreAsc = true; + sortChoices++; + } + else if(PARAMETER_CHECK("-chrThenScoreD", 14, parameterLength)) { + sortByChromThenScoreDesc = true; + sortChoices++; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBed) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; + showHelp = true; + } + if (sortChoices > 1) { + cerr << endl << "*****" << endl << "*****ERROR: Sorting options are mutually exclusive. Please choose just one. " << endl << "*****" << endl; + showHelp = true; + } + + + if (!showHelp) { + BedSort *bm = new BedSort(bedFile); + + if (sortBySizeAsc) { + bm->SortBedBySizeAsc(); + } + else if (sortBySizeDesc) { + bm->SortBedBySizeDesc(); + } + else if (sortByChromThenSizeAsc) { + bm->SortBedByChromThenSizeAsc(); + } + else if (sortByChromThenSizeDesc) { + bm->SortBedByChromThenSizeDesc(); + } + else if (sortByChromThenScoreAsc) { + bm->SortBedByChromThenScoreAsc(); + } + else if (sortByChromThenScoreDesc) { + bm->SortBedByChromThenScoreDesc(); + } + else { + bm->SortBed(); + } + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + cerr << "Summary: Sorts a feature file in various and useful ways." << endl << endl; + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t" << "-sizeA\t\t" << "Sort by feature size in ascending order." 
<< endl; + cerr << "\t" << "-sizeD\t\t" << "Sort by feature size in descending order." << endl; + cerr << "\t" << "-chrThenSizeA\t" << "Sort by chrom (asc), then feature size (asc)." << endl; + cerr << "\t" << "-chrThenSizeD\t" << "Sort by chrom (asc), then feature size (desc)." << endl; + cerr << "\t" << "-chrThenScoreA\t" << "Sort by chrom (asc), then score (asc)." << endl; + cerr << "\t" << "-chrThenScoreD\t" << "Sort by chrom (asc), then score (desc)." << endl << endl; + + exit(1); + +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/subtractBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,47 @@ +UTILITIES_DIR = ../utils/ +OBJ_DIR = ../../obj/ +BIN_DIR = ../../bin/ + +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ + -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/version/ \ + -I$(UTILITIES_DIR)/gzstream/ \ + -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= subtractMain.cpp subtractBed.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) +PROGRAM= subtractBed + + +all: $(PROGRAM) + +.PHONY: all + +$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) + @echo " * linking $(PROGRAM)" + @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,178 @@ +/***************************************************************************** + subtractBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "subtractBed.h" + + +/* + Constructor +*/ +BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand) { + + _bedAFile = bedAFile; + _bedBFile = bedBFile; + _overlapFraction = overlapFraction; + _sameStrand = sameStrand; + _diffStrand = diffStrand; + + _bedA = new BedFile(bedAFile); + _bedB = new BedFile(bedBFile); + + SubtractBed(); +} + + +/* + Destructor +*/ +BedSubtract::~BedSubtract(void) { +} + + +void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) { + + // find all of the overlaps between a and B. + _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); + + // is A completely spanned by an entry in B? + // if so, A should not be reported. + int numConsumedByB = 0; + int numOverlaps = 0; + vector<BED> bOverlaps; // list of hits in B. Special processing if there are multiple. + + vector<BED>::const_iterator h = hits.begin(); + vector<BED>::const_iterator hitsEnd = hits.end(); + for (; h != hitsEnd; ++h) { + + int s = max(a.start, h->start); + int e = min(a.end, h->end); + int overlapBases = (e - s); // the number of overlapping bases b/w a and b + int aLength = (a.end - a.start); // the length of a in b.p. 
+ + if (s < e) { + + // is there enough overlap (default ~ 1bp) + float overlap = ((float) overlapBases / (float) aLength); + + if (overlap >= 1.0) { + numOverlaps++; + numConsumedByB++; + } + else if ( overlap >= _overlapFraction ) { + numOverlaps++; + bOverlaps.push_back(*h); + } + } + } + + if (numOverlaps == 0) { + // no overlap found, so just report A as-is. + _bedA->reportBedNewLine(a); + } + else if (numOverlaps == 1) { + // one overlap found. only need to look at the single + // entry in bOverlaps. + + // if A was not "consumed" by any entry in B + if (numConsumedByB == 0) { + + BED theHit = bOverlaps[0]; + + // A ++++++++++++ + // B ---- + // Res. ==== ==== + if ( (theHit.start > a.start) && (theHit.end < a.end) ) { + _bedA->reportBedRangeNewLine(a,a.start,theHit.start); + _bedA->reportBedRangeNewLine(a,theHit.end,a.end); + } + // A ++++++++++++ + // B ---------- + // Res. == + else if (theHit.start == a.start) { + _bedA->reportBedRangeNewLine(a,theHit.end,a.end); + } + // A ++++++++++++ + // B ---------- + // Res. ==== + else if (theHit.start < a.start) { + _bedA->reportBedRangeNewLine(a,theHit.end,a.end); + } + // A ++++++++++++ + // B ---------- + // Res. ======= + else if (theHit.start > a.start) { + _bedA->reportBedRangeNewLine(a,a.start,theHit.start); + } + } + } + else if (numOverlaps > 1) { + // multiple overlapz found. look at all the hits + // and figure out which bases in A survived. then + // report the contigous intervals that survived. + + vector<bool> aKeep(a.end - a.start, true); + + if (numConsumedByB == 0) { + // track the number of hit starts and ends at each position in A + for (vector<BED>::iterator h = bOverlaps.begin(); h != bOverlaps.end(); ++h) { + int s = max(a.start, h->start); + int e = min(a.end, h->end); + + for (int i = s+1; i <= e; ++i) { + aKeep[i-a.start-1] = false; + } + } + // report the remaining blocks. 
+ for (unsigned int i = 0; i < aKeep.size(); ++i) { + if (aKeep[i] == true) { + CHRPOS blockStart = i + a.start; + while ((aKeep[i] == true) && (i < aKeep.size())) { + i++; + } + CHRPOS blockEnd = i + a.start; + blockEnd = min(a.end, blockEnd); + _bedA->reportBedRangeNewLine(a,blockStart,blockEnd); + } + } + } + } +} + + + +void BedSubtract::SubtractBed() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bedB->loadBedFileIntoMap(); + + BED a, nullBed; + BedLineStatus bedStatus; + int lineNum = 0; // current input line number + vector<BED> hits; // vector of potential hits + // reserve some space + hits.reserve(100); + + _bedA->Open(); + while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + FindAndSubtractOverlaps(a, hits); + hits.clear(); + a = nullBed; + } + } + _bedA->Close(); + +} +// END Intersect + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,53 @@ +/***************************************************************************** + subtractBed.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef SUBTRACTBED_H +#define SUBTRACTBED_H + +#include "bedFile.h" +#include <vector> +#include <iostream> +#include <fstream> + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class BedSubtract { + +public: + + // constructor + BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand); + + // destructor + ~BedSubtract(void); + +private: + + // processing variables + string _bedAFile; + string _bedBFile; + float _overlapFraction; + bool _sameStrand; + bool _diffStrand; + + + // instances of bed file class. + BedFile *_bedA, *_bedB; + + // methods + void FindAndSubtractOverlaps(BED &a, vector<BED> &hits); + void SubtractBed(); +}; + +#endif /* SUBTRACTBED_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,145 @@ +/***************************************************************************** + subtractMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "subtractBed.h" +#include "version.h" + +using namespace std; + +// define our program name +#define PROGRAM_NAME "subtractBed" + + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input files + string bedAFile; + string bedBFile; + + // input arguments + float overlapFraction = 1E-9; + + bool haveBedA = false; + bool haveBedB = false; + bool haveFraction = false; + bool sameStrand = false; + bool diffStrand = false; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-a", 2, parameterLength)) { + if ((i+1) < argc) { + haveBedA = true; + bedAFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-b", 2, parameterLength)) { + if ((i+1) < argc) { + haveBedB = true; + bedBFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + if ((i+1) < argc) { + haveFraction = true; + overlapFraction = 
atof(argv[i + 1]); + i++; + } + } + else if (PARAMETER_CHECK("-s", 2, parameterLength)) { + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBedA || !haveBedB) { + cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; + showHelp = true; + } + + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + + BedSubtract *bs = new BedSubtract(bedAFile, bedBFile, overlapFraction, sameStrand, diffStrand); + delete bs; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Removes the portion(s) of an interval that is overlapped" << endl; + cerr << "\t by another feature(s)." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl; + cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; + cerr << "\t\t- (FLOAT) (e.g. 0.50)" << endl << endl; + + cerr << "\t-s\t" << "Require same strandedness. That is, only subtract hits in B that" << endl; + cerr << "\t\toverlap A on the _same_ strand." << endl; + cerr << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl; + + cerr << "\t-S\t" << "Force strandedness. That is, only subtract hits in B that" << endl; + cerr << "\t\toverlap A on the _opposite_ strand." 
<< endl; + cerr << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl; + + // end the program here + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/tagBam/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the tagBam tool.
# Objects are written to $(OBJ_DIR) and the linked program to $(BIN_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= tagBamMain.cpp tagBam.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= tagBam


all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# touching one .cpp no longer forces every object of this tool to rebuild.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are produced by the utility directories' own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: removes everything in the shared obj/ and bin/ directories, not just tagBam's files.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,136 @@ +/***************************************************************************** + tagBam.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "tagBam.h" + +// build +TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames, + const vector<string> &annoLables, const string &tag, + bool useNames, bool useScores, bool sameStrand, bool diffStrand, float overlapFraction): + + _bamFile(bamFile), + _annoFileNames(annoFileNames), + _annoLabels(annoLables), + _tag(tag), + _useNames(useNames), + _useScores(useScores), + _sameStrand(sameStrand), + _diffStrand(diffStrand), + _overlapFraction(overlapFraction) +{} + + +// destroy and delete the open file pointers +TagBam::~TagBam(void) { + delete _bed; + CloseAnnoFiles(); +} + + +void TagBam::OpenAnnoFiles() { + for (size_t i=0; i < _annoFileNames.size(); ++i) { + BedFile *file = new BedFile(_annoFileNames[i]); + file->loadBedFileIntoMap(); + _annoFiles.push_back(file); + } +} + + +void TagBam::CloseAnnoFiles() { + for (size_t i=0; i < _annoFiles.size(); ++i) { + BedFile *file = _annoFiles[i]; + delete file; + _annoFiles[i] = NULL; + } +} + + +void TagBam::Tag() { + + // open the annotations files for processing; + OpenAnnoFiles(); + + // open the BAM file + BamReader reader; + BamWriter writer; + reader.Open(_bamFile); + // get header & reference information + string bamHeader = reader.GetHeaderText(); + RefVector refs = reader.GetReferenceData(); + + // set compression mode + BamWriter::CompressionMode compressionMode = BamWriter::Compressed; +// if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; + writer.SetCompressionMode(compressionMode); + // open our BAM writer + writer.Open("stdout", 
bamHeader, refs); + + // rip through the BAM file and test for overlaps with each annotation file. + BamAlignment al; + vector<BED> hits; + + while (reader.GetNextAlignment(al)) { + if (al.IsMapped() == true) { + BED a; + a.chrom = refs.at(al.RefID).RefName; + a.start = al.Position; + a.end = al.GetEndPosition(false, false); + a.strand = "+"; + if (al.IsReverseStrand()) a.strand = "-"; + + ostringstream annotations; + // annotate the BAM file based on overlaps with the annotation files. + for (size_t i = 0; i < _annoFiles.size(); ++i) + { + // grab the current annotation file. + BedFile *anno = _annoFiles[i]; + + if (!_useNames && !_useScores) { + // add the label for this annotation file to tag if there is overlap + if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction)) + { + annotations << _annoLabels[i] << ";"; + } + } + // use the score field + else if (!_useNames && _useScores) { + anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); + for (size_t i = 0; i < hits.size(); ++i) { + annotations << hits[i].score; + if (i < hits.size() - 1) annotations << ","; + } + if (hits.size() > 0) annotations << ";"; + hits.clear(); + } + // use the name field from the annotation files to populate tag + else if (_useNames && !_useScores) { + anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); + for (size_t i = 0; i < hits.size(); ++i) { + annotations << hits[i].name; + if (i < hits.size() - 1) annotations << ","; + } + if (hits.size() > 0) annotations << ";"; + hits.clear(); + } + } + // were there any overlaps with which to make a tag? + if (annotations.str().size() > 0) { + al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";" + } + writer.SaveAlignment(al); + } + } + reader.Close(); + + // close the annotations files; + CloseAnnoFiles(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBam.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/tagBam/tagBam.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,83 @@ +/***************************************************************************** + tagBam.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef TAGBAM_H +#define TAGBAM_H + +#include "bedFile.h" + +#include "version.h" +#include "api/BamReader.h" +#include "api/BamWriter.h" +#include "api/BamAux.h" +#include "BamAncillary.h" +using namespace BamTools; + +#include "bedFile.h" +#include <vector> +#include <algorithm> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <stdlib.h> + +using namespace std; + +//************************************************ +// Class methods and elements +//************************************************ +class TagBam { + +public: + + // constructor + TagBam(const string &bamFile, const vector<string> &annoFileNames, + const vector<string> &annoLabels, const string &tag, + bool useNames, bool useScores, bool sameStrand, + bool diffStrand, float overlapFraction); + + // destructor + ~TagBam(void); + + // annotate the BAM file with all of the annotation files. + void Tag(); + +private: + + // input files. + string _bamFile; + vector<string> _annoFileNames; + vector<string> _annoLabels; + + string _tag; + + // instance of a bed file class. + BedFile *_bed; + vector<BedFile*> _annoFiles; + + // should we use the name field from the annotation files? + bool _useNames; + bool _useScores; + + // do we care about strandedness when tagging? + bool _sameStrand; + bool _diffStrand; + float _overlapFraction; + + // private function for reporting coverage information + void ReportAnnotations(); + + void OpenAnnoFiles(); + + void CloseAnnoFiles(); + +}; +#endif /* TAGBAM_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,209 @@ +/***************************************************************************** + annotateMain.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "tagBam.h" +#include "version.h" + +using namespace std; + +// define the version +#define PROGRAM_NAME "tagBam" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + +// function declarations +void ShowHelp(void); + +int main(int argc, char* argv[]) { + + // our configuration variables + bool showHelp = false; + + // input file + string bamFile; + float overlapFraction = 1E-9; + string tag = "YB"; + + // parm flags + bool haveTag = false; + bool haveFraction = false; + bool useNames = false; + bool useScores = false; + bool sameStrand = false; + bool diffStrand = false; + bool haveBam = false; + bool haveFiles = false; + bool haveLabels = false; + + + // list of annotation files / names + vector<string> inputFiles; + vector<string> inputLabels; + + // check to see if we should print out some help + if(argc <= 1) showHelp = true; + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + haveBam = true; + bamFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-files", 6, parameterLength)) { + if ((i+1) < argc) { + 
haveFiles = true; + i = i+1; + string file = argv[i]; + while (file[0] != '-' && i < argc) { + inputFiles.push_back(file); + i++; + if (i < argc) + file = argv[i]; + } + i--; + } + } + else if(PARAMETER_CHECK("-labels", 7, parameterLength)) { + if ((i+1) < argc) { + haveLabels = true; + i = i+1; + string label = argv[i]; + while (label[0] != '-' && i < argc) { + inputLabels.push_back(label); + i++; + if (i < argc) + label = argv[i]; + } + i--; + } + } + else if (PARAMETER_CHECK("-names", 6, parameterLength)) { + useNames = true; + } + else if (PARAMETER_CHECK("-scores", 7, parameterLength)) { + useScores = true; + } + else if (PARAMETER_CHECK("-s", 2, parameterLength)) { + sameStrand = true; + } + else if (PARAMETER_CHECK("-S", 2, parameterLength)) { + diffStrand = true; + } + else if(PARAMETER_CHECK("-f", 2, parameterLength)) { + if ((i+1) < argc) { + haveFraction = true; + overlapFraction = atof(argv[i + 1]); + i++; + } + } + else if(PARAMETER_CHECK("-tag", 4, parameterLength)) { + if ((i+1) < argc) { + haveTag = true; + tag = argv[i + 1]; + i++; + } + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have both input files + if (!haveBam || !haveFiles) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl; + showHelp = true; + } + if (!useNames && !haveLabels && !useScores) { + cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names or -scores" << endl << "*****" << endl; + showHelp = true; + } + if (sameStrand && diffStrand) { + cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl; + showHelp = true; + } + if (haveLabels && useNames) { + cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. 
" << endl << "*****" << endl; + showHelp = true; + } + if (useScores && useNames) { + cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl; + showHelp = true; + } + if (haveTag && tag.size() > 2) { + cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, useScores, sameStrand, diffStrand, overlapFraction); + ba->Tag(); + delete ba; + return 0; + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Annotates a BAM file based on overlaps with multiple BED/GFF/VCF files" << endl; + cerr << "\t on the intervals in -i." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <BAM> -files FILE1 .. FILEn -labels LAB1 .. LABn" << endl << endl; + + cerr << "Options: " << endl; + + cerr << "\t-s\t" << "Require overlaps on the same strand. That is, only tag alignments that have the same" << endl; + cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; + + cerr << "\t-S\t" << "Require overlaps on the opposite strand. That is, only tag alignments that have the opposite" << endl; + cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; + + cerr << "\t-f\t" << "Minimum overlap required as a fraction of the alignment." << endl; + cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; + cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl; + + cerr << "\t-tag\t" << "Dictate what the tag should be. Default is YB." << endl; + cerr << "\t\t- STRING (two characters, e.g., YK)" << endl << endl; + + cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." 
<< endl; + cerr << "\t\tBy default, the -labels values are used." << endl << endl; + + cerr << "\t-scores\t" << "A list of 1-based columns for each annotation file" << endl; + cerr << "\t\tin which a color can be found." << endl << endl; + + + exit(1); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the unionBedGraphs tool.
# Objects are written to $(OBJ_DIR) and the linked program to $(BIN_DIR).
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedGraphFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= unionBedGraphs.cpp unionBedGraphsMain.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=bedGraphFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= unionBedGraphs

all: $(PROGRAM)

.PHONY: all

$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)

# Static pattern rule: each object depends only on its own source file, so
# touching one .cpp no longer forces every object of this tool to rebuild.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $<
	@$(CXX) -c -o $@ $< $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# The shared utility objects are produced by the utility directories' own Makefiles.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# NOTE: removes everything in the shared obj/ and bin/ directories, not just this tool's files.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,62 @@ +/***************************************************************************** + intervalItem.h + + (c) 2010 - Assaf Gordon + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef INTERVALITEM_H +#define INTERVALITEM_H + +#include <string> +#include <queue> + +enum COORDINATE_TYPE { + START, + END +}; + +/* + An interval item in the priority queue. + + An IntervalItem can mark either a START position or an END position. + */ +class IntervalItem +{ +private: + IntervalItem(); + +public: + int source_index; // which source BedGraph file this came from + COORDINATE_TYPE coord_type; // is this the start or the end position? + CHRPOS coord; + std::string depth; + + IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord, std::string _depth) : + source_index(_index), + coord_type(_type), + coord(_coord), + depth(_depth) + {} + + IntervalItem(const IntervalItem &other) : + source_index(other.source_index), + coord_type(other.coord_type), + coord(other.coord), + depth(other.depth) + {} + + bool operator< ( const IntervalItem& other ) const + { + return this->coord > other.coord; + } +}; + +// our priority queue +typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE; + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,255 @@ +/***************************************************************************** + unionBedGraphs.cpp + + (c) 2010 - Assaf Gordon, CSHL + - Aaron Quinlan, UVA + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include <cassert> +#include <cstring> +#include <cstdlib> +#include <iostream> +#include <algorithm> + +#include "bedGraphFile.h" +#include "unionBedGraphs.h" + +using namespace std; + + +UnionBedGraphs::UnionBedGraphs(std::ostream& _output, + const vector<string>& _filenames, + const vector<string>& _titles, + bool _print_empty_regions, + const std::string& _genome_size_filename, + const std::string& _no_coverage_value ) : + filenames(_filenames), + titles(_titles), + output(_output), + current_non_zero_inputs(0), + print_empty_regions(_print_empty_regions), + genome_sizes(NULL), + no_coverage_value(_no_coverage_value) +{ + if (print_empty_regions) { + assert(!_genome_size_filename.empty()); + + genome_sizes = new GenomeFile(_genome_size_filename); + } +} + + +UnionBedGraphs::~UnionBedGraphs() { + CloseBedgraphFiles(); + if (genome_sizes) { + delete genome_sizes; + genome_sizes = NULL ; + } +} + + +void UnionBedGraphs::Union() { + OpenBedgraphFiles(); + + // Add the first interval from each file + for(size_t i=0;i<bedgraph_files.size();++i) + LoadNextBedgraphItem(i); + + // Chromosome loop - once per chromosome + do { + // Find the first chromosome to use + current_chrom = DetermineNextChrom(); + + // Populate the queue with initial values from all files + // (if they belong to the correct chromosome) + for(size_t i=0;i<bedgraph_files.size();++i) + AddInterval(i); + + CHRPOS current_start = ConsumeNextCoordinate(); + + // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage + if 
(print_empty_regions && current_start > 0) + PrintEmptyCoverage(0,current_start); + + // Intervals loop - until all intervals (of current chromosome) from all files are used. + do { + CHRPOS current_end = queue.top().coord; + PrintCoverage(current_start, current_end); + current_start = ConsumeNextCoordinate(); + } while (!queue.empty()); + + // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome + // print a dummy empty coverage + if (print_empty_regions) { + CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom); + if (current_start < chrom_size) + PrintEmptyCoverage(current_start, chrom_size); + } + + } while (!AllFilesDone()); +} + + +CHRPOS UnionBedGraphs::ConsumeNextCoordinate() { + assert(!queue.empty()); + + CHRPOS new_position = queue.top().coord; + do { + IntervalItem item = queue.top(); + UpdateInformation(item); + queue.pop(); + } while (!queue.empty() && queue.top().coord == new_position); + + return new_position; +} + + +void UnionBedGraphs::UpdateInformation(const IntervalItem &item) { + // Update the depth coverage for this file + + // Which coordinate is it - start or end? + switch (item.coord_type) + { + case START: + current_depth[item.source_index] = item.depth; + current_non_zero_inputs++; + break; + case END: + //Read the next interval from this file + AddInterval(item.source_index); + current_depth[item.source_index] = no_coverage_value; + current_non_zero_inputs--; + break; + default: + assert(0); + } +} + + +void UnionBedGraphs::PrintHeader() { + output << "chrom\tstart\tend" ; + for (size_t i=0;i<titles.size();++i) + output << "\t" <<titles[i]; + output << endl; +} + + +void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) { + if ( current_non_zero_inputs == 0 && ! 
print_empty_regions ) + return ; + + output << current_chrom << "\t" + << start << "\t" + << end; + + for (size_t i=0;i<current_depth.size();++i) + output << "\t" << current_depth[i] ; + + output << endl; +} + + +void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) { + output << current_chrom << "\t" + << start << "\t" + << end; + + for (size_t i=0;i<current_depth.size();++i) + output << "\t" << no_coverage_value ; + + output << endl; +} + + +void UnionBedGraphs::LoadNextBedgraphItem(int index) { + assert(static_cast<unsigned int>(index) < bedgraph_files.size()); + + current_bedgraph_item[index].chrom=""; + + BedGraphFile *file = bedgraph_files[index]; + BEDGRAPH_STR bg; + int lineNum = 0; + BedGraphLineStatus status; + + while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) { + if (status != BEDGRAPH_VALID) + continue; + + current_bedgraph_item[index] = bg ; + break; + } +} + + +bool UnionBedGraphs::AllFilesDone() { + for (size_t i=0;i<current_bedgraph_item.size();++i) + if (!current_bedgraph_item[i].chrom.empty()) + return false; + return true; +} + + +string UnionBedGraphs::DetermineNextChrom() { + string next_chrom; + for (size_t i=0;i<current_bedgraph_item.size();++i) { + if (current_bedgraph_item[i].chrom.empty()) + continue; + + if (next_chrom.empty()) + next_chrom = current_bedgraph_item[i].chrom; + else + if (current_bedgraph_item[i].chrom < next_chrom) + next_chrom = current_bedgraph_item[i].chrom ; + } + return next_chrom; +} + + +void UnionBedGraphs::AddInterval(int index) { + assert(static_cast<unsigned int>(index) < bedgraph_files.size()); + + //This file has no more intervals + if (current_bedgraph_item[index].chrom.empty()) + return ; + + //If the next interval belongs to a different chrom, don't add it + if (current_bedgraph_item[index].chrom!=current_chrom) + return ; + + const BEDGRAPH_STR &bg(current_bedgraph_item[index]); + + IntervalItem start_item(index, START, bg.start, bg.depth); + IntervalItem 
end_item(index, END, bg.end, bg.depth); + + queue.push(start_item); + queue.push(end_item); + + LoadNextBedgraphItem(index); +} + + +void UnionBedGraphs::OpenBedgraphFiles() { + for (size_t i=0;i<filenames.size();++i) { + BedGraphFile *file = new BedGraphFile(filenames[i]); + file->Open(); + bedgraph_files.push_back(file); + + current_depth.push_back(no_coverage_value); + } + current_bedgraph_item.resize(filenames.size()); +} + + +void UnionBedGraphs::CloseBedgraphFiles() { + for (size_t i=0;i<bedgraph_files.size();++i) { + BedGraphFile *file = bedgraph_files[i]; + delete file; + bedgraph_files[i] = NULL ; + } + bedgraph_files.clear(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,123 @@ +/***************************************************************************** + unionBedGraphs.h + + (c) 2010 - Assaf Gordon, CSHL + - Aaron Quinlan, UVA + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef UNIONBEDGRAPHS_H +#define UNIONBEDGRAPHS_H + +#include <vector> +#include <string> +#include "bedGraphFile.h" +#include "genomeFile.h" +#include "intervalItem.h" + +class UnionBedGraphs +{ +private: + typedef BEDGRAPH_STR BEDGRAPH_TYPE; + + vector<string> filenames; + vector<string> titles; + + vector<BedGraphFile*> bedgraph_files; + vector<BEDGRAPH_TYPE::DEPTH_TYPE> current_depth; + vector<BEDGRAPH_TYPE> current_bedgraph_item; + + std::ostream &output; + + INTERVALS_PRIORITY_QUEUE queue; + std::string current_chrom; + int current_non_zero_inputs; + bool print_empty_regions; + + GenomeFile* genome_sizes; + + std::string no_coverage_value; + +public: + UnionBedGraphs(std::ostream& _output, + const vector<string>& _filenames, + const vector<string>& _titles, + bool _print_empty_regions, + const std::string& _genomeFileName, + const std::string& _no_coverage_value); + + virtual ~UnionBedGraphs(); + + // Combines all bedgraph files + void Union(); + + // Print the header line: chrom/start/end + name of each bedgraph file. + void PrintHeader(); + + +private: + + // Open all BedGraph files, initialize "current_XXX" vectors + void OpenBedgraphFiles(); + + // Close the BedGraph files. + void CloseBedgraphFiles(); + + /* + Add an interval from BedGraph file 'index' into the queue. + will only be added if it belongs to the current chromosome. 
+ + If the interval was added (=consumed), the next interval will be read from the file + using 'LoadNextBedgraphItem' + */ + void AddInterval(int index); + + /* + Loads the next interval from BedGraph file 'index'. + Stores it in 'current_bedgraph_item' vector. + */ + void LoadNextBedgraphItem(int index); + + /* + Scans the 'current_bedgraph_item' vector, + find the 'first' chromosome to use (different BedGraph files can start with different chromosomes). + */ + std::string DetermineNextChrom(); + + /* + Returns 'true' if ALL intervals from ALL BedGraph files were used + */ + bool AllFilesDone(); + + /* + Extract the next coordinate from the queue, and updates the current coverage information. + If multiple interval share the same coordinate values, all of them are handled. + If an END coordinate is consumed, the next interval (from the corresponding file) is read. + */ + CHRPOS ConsumeNextCoordinate(); + + /* + Updates the coverage information based on the given item. + Item can be a START coordinate or an END coordiante. + */ + void UpdateInformation(const IntervalItem &item); + + /* + prints chrom/start/end and the current depth coverage values of all the files. + */ + void PrintCoverage(CHRPOS start, CHRPOS end); + + /* + prints chrom/start/end and the ZERO depth coverage values of all the files. + */ + void PrintEmptyCoverage(CHRPOS start, CHRPOS end); + + void DebugPrintQueue(); +}; + + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,294 @@\n+/*****************************************************************************\n+ unionBedGraphsMain.cpp\n+\n+ (c) 2010 - Assaf Gordon, CSHL\n+ - Aaron Quinlan, UVA\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include <climits>\n+#include <cstring>\n+#include <cstdlib>\n+#include <vector>\n+#include <string>\n+#include <iostream>\n+#include <getopt.h>\n+#include <libgen.h> //for basename()\n+#include "version.h"\n+\n+#include "genomeFile.h"\n+#include "unionBedGraphs.h"\n+\n+using namespace std;\n+\n+// define our program name\n+#define PROGRAM_NAME "unionBedGraphs"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+//STLized version of basename()\n+// (because POSIX basename() modifies the input string pointer)\n+// Additionally: removes any extension the basename might have.\n+std::string stl_basename(const std::string& path);\n+\n+// function declarations\n+void ShowHelp(void);\n+void ShowExamples(void);\n+\n+\n+int main(int argc, char* argv[])\n+{\n+ bool haveFiles = false;\n+ bool haveTitles = false;\n+ bool haveGenome = false;\n+ bool haveFiller = true;\n+ bool printHeader = false;\n+ bool printEmptyRegions = false;\n+ bool showHelp = false;\n+ string genomeFile;\n+ string basePath;\n+ string noCoverageValue = "0";\n+ vector<string> inputFiles;\n+ vector<string> inputTitles;\n+\n+ //Parse command line options\n+ if(argc <= 1)\n+ ShowHelp();\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp 
== true) {\n+ ShowHelp();\n+ exit(1);\n+ }\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFiles = true;\n+ i = i+1;\n+ string file = argv[i];\n+ while (file[0] != \'-\' && i < argc) {\n+ inputFiles.push_back(file);\n+ i++;\n+ if (i < argc)\n+ file = argv[i];\n+ }\n+ i--;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveTitles = true;\n+ i = i+1;\n+ string title = argv[i];\n+ while (title[0] != \'-\' && i < argc) {\n+ inputTitles.push_back(title);\n+ i++;\n+ if (i < argc)\n+ title = argv[i];\n+ }\n+ i--;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveGenome = true;\n+ genomeFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveFiller = true;\n+ noCoverageValue = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n+ printHeader = true;\n+ }\n+ else if(PARAMETER_CHECK("-empty", 6, parameterLength)) {\n+ printEmptyRegions = true;\n+ }\n+ else if(PARAMETER_CHECK("-examples", 9, parameterLength)) {\n'..b'+\n+ cerr << "\\t-names\\t\\t" << "A list of names (one / file) to describe each file in -i." << endl;\n+ cerr << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n+\n+ cerr << "\\t-g\\t\\t" << "Use genome file to calculate empty regions." << endl;\n+ cerr << "\\t\\t\\t- STRING." << endl << endl;\n+\n+ cerr << "\\t-empty\\t\\t" << "Report empty regions (i.e., start/end intervals w/o" << endl;\n+ cerr << "\\t\\t\\tvalues in all files)." << endl;\n+ cerr << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n+\n+ cerr << "\\t-filler TEXT\\t" << "Use TEXT when representing intervals having no value." 
<< endl;\n+ cerr << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n+\n+ cerr << "\\t-examples\\t" << "Show detailed usage examples." << endl << endl;\n+}\n+\n+\n+\n+void ShowExamples()\n+{\n+ cerr << "Example usage:\\n\\n" \\\n+"== Input files: ==\\n" \\\n+"\\n" \\\n+" $ cat 1.bg\\n" \\\n+" chr1 1000 1500 10\\n" \\\n+" chr1 2000 2100 20\\n" \\\n+"\\n" \\\n+" $ cat 2.bg\\n" \\\n+" chr1 900 1600 60\\n" \\\n+" chr1 1700 2050 50\\n" \\\n+"\\n" \\\n+" $ cat 3.bg\\n" \\\n+" chr1 1980 2070 80\\n" \\\n+" chr1 2090 2100 20\\n" \\\n+"\\n" \\\n+" $ cat sizes.txt\\n" \\\n+" chr1 5000\\n" \\\n+"\\n" \\\n+"== Union/combine the files: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start end 1 2 3\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine the files, with a header line and custom names: ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n+" chrom start end WT-1 WT-2 KO-1\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" 
chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+"\\n" \\\n+"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n+"\\n" \\\n+" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n+" chrom start end 1 2 3\\n" \\\n+" chr1 0 900 0 0 0\\n" \\\n+" chr1 900 1000 0 60 0\\n" \\\n+" chr1 1000 1500 10 60 0\\n" \\\n+" chr1 1500 1600 0 60 0\\n" \\\n+" chr1 1600 1700 0 0 0\\n" \\\n+" chr1 1700 1980 0 50 0\\n" \\\n+" chr1 1980 2000 0 50 80\\n" \\\n+" chr1 2000 2050 20 50 80\\n" \\\n+" chr1 2050 2070 20 0 80\\n" \\\n+" chr1 2070 2090 20 0 0\\n" \\\n+" chr1 2090 2100 20 0 20\\n" \\\n+" chr1 2100 5000 0 0 0\\n" \\\n+"\\n" \\\n+;\n+}\n+\n+std::string stl_basename(const std::string& path)\n+{\n+ string result;\n+\n+ char* path_dup = strdup(path.c_str());\n+ char* basename_part = basename(path_dup);\n+ result = basename_part;\n+ free(path_dup);\n+\n+ size_t pos = result.find_last_of(\'.\');\n+ if (pos != string::npos )\n+ result = result.substr(0,pos);\n+\n+ return result;\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,73 @@ +/***************************************************************************** + bamAncillary.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "BamAncillary.h" +using namespace std; + +// 10 15 20 25 30 4000 +// acccctttggacct---ataggga.................aaaa +// acccc---ggaccttttataggga.................aaaa +// 5M 3D 6M 2I 7M 20N 4M + +namespace BamTools { + void getBamBlocks(const BamAlignment &bam, const RefVector &refs, + vector<BED> &blocks, bool breakOnDeletionOps) { + + CHRPOS currPosition = bam.Position; + CHRPOS blockStart = bam.Position; + string chrom = refs.at(bam.RefID).RefName; + string name = bam.Name; + string strand = "+"; + string score = ToString(bam.MapQuality); + char prevOp = '\0'; + if (bam.IsReverseStrand()) strand = "-"; + bool blocksFound = false; + + vector<CigarOp>::const_iterator cigItr = bam.CigarData.begin(); + vector<CigarOp>::const_iterator cigEnd = bam.CigarData.end(); + for ( ; cigItr != cigEnd; ++cigItr ) { + if (cigItr->Type == 'M') { + currPosition += cigItr->Length; + // we only want to create a new block if the current M op + // was preceded by an N op or a D op (and we are breaking on D ops) + if ((prevOp == 'D' && breakOnDeletionOps == true) || (prevOp == 'N')) { + blocks.push_back( BED(chrom, blockStart, currPosition, name, score, strand) ); + blockStart = currPosition; + } + } + else if (cigItr->Type == 'D') { + if (breakOnDeletionOps == false) + currPosition += cigItr->Length; + else { + currPosition += cigItr->Length; + blockStart = currPosition; + } + } + else if (cigItr->Type == 'N') { + currPosition += cigItr->Length; + blockStart = currPosition; + } + else if (cigItr->Type == 'S' || cigItr->Type == 'H' || cigItr->Type == 'P' || cigItr->Type == 
'I') { + // do nothing + } + else { + cerr << "Input error: invalid CIGAR type (" << cigItr->Type + << ") for: " << bam.Name << endl; + exit(1); + } + prevOp = cigItr->Type; + } + // if there were no splits, we just create a block representing the contiguous alignment. + if (blocksFound == false) { + blocks.push_back( BED(chrom, bam.Position, currPosition, name, score, strand) ); + } + } +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,19 @@ +/***************************************************************************** + bamAncillary.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedFile.h" +#include "lineFileUtilities.h" +#include "api/BamAlignment.h" + +namespace BamTools { + void getBamBlocks(const BamAlignment &bam, const RefVector &refs, + vector<BED> &blocks, bool includeDeletions = true); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,26 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../ + +INCLUDES = -I$(UTILITIES_DIR)/BamTools/include -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= BamAncillary.cpp +OBJECTS= $(SOURCES:.cpp=.o) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +all: $(BUILT_OBJECTS) + +.PHONY: all + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,22 @@ +The MIT License + +Copyright (c) 2009-2010 Derek Barnett, Erik Garrison, Gabor Marth, Michael Stromberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/Makefile Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,61 @@ +# ------------------- +# define our includes +# ------------------- +OBJ_DIR = ../../../obj/ +INCLUDES = -Isrc/ -Iinclude/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- + +SOURCES= src/api/BamAlignment.cpp \ + src/api/BamMultiReader.cpp \ + src/api/BamReader.cpp \ + src/api/BamWriter.cpp \ + src/api/SamHeader.cpp \ + src/api/SamProgram.cpp \ + src/api/SamProgramChain.cpp \ + src/api/SamReadGroup.cpp \ + src/api/SamReadGroupDictionary.cpp \ + src/api/SamSequence.cpp \ + src/api/SamSequenceDictionary.cpp \ + src/api/internal/BamHeader_p.cpp \ + src/api/internal/BamIndexFactory_p.cpp \ + src/api/internal/BamMultiReader_p.cpp \ + src/api/internal/BamRandomAccessController_p.cpp \ + src/api/internal/BamReader_p.cpp \ + src/api/internal/BamStandardIndex_p.cpp \ + src/api/internal/BamToolsIndex_p.cpp \ + src/api/internal/BamWriter_p.cpp \ + src/api/internal/BgzfStream_p.cpp \ + src/api/internal/SamFormatParser_p.cpp \ + src/api/internal/SamFormatPrinter_p.cpp \ + src/api/internal/SamHeaderValidator_p.cpp + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +OBJECTS= $(SOURCES:.cpp=.o) +LIBRARY=libbamtools.a + +all: $(LIBRARY) + +.PHONY: all + +$(LIBRARY): $(OBJECTS) + [ -d lib ] || mkdir -p lib + [ -d include ] || mkdir -p include + [ -d include/api ] || mkdir -p include/api + [ -d include/shared ] || mkdir -p include/shared + + @cp src/api/*.h include/api + @cp src/shared/*.h include/shared + + + @echo " * linking $(LIBRARY)" + ar cr lib/$@ $^ + +$(OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c $(*D)/$(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -o $(*D)/$(*F).o + \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,2433 @@\n+// ***************************************************************************\n+// BamAlignment.cpp (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 22 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the BamAlignment data structure\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamConstants.h>\n+using namespace BamTools;\n+\n+#include <cctype>\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <exception>\n+#include <iostream>\n+#include <map>\n+#include <utility>\n+using namespace std;\n+\n+/*! \\class BamTools::BamAlignment\n+ \\brief The main BAM alignment data structure.\n+\n+ Provides methods to query/modify BAM alignment data fields.\n+*/\n+/*! \\var BamAlignment::Name\n+ \\brief read name\n+*/\n+/*! \\var BamAlignment::Length\n+ \\brief length of query sequence\n+*/\n+/*! \\var BamAlignment::QueryBases\n+ \\brief \'original\' sequence (as reported from sequencing machine)\n+*/\n+/*! \\var BamAlignment::AlignedBases\n+ \\brief \'aligned\' sequence (includes any indels, padding, clipping)\n+*/\n+/*! \\var BamAlignment::Qualities\n+ \\brief FASTQ qualities (ASCII characters, not numeric values)\n+*/\n+/*! \\var BamAlignment::TagData\n+ \\brief tag data (use the provided methods to query/modify)\n+*/\n+/*! \\var BamAlignment::RefID\n+ \\brief ID number for reference sequence\n+*/\n+/*! \\var BamAlignment::Position\n+ \\brief position (0-based) where alignment starts\n+*/\n+/*! \\var BamAlignment::Bin\n+ \\brief BAM (standard) index bin number for this alignment\n+*/\n+/*! \\var BamAlignment::MapQuality\n+ \\brief mapping quality score\n+*/\n+/*! 
\\var BamAlignment::AlignmentFlag\n+ \\brief alignment bit-flag (use the provided methods to query/modify)\n+*/\n+/*! \\var BamAlignment::CigarData\n+ \\brief CIGAR operations for this alignment\n+*/\n+/*! \\var BamAlignment::MateRefID\n+ \\brief ID number for reference sequence where alignment\'s mate was aligned\n+*/\n+/*! \\var BamAlignment::MatePosition\n+ \\brief position (0-based) where alignment\'s mate starts\n+*/\n+/*! \\var BamAlignment::InsertSize\n+ \\brief mate-pair insert size\n+*/\n+/*! \\var BamAlignment::Filename\n+ \\brief name of BAM file which this alignment comes from\n+*/\n+\n+/*! \\fn BamAlignment::BamAlignment(void)\n+ \\brief constructor\n+*/\n+BamAlignment::BamAlignment(void)\n+ : RefID(-1)\n+ , Position(-1)\n+ , MateRefID(-1)\n+ , MatePosition(-1)\n+ , InsertSize(0)\n+{ }\n+\n+/*! \\fn BamAlignment::BamAlignment(const BamAlignment& other)\n+ \\brief copy constructor\n+*/\n+BamAlignment::BamAlignment(const BamAlignment& other)\n+ : Name(other.Name)\n+ , Length(other.Length)\n+ , QueryBases(other.QueryBases)\n+ , AlignedBases(other.AlignedBases)\n+ , Qualities(other.Qualities)\n+ , TagData(other.TagData)\n+ , RefID(other.RefID)\n+ , Position(other.Position)\n+ , Bin(other.Bin)\n+ , MapQuality(other.MapQuality)\n+ , AlignmentFlag(other.AlignmentFlag)\n+ , CigarData(other.CigarData)\n+ , MateRefID(other.MateRefID)\n+ , MatePosition(other.MatePosition)\n+ , InsertSize(other.InsertSize)\n+ , Filename(other.Filename)\n+ , SupportData(other.SupportData)\n+{ }\n+\n+/*! \\fn BamAlignment::~BamAlignment(void)\n+ \\brief destructor\n+*/\n+BamAlignment::~BamAlignment(void) { }\n+\n+/*! 
\\fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)\n+ \\brief Adds a field with string data to the BAM tags.\n+\n+ Does NOT modify an existing tag - use \\link BamAlignment::EditTag() \\endlink instead.\n+\n+ \\param tag 2-character tag name\n+ \\param type 1-character tag type (must be "Z" or "H")\n+ \\param value string data to store\n+\n+ \\return \\c true if the \\b new tag was added successfully\n+ \\sa \\samSpecURL for'..b'ped(bool ok)\n+ \\brief Complement of using SetIsMapped().\n+ \\deprecated For sake of symmetry with the query methods\n+ \\sa IsMapped(), SetIsMapped()\n+*/\n+void BamAlignment::SetIsUnmapped(bool ok) {\n+ SetIsMapped(!ok);\n+}\n+\n+/*! \\fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)\n+ \\internal\n+\n+ Moves to next available tag in tag data string\n+\n+ \\param storageType BAM tag type-code that determines how far to move cursor\n+ \\param pTagData pointer to current position (cursor) in tag string\n+ \\param numBytesParsed report of how many bytes were parsed (cumulatively)\n+\n+ \\return \\c if storageType was a recognized BAM tag type\n+ \\post \\a pTagData will point to the byte where the next tag data begins.\n+ \\a numBytesParsed will correspond to the cursor\'s position in the full TagData string.\n+*/\n+bool BamAlignment::SkipToNextTag(const char storageType,\n+ char*& pTagData,\n+ unsigned int& numBytesParsed) const\n+{\n+ switch (storageType) {\n+\n+ case (Constants::BAM_TAG_TYPE_ASCII) :\n+ case (Constants::BAM_TAG_TYPE_INT8) :\n+ case (Constants::BAM_TAG_TYPE_UINT8) :\n+ ++numBytesParsed;\n+ ++pTagData;\n+ break;\n+\n+ case (Constants::BAM_TAG_TYPE_INT16) :\n+ case (Constants::BAM_TAG_TYPE_UINT16) :\n+ numBytesParsed += sizeof(uint16_t);\n+ pTagData += sizeof(uint16_t);\n+ break;\n+\n+ case (Constants::BAM_TAG_TYPE_FLOAT) :\n+ case (Constants::BAM_TAG_TYPE_INT32) :\n+ case (Constants::BAM_TAG_TYPE_UINT32) :\n+ 
numBytesParsed += sizeof(uint32_t);\n+ pTagData += sizeof(uint32_t);\n+ break;\n+\n+ case (Constants::BAM_TAG_TYPE_STRING) :\n+ case (Constants::BAM_TAG_TYPE_HEX) :\n+ while( *pTagData ) {\n+ ++numBytesParsed;\n+ ++pTagData;\n+ }\n+ // increment for null-terminator\n+ ++numBytesParsed;\n+ ++pTagData;\n+ break;\n+\n+ case (Constants::BAM_TAG_TYPE_ARRAY) :\n+\n+ {\n+ // read array type\n+ const char arrayType = *pTagData;\n+ ++numBytesParsed;\n+ ++pTagData;\n+\n+ // read number of elements\n+ int32_t numElements;\n+ memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary\n+ numBytesParsed += sizeof(uint32_t);\n+ pTagData += sizeof(uint32_t);\n+\n+ // calculate number of bytes to skip\n+ int bytesToSkip = 0;\n+ switch (arrayType) {\n+ case (Constants::BAM_TAG_TYPE_INT8) :\n+ case (Constants::BAM_TAG_TYPE_UINT8) :\n+ bytesToSkip = numElements;\n+ break;\n+ case (Constants::BAM_TAG_TYPE_INT16) :\n+ case (Constants::BAM_TAG_TYPE_UINT16) :\n+ bytesToSkip = numElements*sizeof(uint16_t);\n+ break;\n+ case (Constants::BAM_TAG_TYPE_FLOAT) :\n+ case (Constants::BAM_TAG_TYPE_INT32) :\n+ case (Constants::BAM_TAG_TYPE_UINT32) :\n+ bytesToSkip = numElements*sizeof(uint32_t);\n+ break;\n+ default:\n+ cerr << "BamAlignment ERROR: unknown binary array type encountered: "\n+ << arrayType << endl;\n+ return false;\n+ }\n+\n+ // skip binary array contents\n+ numBytesParsed += bytesToSkip;\n+ pTagData += bytesToSkip;\n+ break;\n+ }\n+\n+ default:\n+ cerr << "BamAlignment ERROR: unknown tag type encountered"\n+ << storageType << endl;\n+ return false;\n+ }\n+\n+ // return success\n+ return true;\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,207 @@\n+// ***************************************************************************\n+// BamAlignment.h (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 22 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the BamAlignment data structure\n+// ***************************************************************************\n+\n+#ifndef BAMALIGNMENT_H\n+#define BAMALIGNMENT_H\n+\n+#include <api/api_global.h>\n+#include <api/BamAux.h>\n+#include <string>\n+#include <vector>\n+\n+namespace BamTools {\n+\n+// forward declaration of BamAlignment\'s friend classes\n+namespace Internal {\n+ class BamReaderPrivate;\n+ class BamWriterPrivate;\n+} // namespace Internal\n+\n+// BamAlignment data structure\n+struct API_EXPORT BamAlignment {\n+\n+ // constructors & destructor\n+ public:\n+ BamAlignment(void);\n+ BamAlignment(const BamAlignment& other);\n+ ~BamAlignment(void);\n+\n+ // queries against alignment flags\n+ public: \n+ bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate\n+ bool IsFailedQC(void) const; // returns true if this read failed quality control\n+ bool IsFirstMate(void) const; // returns true if alignment is first mate on read\n+ bool IsMapped(void) const; // returns true if alignment is mapped\n+ bool IsMateMapped(void) const; // returns true if alignment\'s mate is mapped\n+ bool IsMateReverseStrand(void) const; // returns true if alignment\'s mate mapped to reverse strand\n+ bool IsPaired(void) const; // returns true if alignment part of paired-end read\n+ bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment\n+ bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution\n+ bool IsReverseStrand(void) const; // 
returns true if alignment mapped to reverse strand\n+ bool IsSecondMate(void) const; // returns true if alignment is second mate on read\n+\n+ // manipulate alignment flags\n+ public: \n+ void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag\n+ void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag\n+ void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag\n+ void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag\n+ void SetIsMateMapped(bool ok); // sets value of "alignment\'s mate is mapped" flag\n+ void SetIsMateReverseStrand(bool ok); // sets value of "alignment\'s mate mapped to reverse strand" flag\n+ void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag\n+ void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag\n+ void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag\n+ void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag\n+ void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag\n+\n+ // legacy methods (consider deprecated, but still available)\n+ void SetIsMateUnmapped(bool ok); // complement of using SetIsMateMapped()\n+ void SetIsSecondaryAlignment(bool ok); // complement of using SetIsPrimaryAlignment()\n+ void SetIsUnmapped(bool ok); // complement of using SetIsMapped()\n+\n+ // tag data access methods\n+ public:\n+\n+ // -------------------------------------------------------------------------------------\n+ '..b', std::vector<uint32_t>& destination) const;\n+ bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;\n+ bool GetTag(const std::string& tag, std::vector<float>& destination) const;\n+\n+ // retrieves the BAM tag-type character for a tag\n+ bool GetTagType(const std::string& tag, char& type) const;\n+\n+ // legacy methods (consider deprecated, but 
still available)\n+ bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag\n+ bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag\n+ \n+ // returns true if alignment has a record for this tag name\n+ bool HasTag(const std::string& tag) const;\n+\n+ // removes a tag\n+ bool RemoveTag(const std::string& tag);\n+\n+ // additional methods\n+ public:\n+ // populates alignment string fields\n+ bool BuildCharData(void);\n+ // calculates alignment end position\n+ int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; \n+\n+ // public data fields\n+ public:\n+ std::string Name; // read name\n+ int32_t Length; // length of query sequence\n+ std::string QueryBases; // \'original\' sequence (as reported from sequencing machine)\n+ std::string AlignedBases; // \'aligned\' sequence (includes any indels, padding, clipping)\n+ std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\n+ std::string TagData; // tag data (use provided methods to query/modify)\n+ int32_t RefID; // ID number for reference sequence\n+ int32_t Position; // position (0-based) where alignment starts\n+ uint16_t Bin; // BAM (standard) index bin number for this alignment\n+ uint16_t MapQuality; // mapping quality score\n+ uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify)\n+ std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\n+ int32_t MateRefID; // ID number for reference sequence where alignment\'s mate was aligned\n+ int32_t MatePosition; // position (0-based) where alignment\'s mate starts\n+ int32_t InsertSize; // mate-pair insert size\n+ std::string Filename; // name of BAM file which this alignment comes from\n+\n+ //! 
\\cond\n+ // internal utility methods\n+ private:\n+ bool FindTag(const std::string& tag,\n+ char*& pTagData,\n+ const unsigned int& tagDataLength,\n+ unsigned int& numBytesParsed) const;\n+ bool IsValidSize(const std::string& tag,\n+ const std::string& type) const;\n+ bool SkipToNextTag(const char storageType,\n+ char*& pTagData,\n+ unsigned int& numBytesParsed) const;\n+\n+ // internal data\n+ private:\n+\n+ struct BamAlignmentSupportData {\n+ \n+ // data members\n+ std::string AllCharData;\n+ uint32_t BlockLength;\n+ uint32_t NumCigarOperations;\n+ uint32_t QueryNameLength;\n+ uint32_t QuerySequenceLength;\n+ bool HasCoreOnly;\n+ \n+ // constructor\n+ BamAlignmentSupportData(void)\n+ : BlockLength(0)\n+ , NumCigarOperations(0)\n+ , QueryNameLength(0)\n+ , QuerySequenceLength(0)\n+ , HasCoreOnly(false)\n+ { }\n+ };\n+ BamAlignmentSupportData SupportData;\n+ friend class Internal::BamReaderPrivate;\n+ friend class Internal::BamWriterPrivate;\n+ //! \\endcond\n+};\n+\n+typedef std::vector<BamAlignment> BamAlignmentVector;\n+\n+} // namespace BamTools\n+\n+#endif // BAMALIGNMENT_H\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,457 @@\n+// ***************************************************************************\r\n+// BamAux.h (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\r\n+// Marth Lab, Department of Biology, Boston College\r\n+// All rights reserved.\r\n+// ---------------------------------------------------------------------------\r\n+// Last modified: 4 March 2011 (DB)\r\n+// ---------------------------------------------------------------------------\r\n+// Provides data structures & utility methods that are used throughout the API.\r\n+// ***************************************************************************\r\n+\r\n+#ifndef BAMAUX_H\r\n+#define BAMAUX_H\r\n+\r\n+#include <api/api_global.h>\r\n+#include <fstream> \r\n+#include <iostream>\r\n+#include <string>\r\n+#include <vector>\r\n+\r\n+/*! \\file BamAux.h\r\n+\r\n+ Provides data structures & utility methods that are used throughout the API.\r\n+*/\r\n+/*! \\namespace BamTools\r\n+ \\brief Contains all BamTools classes & methods.\r\n+\r\n+ The BamTools API contained in this namespace contains classes and methods\r\n+ for reading, writing, and manipulating BAM alignment files.\r\n+*/\r\n+namespace BamTools {\r\n+\r\n+// ----------------------------------------------------------------\r\n+// CigarOp\r\n+\r\n+/*! \\struct BamTools::CigarOp\r\n+ \\brief Represents a CIGAR alignment operation.\r\n+\r\n+ \\sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.\r\n+*/\r\n+struct API_EXPORT CigarOp {\r\n+ \r\n+ char Type; //!< CIGAR operation type (MIDNSHP)\r\n+ uint32_t Length; //!< CIGAR operation length (number of bases)\r\n+ \r\n+ //! constructor\r\n+ CigarOp(const char type = \'\\0\', \r\n+ const uint32_t& length = 0)\r\n+ : Type(type)\r\n+ , Length(length) \r\n+ { }\r\n+};\r\n+\r\n+// ----------------------------------------------------------------\r\n+// RefData\r\n+\r\n+/*! 
\\struct BamTools::RefData\r\n+ \\brief Represents a reference sequence entry\r\n+*/\r\n+struct API_EXPORT RefData {\r\n+ \r\n+ std::string RefName; //!< name of reference sequence\r\n+ int32_t RefLength; //!< length of reference sequence\r\n+ \r\n+ //! constructor\r\n+ RefData(const std::string& name = "",\r\n+ const int32_t& length = 0)\r\n+ : RefName(name)\r\n+ , RefLength(length)\r\n+ { }\r\n+};\r\n+\r\n+//! convenience typedef for vector of RefData entries\r\n+typedef std::vector<RefData> RefVector;\r\n+\r\n+// ----------------------------------------------------------------\r\n+// BamRegion\r\n+\r\n+/*! \\struct BamTools::BamRegion\r\n+ \\brief Represents a sequential genomic region\r\n+\r\n+ Allowed to span multiple (sequential) references.\r\n+*/\r\n+struct API_EXPORT BamRegion {\r\n+ \r\n+ int LeftRefID; //!< reference ID for region\'s left boundary\r\n+ int LeftPosition; //!< position for region\'s left boundary\r\n+ int RightRefID; //!< reference ID for region\'s right boundary\r\n+ int RightPosition; //!< position for region\'s right boundary\r\n+ \r\n+ //! constructor\r\n+ BamRegion(const int& leftID = -1, \r\n+ const int& leftPos = -1,\r\n+ const int& rightID = -1,\r\n+ const int& rightPos = -1)\r\n+ : LeftRefID(leftID)\r\n+ , LeftPosition(leftPos)\r\n+ , RightRefID(rightID)\r\n+ , RightPosition(rightPos)\r\n+ { }\r\n+ \r\n+ //! copy constructor\r\n+ BamRegion(const BamRegion& other)\r\n+ : LeftRefID(other.LeftRefID)\r\n+ , LeftPosition(other.LeftPosition)\r\n+ , RightRefID(other.RightRefID)\r\n+ , RightPosition(other.RightPosition)\r\n+ { }\r\n+ \r\n+ //! Clears region boundaries\r\n+ void clear(void) {\r\n+ LeftRefID = -1; LeftPosition = -1;\r\n+ RightRefID = -1; RightPosition = -1;\r\n+ }\r\n+\r\n+ //! Returns true if region has a left boundary\r\n+ bool isLeftBoundSpecified(void) const {\r\n+ return ( LeftRefID >= 0 && LeftPosition >= 0 );\r\n+ }\r\n+\r\n+ //! 
Returns true if region boundaries are not defined\r\n+ bool isNull(void) const {\r\n+ return ( !isLeftBoundSpecified()'..b'oat) value read from the buffer\r\n+*/\r\n+API_EXPORT inline float UnpackFloat(char* buffer) {\r\n+ return UnpackFloat( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn signed int UnpackSignedInt(const char* buffer)\r\n+ \\brief reads a signed integer value from byte buffer\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (signed int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r\n+ union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r\n+ un.value = 0;\r\n+ un.valueBuffer[0] = buffer[0];\r\n+ un.valueBuffer[1] = buffer[1];\r\n+ un.valueBuffer[2] = buffer[2];\r\n+ un.valueBuffer[3] = buffer[3];\r\n+ return un.value;\r\n+}\r\n+\r\n+/*! \\fn signed int UnpackSignedInt(char* buffer)\r\n+ \\brief reads a signed integer value from byte buffer\r\n+\r\n+ This is an overloaded function.\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (signed int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r\n+ return UnpackSignedInt( (const char*) buffer );\r\n+}\r\n+\r\n+/*! \\fn signed short UnpackSignedShort(const char* buffer)\r\n+ \\brief reads a signed short integer value from byte buffer\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (signed short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r\n+ union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r\n+ un.value = 0;\r\n+ un.valueBuffer[0] = buffer[0];\r\n+ un.valueBuffer[1] = buffer[1];\r\n+ return un.value;\r\n+}\r\n+\r\n+/*! 
\\fn signed short UnpackSignedShort(char* buffer)\r\n+ \\brief reads a signed short integer value from byte buffer\r\n+\r\n+ This is an overloaded function.\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (signed short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r\n+ return UnpackSignedShort( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn unsigned int UnpackUnsignedInt(const char* buffer)\r\n+ \\brief reads an unsigned integer value from byte buffer\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (unsigned int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r\n+ union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r\n+ un.value = 0;\r\n+ un.valueBuffer[0] = buffer[0];\r\n+ un.valueBuffer[1] = buffer[1];\r\n+ un.valueBuffer[2] = buffer[2];\r\n+ un.valueBuffer[3] = buffer[3];\r\n+ return un.value;\r\n+}\r\n+\r\n+/*! \\fn unsigned int UnpackUnsignedInt(char* buffer)\r\n+ \\brief reads an unsigned integer value from byte buffer\r\n+\r\n+ This is an overloaded function.\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (unsigned int) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r\n+ return UnpackUnsignedInt( (const char*)buffer );\r\n+}\r\n+\r\n+/*! \\fn unsigned short UnpackUnsignedShort(const char* buffer)\r\n+ \\brief reads an unsigned short integer value from byte buffer\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (unsigned short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r\n+ union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r\n+ un.value = 0;\r\n+ un.valueBuffer[0] = buffer[0];\r\n+ un.valueBuffer[1] = buffer[1];\r\n+ return un.value;\r\n+}\r\n+\r\n+/*! 
\\fn unsigned short UnpackUnsignedShort(char* buffer)\r\n+ \\brief reads an unsigned short integer value from byte buffer\r\n+\r\n+ This is an overloaded function.\r\n+\r\n+ \\param buffer source byte buffer\r\n+ \\return the (unsigned short) value read from the buffer\r\n+*/\r\n+API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r\n+ return UnpackUnsignedShort( (const char*)buffer );\r\n+}\r\n+\r\n+} // namespace BamTools\r\n+\r\n+#endif // BAMAUX_H\r\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,128 @@ +// *************************************************************************** +// BamConstants.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides basic constants for handling BAM files. +// *************************************************************************** + +#ifndef BAM_CONSTANTS_H +#define BAM_CONSTANTS_H + +#include <string> + +/*! \namespace BamTools::Constants + \brief Provides basic constants for handling BAM files. +*/ + +namespace BamTools { +namespace Constants { + +const int BAM_SIZEOF_INT = 4; + +// header magic number +const char* const BAM_HEADER_MAGIC = "BAM\1"; +const unsigned int BAM_HEADER_MAGIC_LENGTH = 4; + +// BAM alignment core size +const int BAM_CORE_SIZE = 32; +const int BAM_CORE_BUFFER_SIZE = 8; + +// BAM alignment flags +const int BAM_ALIGNMENT_PAIRED = 0x0001; +const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002; +const int BAM_ALIGNMENT_UNMAPPED = 0x0004; +const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008; +const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010; +const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020; +const int BAM_ALIGNMENT_READ_1 = 0x0040; +const int BAM_ALIGNMENT_READ_2 = 0x0080; +const int BAM_ALIGNMENT_SECONDARY = 0x0100; +const int BAM_ALIGNMENT_QC_FAILED = 0x0200; +const int BAM_ALIGNMENT_DUPLICATE = 0x0400; + +// CIGAR constants +const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X"; +const int BAM_CIGAR_MATCH = 0; +const int BAM_CIGAR_INS = 1; +const int BAM_CIGAR_DEL = 2; +const int BAM_CIGAR_REFSKIP = 3; +const int BAM_CIGAR_SOFTCLIP = 4; +const int BAM_CIGAR_HARDCLIP = 5; +const int BAM_CIGAR_PAD = 6; +const int BAM_CIGAR_SEQMATCH = 7; +const int BAM_CIGAR_MISMATCH = 8; + +const char BAM_CIGAR_MATCH_CHAR = 'M'; +const char BAM_CIGAR_INS_CHAR = 
'I'; +const char BAM_CIGAR_DEL_CHAR = 'D'; +const char BAM_CIGAR_REFSKIP_CHAR = 'N'; +const char BAM_CIGAR_SOFTCLIP_CHAR = 'S'; +const char BAM_CIGAR_HARDCLIP_CHAR = 'H'; +const char BAM_CIGAR_PAD_CHAR = 'P'; +const char BAM_CIGAR_SEQMATCH_CHAR = '='; +const char BAM_CIGAR_MISMATCH_CHAR = 'X'; + +const int BAM_CIGAR_SHIFT = 4; +const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1); + +// BAM tag types +const char BAM_TAG_TYPE_ASCII = 'A'; +const char BAM_TAG_TYPE_UINT8 = 'c'; +const char BAM_TAG_TYPE_INT8 = 'C'; +const char BAM_TAG_TYPE_UINT16 = 's'; +const char BAM_TAG_TYPE_INT16 = 'S'; +const char BAM_TAG_TYPE_UINT32 = 'i'; +const char BAM_TAG_TYPE_INT32 = 'I'; +const char BAM_TAG_TYPE_FLOAT = 'f'; +const char BAM_TAG_TYPE_STRING = 'Z'; +const char BAM_TAG_TYPE_HEX = 'H'; +const char BAM_TAG_TYPE_ARRAY = 'B'; + +const size_t BAM_TAG_TAGSIZE = 2; +const size_t BAM_TAG_TYPESIZE = 1; +const int BAM_TAG_ARRAYBASE_SIZE = 8; + +// DNA bases +const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"; +const unsigned char BAM_BASECODE_EQUAL = 0; +const unsigned char BAM_BASECODE_A = 1; +const unsigned char BAM_BASECODE_C = 2; +const unsigned char BAM_BASECODE_G = 4; +const unsigned char BAM_BASECODE_T = 8; +const unsigned char BAM_BASECODE_N = 15; + +const char BAM_DNA_EQUAL = '='; +const char BAM_DNA_A = 'A'; +const char BAM_DNA_C = 'C'; +const char BAM_DNA_G = 'G'; +const char BAM_DNA_T = 'T'; +const char BAM_DNA_N = 'N'; +const char BAM_DNA_DEL = '-'; +const char BAM_DNA_PAD = '*'; + +// zlib constants +const int GZIP_ID1 = 31; +const int GZIP_ID2 = 139; +const int CM_DEFLATE = 8; +const int FLG_FEXTRA = 4; +const int OS_UNKNOWN = 255; +const int BGZF_XLEN = 6; +const int BGZF_ID1 = 66; +const int BGZF_ID2 = 67; +const int BGZF_LEN = 2; +const int GZIP_WINDOW_BITS = -15; +const int Z_DEFAULT_MEM_LEVEL = 8; + +// BZGF constants +const int BGZF_BLOCK_HEADER_LENGTH = 18; +const int BGZF_BLOCK_FOOTER_LENGTH = 8; +const int BGZF_MAX_BLOCK_SIZE = 65536; +const int 
BGZF_DEFAULT_BLOCK_SIZE = 65536; + +} // namespace Constants +} // namespace BamTools + +#endif // BAM_CONSTANTS_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,80 @@ +// *************************************************************************** +// BamIndex.h (c) 2009 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides basic BAM index interface +// *************************************************************************** + +#ifndef BAM_INDEX_H +#define BAM_INDEX_H + +#include <api/api_global.h> +#include <api/BamAux.h> +#include <string> + +namespace BamTools { + +namespace Internal { + class BamReaderPrivate; +} // namespace Internal + +/*! \class BamTools::BamIndex + \brief Provides methods for generating & loading BAM index files. + + This class straddles the line between public API and internal + implementation detail. Most client code should never have to use this + class directly. + + It is exposed to the public API to allow advanced users to implement + their own custom indexing schemes. + + More documentation on methods & enums coming soon. 
+*/ + +class API_EXPORT BamIndex { + + // enums + public: + // specify index-caching behavior + enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory + , LimitedIndexCaching // store only index data for current reference + , NoIndexCaching // do not store any index data between jumps + }; + + // list of supported BamIndex types + enum IndexType { BAMTOOLS = 0 + , STANDARD + }; + + // ctor & dtor + public: + BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } + virtual ~BamIndex(void) { } + + // index interface + public: + // builds index from associated BAM file & writes out to index file + virtual bool Create(void) =0; // creates index file from BAM file + // returns whether reference has alignments or no + virtual bool HasAlignments(const int& referenceID) const =0; + // attempts to use index data to jump to @region, returns success/fail + // a "successful" jump indicates no error, but not whether this region has data + // * thus, the method sets a flag to indicate whether there are alignments + // available after the jump position + virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; + // loads existing data from file into memory + virtual bool Load(const std::string& filename) =0; + // change the index caching behavior + virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0; + + // data members + protected: + Internal::BamReaderPrivate* m_reader; // copy, not ownedprivate: +}; + +} // namespace BamTools + +#endif // BAM_INDEX_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,396 @@\n+// ***************************************************************************\n+// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 15 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Convenience class for reading multiple BAM files.\n+//\n+// This functionality allows applications to work on very large sets of files\n+// without requiring intermediate merge, sort, and index steps for each file\n+// subset. It also improves the performance of our merge system as it\n+// precludes the need to sort merged files.\n+// ***************************************************************************\n+\n+#include <api/BamMultiReader.h>\n+#include <api/internal/BamMultiReader_p.h>\n+using namespace BamTools;\n+\n+#include <string>\n+#include <vector>\n+using namespace std;\n+\n+/*! \\class BamTools::BamReader\n+ \\brief Convenience class for reading multiple BAM files.\n+*/\n+\n+/*! \\fn BamMultiReader::BamMultiReader(void)\n+ \\brief constructor\n+*/\n+BamMultiReader::BamMultiReader(void)\n+ : d(new Internal::BamMultiReaderPrivate)\n+{ }\n+\n+/*! \\fn BamMultiReader::~BamMultiReader(void)\n+ \\brief destructor\n+*/\n+BamMultiReader::~BamMultiReader(void) {\n+ delete d;\n+ d = 0;\n+}\n+\n+/*! \\fn void BamMultiReader::Close(void)\n+ \\brief Closes all open BAM files.\n+\n+ Also clears out all header and reference data.\n+\n+ \\sa CloseFile(), IsOpen(), Open(), BamReader::Close()\n+*/\n+void BamMultiReader::Close(void) {\n+ d->Close();\n+}\n+\n+/*! 
\\fn void BamMultiReader::CloseFile(const std::string& filename)\n+ \\brief Closes requested BAM file.\n+\n+ Leaves any other file(s) open, along with header and reference data.\n+\n+ \\sa Close(), IsOpen(), Open(), BamReader::Close()\n+*/\n+void BamMultiReader::CloseFile(const std::string& filename) {\n+ d->CloseFile(filename);\n+}\n+\n+/*! \\fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)\n+ \\brief Creates index files for the current BAM files.\n+\n+ \\param type file format to create, see BamIndex::IndexType for available formats\n+ \\return \\c true if index files created OK\n+ \\sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()\n+*/\n+bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {\n+ return d->CreateIndexes(type);\n+}\n+\n+/*! \\fn const std::vector<std::string> BamMultiReader::Filenames(void) const\n+ \\brief Returns list of filenames for all open BAM files.\n+\n+ Retrieved filenames will contain whatever was passed via Open().\n+ If you need full directory paths here, be sure to include them\n+ when you open the BAM files.\n+\n+ \\returns names of open BAM files. If no files are open, returns an empty vector.\n+ \\sa IsOpen(), BamReader::GetFilename()\n+*/\n+const std::vector<std::string> BamMultiReader::Filenames(void) const {\n+ return d->Filenames();\n+}\n+\n+/*! \\fn SamHeader BamMultiReader::GetHeader(void) const\n+ \\brief Returns unified SAM-format header for all files\n+\n+ N.B. - Modifying the retrieved text does NOT affect the current\n+ BAM files. Thesse file have been opened in a read-only mode. However,\n+ your modified header text can be used in conjunction with BamWriter\n+ to generate a new BAM file with the appropriate header information.\n+\n+ \\returns header data wrapped in SamHeader object\n+ \\sa GetHeaderText(), BamReader::GetHeader()\n+*/\n+SamHeader BamMultiReader::GetHeader(void) const {\n+ return d->GetHeader();\n+}\n+\n+/*! 
\\fn std::string BamMultiReader::GetHeaderText(void) const\n+ \\brief Returns unified SAM-format header text for all files\n+\n+ N.B. - Modifying the retrieved text does NOT affect the current\n+ BAM files. Thesse file have been opened in a read-only mode. However,\n+ your modified header text can be used in conjunction with BamWriter\n+ to gener'..b'mMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)\n+ \\brief Opens index files for current BAM files.\n+\n+ N.B. - Currently assumes that index filenames match the order (and number) of\n+ BAM files passed to Open().\n+\n+ \\param indexFilenames list of BAM index file names\n+ \\returns \\c true if BAM index file was opened & data loaded successfully\n+ \\sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()\n+*/\n+bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {\n+ return d->OpenIndexes(indexFilenames);\n+}\n+\n+/*! \\fn void BamMultiReader::PrintFilenames(void) const\n+ \\brief Convenience method for printing filenames to stdout.\n+ \\deprecated Doesn\'t really belong as an API function. Clients should\n+ determine how the data is reported.\n+ \\sa Filenames(), BamReader::GetFilename()\n+*/\n+void BamMultiReader::PrintFilenames(void) const {\n+ d->PrintFilenames();\n+}\n+\n+/*! \\fn bool BamMultiReader::Rewind(void)\n+ \\brief Returns the internal file pointers to the beginning of alignment records.\n+\n+ Useful for performing multiple sequential passes through BAM files.\n+ Calling this function clears any prior region that may have been set.\n+\n+ \\returns \\c true if rewind operation was successful\n+ \\sa Jump(), SetRegion(), BamReader::Rewind()\n+*/\n+bool BamMultiReader::Rewind(void) {\n+ return d->Rewind();\n+}\n+\n+/*! 
\\fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n+ \\brief Changes the caching behavior of the index data.\n+\n+ Default mode is BamIndex::LimitedIndexCaching.\n+\n+ \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n+ description of the available cache modes\n+ \\sa HasIndex(), BamReader::SetIndexCacheMode()\n+*/\n+void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+ d->SetIndexCacheMode(mode);\n+}\n+\n+/*! \\fn bool BamMultiReader::SetRegion(const BamRegion& region)\n+ \\brief Sets a target region of interest\n+\n+ Equivalent to calling BamReader::SetRegion() on all open BAM files.\n+\n+ \\param region desired region-of-interest to activate\n+ \\returns \\c true if ALL readers set the region successfully\n+ \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n+*/\n+bool BamMultiReader::SetRegion(const BamRegion& region) {\n+ return d->SetRegion(region);\n+}\n+\n+/*! \\fn bool BamMultiReader::SetRegion(const int& leftRefID,\n+ const int& leftPosition,\n+ const int& rightRefID,\n+ const int& rightPosition)\n+ \\brief Sets a target region of interest\n+\n+ This is an overloaded function.\n+\n+ Equivalent to calling BamReader::SetRegion() on all open BAM files.\n+\n+ \\param leftRefID referenceID of region\'s left boundary\n+ \\param leftPosition position of region\'s left boundary\n+ \\param rightRefID reference ID of region\'s right boundary\n+ \\param rightPosition position of region\'s right boundary\n+\n+ \\returns \\c true if ALL readers set the region successfully\n+ \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n+*/\n+bool BamMultiReader::SetRegion(const int& leftRefID,\n+ const int& leftPosition,\n+ const int& rightRefID,\n+ const int& rightPosition)\n+{\n+ BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);\n+ return d->SetRegion(region);\n+}\n+\n+/*! 
\\fn void BamMultiReader::SetSortOrder(const SortOrder& order)\n+ \\brief Sets the expected sorting order for reading across multiple BAM files.\n+\n+ Default is BamMultiReader::SortedByPosition.\n+\n+ The SortOrder determines how the reader determines which alignment is "next"\n+ from among its open readers.\n+\n+ \\param order expected sort order\n+*/\n+void BamMultiReader::SetSortOrder(const SortOrder& order) {\n+ d->SetSortOrder(order);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,127 @@ +// *************************************************************************** +// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 15 March 2011 (DB) +// --------------------------------------------------------------------------- +// Convenience class for reading multiple BAM files. +// *************************************************************************** + +#ifndef BAMMULTIREADER_H +#define BAMMULTIREADER_H + +#include <api/api_global.h> +#include <api/BamReader.h> +#include <map> +#include <sstream> +#include <string> +#include <utility> + +namespace BamTools { + +namespace Internal { + class BamMultiReaderPrivate; +} // namespace Internal + +class API_EXPORT BamMultiReader { + + public: + enum SortOrder { SortedByPosition = 0 + , SortedByReadName + , Unsorted + }; + + // constructor / destructor + public: + BamMultiReader(void); + ~BamMultiReader(void); + + // public interface + public: + + // ---------------------- + // BAM file operations + // ---------------------- + + // closes all open BAM files + void Close(void); + // close only the requested BAM file + void CloseFile(const std::string& filename); + // returns list of filenames for all open BAM files + const std::vector<std::string> Filenames(void) const; + // returns true if multireader has any open BAM files + bool HasOpenReaders(void) const; + // performs random-access jump within current BAM files + bool Jump(int refID, int position = 0); + // opens BAM files + bool Open(const std::vector<std::string>& filenames); + // opens a single BAM file, adding to any other current BAM files + bool OpenFile(const std::string& filename); + // returns file pointers to beginning of alignments + bool Rewind(void); + // sets the target region of interest + bool SetRegion(const BamRegion& region); + // sets the 
target region of interest + bool SetRegion(const int& leftRefID, + const int& leftPosition, + const int& rightRefID, + const int& rightPosition); + + // ---------------------- + // access alignment data + // ---------------------- + + // retrieves next available alignment + bool GetNextAlignment(BamAlignment& alignment); + // retrieves next available alignmnet (without populating the alignment's string data fields) + bool GetNextAlignmentCore(BamAlignment& alignment); + + // sets the expected sorting order for reading across multiple BAM files + void SetSortOrder(const SortOrder& order); + + // ---------------------- + // access auxiliary data + // ---------------------- + + // returns unified SAM header for all files + SamHeader GetHeader(void) const; + // returns unified SAM header text for all files + std::string GetHeaderText(void) const; + // returns number of reference sequences + int GetReferenceCount(void) const; + // returns all reference sequence entries. + const BamTools::RefVector GetReferenceData(void) const; + // returns the ID of the reference with this name. + int GetReferenceID(const std::string& refName) const; + + // ---------------------- + // BAM index operations + // ---------------------- + + // creates index files for current BAM files + bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD); + // returns true if all BAM files have index data available + bool HasIndexes(void) const; + // looks for index files that match current BAM files + bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); + // opens index files for current BAM files. + bool OpenIndexes(const std::vector<std::string>& indexFilenames); + // changes the caching behavior of the index data + void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); + + // deprecated methods + public: + // returns \c true if all BAM files have index data available. 
+ bool IsIndexLoaded(void) const; + // convenience method for printing filenames to stdout + void PrintFilenames(void) const; + + // private implementation + private: + Internal::BamMultiReaderPrivate* d; +}; + +} // namespace BamTools + +#endif // BAMMULTIREADER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b"@@ -0,0 +1,370 @@\n+// ***************************************************************************\n+// BamReader.cpp (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 4 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides read access to BAM files.\n+// ***************************************************************************\n+\n+#include <api/BamReader.h>\n+#include <api/internal/BamReader_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <string>\n+#include <vector>\n+using namespace std;\n+\n+/*! \\class BamTools::BamReader\n+ \\brief Provides read access to BAM files.\n+*/\n+\n+/*! \\fn BamReader::BamReader(void)\n+ \\brief constructor\n+*/\n+BamReader::BamReader(void)\n+ : d(new BamReaderPrivate(this))\n+{ }\n+\n+/*! \\fn BamReader::~BamReader(void)\n+ \\brief destructor\n+*/\n+BamReader::~BamReader(void) {\n+ delete d;\n+ d = 0;\n+}\n+\n+/*! \\fn void BamReader::Close(void)\n+ \\brief Closes the current BAM file.\n+\n+ Also clears out all header and reference data.\n+\n+ \\sa IsOpen(), Open()\n+*/\n+void BamReader::Close(void) {\n+ d->Close();\n+}\n+\n+/*! \\fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)\n+ \\brief Creates an index file for current BAM file.\n+\n+ \\param type file format to create, see BamIndex::IndexType for available formats\n+ \\return \\c true if index created OK\n+ \\sa LocateIndex(), OpenIndex()\n+*/\n+bool BamReader::CreateIndex(const BamIndex::IndexType& type) {\n+ return d->CreateIndex(type);\n+}\n+\n+/*! 
\\fn const std::string BamReader::GetFilename(void) const\n+ \\brief Returns name of current BAM file.\n+\n+ Retrieved filename will contain whatever was passed via Open().\n+ If you need full directory paths here, be sure to include them\n+ when you open the BAM file.\n+\n+ \\returns name of open BAM file. If no file is open, returns an empty string.\n+ \\sa IsOpen()\n+*/\n+const std::string BamReader::GetFilename(void) const {\n+ return d->Filename();\n+}\n+\n+/*! \\fn SamHeader BamReader::GetHeader(void) const\n+ \\brief Returns SAM header data.\n+\n+ Header data is wrapped in a SamHeader object that can be conveniently queried & modified.\n+\n+ N.B. - Modifying the retrieved SamHeader object does NOT affect the\n+ current BAM file. This file has been opened in a read-only mode.\n+ However, your modified SamHeader object can be used in conjunction with\n+ BamWriter to generate a new BAM file with the appropriate header information.\n+\n+ \\returns header data object\n+ \\sa GetHeaderText()\n+*/\n+SamHeader BamReader::GetHeader(void) const {\n+ return d->GetSamHeader();\n+}\n+\n+/*! \\fn std::string BamReader::GetHeaderText(void) const\n+ \\brief Returns SAM header data, as SAM-formatted text.\n+\n+ N.B. - Modifying the retrieved text does NOT affect the current\n+ BAM file. This file has been opened in a read-only mode. However,\n+ your modified header text can be used in conjunction with BamWriter\n+ to generate a new BAM file with the appropriate header information.\n+\n+ \\returns SAM-formatted header text\n+ \\sa GetHeader()\n+*/\n+std::string BamReader::GetHeaderText(void) const {\n+ return d->GetHeaderText();\n+}\n+\n+/*! \\fn bool BamReader::GetNextAlignment(BamAlignment& alignment)\n+ \\brief Retrieves next available alignment.\n+\n+ Attempts to read the next alignment record from BAM file, and checks to see\n+ if it overlaps the current region. 
If no region is currently set, then the\n+ next alignment available is always considered valid.\n+\n+ If a region has been set, via Jump() or SetRegion(), an alignment is only\n+ considered valid if it overlaps the region. If the actual 'next' alignment record\n+ in the BAM file does not overlap this r"..b"ng& indexFilename)\n+ \\brief Opens a BAM index file.\n+\n+ \\param indexFilename name of BAM index file\n+\n+ \\returns \\c true if BAM index file was opened & data loaded successfully\n+ \\sa LocateIndex(), Open(), SetIndex()\n+*/\n+bool BamReader::OpenIndex(const std::string& indexFilename) {\n+ return d->OpenIndex(indexFilename);\n+}\n+\n+/*! \\fn bool BamReader::Rewind(void)\n+ \\brief Returns the internal file pointer to the first alignment record.\n+\n+ Useful for performing multiple sequential passes through a BAM file.\n+ Calling this function clears any prior region that may have been set.\n+\n+ N.B. - Note that this function sets the file pointer to first alignment record\n+ in the BAM file, NOT the beginning of the file.\n+\n+ \\returns \\c true if rewind operation was successful\n+ \\sa Jump(), SetRegion()\n+*/\n+bool BamReader::Rewind(void) {\n+ return d->Rewind();\n+}\n+\n+/*! \\fn void BamReader::SetIndex(BamIndex* index)\n+ \\brief Sets a custom BamIndex on this reader.\n+\n+ Only necessary for custom BamIndex subclasses. Most clients should\n+ never have to use this function.\n+\n+ Example:\n+ \\code\n+ BamReader reader;\n+ reader.SetIndex(new MyCustomBamIndex);\n+ \\endcode\n+\n+ N.B. - BamReader takes ownership of \\a index - i.e. BamReader will\n+ take care of deleting the pointer when the reader is destructed,\n+ when the current BAM file is closed, or when a new index is requested.\n+\n+ \\param index custom BamIndex subclass created by client\n+ \\sa CreateIndex(), LocateIndex(), OpenIndex()\n+*/\n+void BamReader::SetIndex(BamIndex* index) {\n+ d->SetIndex(index);\n+}\n+\n+/*! 
\\fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n+ \\brief Changes the caching behavior of the index data.\n+\n+ Default mode is BamIndex::LimitedIndexCaching.\n+\n+ \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n+ description of the available cache modes\n+ \\sa HasIndex()\n+*/\n+void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+ d->SetIndexCacheMode(mode);\n+}\n+\n+/*! \\fn bool BamReader::SetRegion(const BamRegion& region)\n+ \\brief Sets a target region of interest\n+\n+ Requires that index data be available. Attempts a random-access\n+ jump in the BAM file, near \\a region left boundary position.\n+\n+ Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()\n+ will only return \\c true when alignments can be found that overlap\n+ this \\a region.\n+\n+ A \\a region with no right boundary is considered open-ended, meaning\n+ that all alignments that lie downstream of the left boundary are\n+ considered valid, continuing to the end of the BAM file.\n+\n+ \\param region desired region-of-interest to activate\n+ \\returns \\c true if reader was able to jump successfully to the region's left boundary\n+ \\sa HasIndex(), Jump()\n+*/\n+bool BamReader::SetRegion(const BamRegion& region) {\n+ return d->SetRegion(region);\n+}\n+\n+/*! 
\\fn bool BamReader::SetRegion(const int& leftRefID,\n+ const int& leftPosition,\n+ const int& rightRefID,\n+ const int& rightPosition)\n+ \\brief Sets a target region of interest.\n+\n+ This is an overloaded function.\n+\n+ \\param leftRefID referenceID of region's left boundary\n+ \\param leftPosition position of region's left boundary\n+ \\param rightRefID reference ID of region's right boundary\n+ \\param rightPosition position of region's right boundary\n+\n+ \\returns \\c true if reader was able to jump successfully to the region's left boundary\n+ \\sa HasIndex(), Jump()\n+*/\n+bool BamReader::SetRegion(const int& leftRefID,\n+ const int& leftBound,\n+ const int& rightRefID,\n+ const int& rightBound)\n+{\n+ return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );\n+}\n" |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,118 @@ +// *************************************************************************** +// BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 4 March 2011 (DB) +// --------------------------------------------------------------------------- +// Provides read access to BAM files. +// *************************************************************************** + +#ifndef BAMREADER_H +#define BAMREADER_H + +#include <api/api_global.h> +#include <api/BamAlignment.h> +#include <api/BamIndex.h> +#include <api/SamHeader.h> +#include <string> + +namespace BamTools { + +namespace Internal { + class BamReaderPrivate; +} // namespace Internal + +class API_EXPORT BamReader { + + // constructor / destructor + public: + BamReader(void); + ~BamReader(void); + + // public interface + public: + + // ---------------------- + // BAM file operations + // ---------------------- + + // closes the current BAM file + void Close(void); + // returns filename of current BAM file + const std::string GetFilename(void) const; + // returns true if a BAM file is open for reading + bool IsOpen(void) const; + // performs random-access jump within BAM file + bool Jump(int refID, int position = 0); + // opens a BAM file + bool Open(const std::string& filename); + // returns internal file pointer to beginning of alignment data + bool Rewind(void); + // sets the target region of interest + bool SetRegion(const BamRegion& region); + // sets the target region of interest + bool SetRegion(const int& leftRefID, + const int& leftPosition, + const int& rightRefID, + const int& rightPosition); + + // ---------------------- + // access alignment data + // ---------------------- + + // retrieves next available alignment + bool GetNextAlignment(BamAlignment& alignment); + // retrieves next available alignmnet (without 
populating the alignment's string data fields) + bool GetNextAlignmentCore(BamAlignment& alignment); + + // ---------------------- + // access header data + // ---------------------- + + // returns SAM header data + SamHeader GetHeader(void) const; + // returns SAM header data, as SAM-formatted text + std::string GetHeaderText(void) const; + + // ---------------------- + // access reference data + // ---------------------- + + // returns the number of reference sequences + int GetReferenceCount(void) const; + // returns all reference sequence entries + const RefVector& GetReferenceData(void) const; + // returns the ID of the reference with this name + int GetReferenceID(const std::string& refName) const; + + // ---------------------- + // BAM index operations + // ---------------------- + + // creates an index file for current BAM file, using the requested index type + bool CreateIndex(const BamIndex::IndexType& type = BamIndex::STANDARD); + // returns true if index data is available + bool HasIndex(void) const; + // looks in BAM file's directory for a matching index file + bool LocateIndex(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); + // opens a BAM index file + bool OpenIndex(const std::string& indexFilename); + // sets a custom BamIndex on this reader + void SetIndex(BamIndex* index); + // changes the caching behavior of the index data + void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); + + // deprecated methods + public: + // returns true if index data is available + bool IsIndexLoaded(void) const; + + // private implementation + private: + Internal::BamReaderPrivate* d; +}; + +} // namespace BamTools + +#endif // BAMREADER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,143 @@ +// *************************************************************************** +// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 4 March 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for producing BAM files +// *************************************************************************** + +#include <api/BamAlignment.h> +#include <api/BamWriter.h> +#include <api/SamHeader.h> +#include <api/internal/BamWriter_p.h> +using namespace BamTools; +using namespace BamTools::Internal; + +#include <iostream> +using namespace std; + +/*! \class BamTools::BamWriter + \brief Provides write access for generating BAM files. +*/ +/*! \enum BamTools::BamWriter::CompressionMode + \brief This enum describes the compression behaviors for output BAM files. +*/ +/*! \var BamWriter::CompressionMode BamWriter::Compressed + \brief Use normal BAM compression +*/ +/*! \var BamWriter::CompressionMode BamWriter::Uncompressed + \brief Disable BAM compression + + Useful in situations where the BAM data is streamed (e.g. piping). + It would be wasteful to compress, and then immediately decompress + the data. +*/ + +/*! \fn BamWriter::BamWriter(void) + \brief constructor +*/ +BamWriter::BamWriter(void) + : d(new BamWriterPrivate) +{ } + +/*! \fn BamWriter::~BamWriter(void) + \brief destructor +*/ +BamWriter::~BamWriter(void) { + delete d; + d = 0; +} + +/*! \fn BamWriter::Close(void) + \brief Closes the current BAM file. + \sa Open() +*/ +void BamWriter::Close(void) { + d->Close(); +} + +/*! \fn bool BamWriter::IsOpen(void) const + \brief Returns \c true if BAM file is open for writing. + \sa Open() +*/ +bool BamWriter::IsOpen(void) const { + return d->IsOpen(); +} + +/*! 
\fn bool BamWriter::Open(const std::string& filename, + const std::string& samHeaderText, + const RefVector& referenceSequences) + \brief Opens a BAM file for writing. + + Will overwrite the BAM file if it already exists. + + \param filename name of output BAM file + \param samHeaderText header data, as SAM-formatted string + \param referenceSequences list of reference entries + + \return \c true if opened successfully + \sa Close(), IsOpen(), BamReader::GetHeaderText(), BamReader::GetReferenceData() +*/ +bool BamWriter::Open(const std::string& filename, + const std::string& samHeaderText, + const RefVector& referenceSequences) +{ + return d->Open(filename, samHeaderText, referenceSequences); +} + +/*! \fn bool BamWriter::Open(const std::string& filename, + const SamHeader& samHeader, + const RefVector& referenceSequences) + \brief Opens a BAM file for writing. + + This is an overloaded function. + + Will overwrite the BAM file if it already exists. + + \param filename name of output BAM file + \param samHeader header data, wrapped in SamHeader object + \param referenceSequences list of reference entries + + \return \c true if opened successfully + \sa Close(), IsOpen(), BamReader::GetHeader(), BamReader::GetReferenceData() +*/ +bool BamWriter::Open(const std::string& filename, + const SamHeader& samHeader, + const RefVector& referenceSequences) +{ + return d->Open(filename, samHeader.ToString(), referenceSequences); +} + +/*! \fn void BamWriter::SaveAlignment(const BamAlignment& alignment) + \brief Saves an alignment to the BAM file. + + \param alignment BamAlignment record to save + \sa BamReader::GetNextAlignment(), BamReader::GetNextAlignmentCore() +*/ +void BamWriter::SaveAlignment(const BamAlignment& alignment) { + d->SaveAlignment(alignment); +} + +/*! \fn void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) + \brief Sets the output compression mode. + + Default mode is BamWriter::Compressed. + + N.B. 
- Changing the compression mode is disabled on open files (i.e. the request will be ignored). + Be sure to call this function before opening the BAM file. + + \code + BamWriter writer; + writer.SetCompressionMode(BamWriter::Uncompressed); + writer.Open( ... ); + // ... + \endcode + + \param compressionMode desired output compression behavior + \sa IsOpen(), Open() +*/ +void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) { + d->SetWriteCompressed( compressionMode == BamWriter::Compressed ); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,64 @@ +// *************************************************************************** +// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 4 March 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for producing BAM files +// *************************************************************************** + +#ifndef BAMWRITER_H +#define BAMWRITER_H + +#include <api/api_global.h> +#include <api/BamAux.h> +#include <string> + +namespace BamTools { + +class BamAlignment; +class SamHeader; + +namespace Internal { + class BamWriterPrivate; +} // namespace Internal + +class API_EXPORT BamWriter { + + public: enum CompressionMode { Compressed = 0 + , Uncompressed + }; + + // ctor & dtor + public: + BamWriter(void); + ~BamWriter(void); + + // public interface + public: + // closes the current BAM file + void Close(void); + // returns true if BAM file is open for writing + bool IsOpen(void) const; + // opens a BAM file for writing + bool Open(const std::string& filename, + const std::string& samHeaderText, + const RefVector& referenceSequences); + // opens a BAM file for writing + bool Open(const std::string& filename, + const SamHeader& samHeader, + const RefVector& referenceSequences); + // saves the alignment to the alignment archive + void SaveAlignment(const BamAlignment& alignment); + // sets the output compression mode + void SetCompressionMode(const CompressionMode& compressionMode); + + // private implementation + private: + Internal::BamWriterPrivate* d; +}; + +} // namespace BamTools + +#endif // BAMWRITER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,78 @@ +# ========================== +# BamTools CMakeLists.txt +# (c) 2010 Derek Barnett +# +# src/api/ +# ========================== + +# list include paths +include_directories( ${BamTools_SOURCE_DIR}/src ) + +# add compiler definitions +add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols) +add_definitions( -fPIC ) # (attempt to force PIC compiling on some archs) + +# list of all BamTools API source (.cpp) files +set( BamToolsAPISources + BamAlignment.cpp + BamMultiReader.cpp + BamReader.cpp + BamWriter.cpp + SamHeader.cpp + SamProgram.cpp + SamProgramChain.cpp + SamReadGroup.cpp + SamReadGroupDictionary.cpp + SamSequence.cpp + SamSequenceDictionary.cpp + internal/BamHeader_p.cpp + internal/BamIndexFactory_p.cpp + internal/BamMultiReader_p.cpp + internal/BamRandomAccessController_p.cpp + internal/BamReader_p.cpp + internal/BamStandardIndex_p.cpp + internal/BamToolsIndex_p.cpp + internal/BamWriter_p.cpp + internal/BgzfStream_p.cpp + internal/SamFormatParser_p.cpp + internal/SamFormatPrinter_p.cpp + internal/SamHeaderValidator_p.cpp +) + +# create main BamTools API shared library +add_library( BamTools SHARED ${BamToolsAPISources} ) +set_target_properties( BamTools PROPERTIES SOVERSION "1.0.2" ) +set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" ) + +# create main BamTools API static library +add_library( BamTools-static STATIC ${BamToolsAPISources} ) +set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" ) +set_target_properties( BamTools-static PROPERTIES PREFIX "lib" ) + +# link libraries with zlib automatically +target_link_libraries( BamTools z ) +target_link_libraries( BamTools-static z ) + +# set library install destinations +install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools") +install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools") + +# export API headers +include(../ExportHeader.cmake) +set(ApiIncludeDir "api") +ExportHeader(APIHeaders api_global.h 
${ApiIncludeDir}) +ExportHeader(APIHeaders BamAlignment.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamAux.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamConstants.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir}) +ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamProgram.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamProgramChain.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamSequence.h ${ApiIncludeDir}) +ExportHeader(APIHeaders SamSequenceDictionary.h ${ApiIncludeDir}) |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,96 @@ +// *************************************************************************** +// SamConstants.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides constants for SAM header +// *************************************************************************** + +#ifndef SAM_CONSTANTS_H +#define SAM_CONSTANTS_H + +#include <api/api_global.h> +#include <string> + +namespace BamTools { +namespace Constants { + +// basic char constants used in SAM format +const char SAM_COLON = ':'; +const char SAM_EQUAL = '='; +const char SAM_PERIOD = '.'; +const char SAM_STAR = '*'; +const char SAM_TAB = '\t'; +const std::string SAM_DIGITS = "0123456789"; + +// HD entries +const std::string SAM_HD_BEGIN_TOKEN = "@HD"; +const std::string SAM_HD_VERSION_TAG = "VN"; +const std::string SAM_HD_SORTORDER_TAG = "SO"; +const std::string SAM_HD_GROUPORDER_TAG = "GO"; + +// SQ entries +const std::string SAM_SQ_BEGIN_TOKEN = "@SQ"; +const std::string SAM_SQ_ASSEMBLYID_TAG = "AS"; +const std::string SAM_SQ_CHECKSUM_TAG = "M5"; +const std::string SAM_SQ_LENGTH_TAG = "LN"; +const std::string SAM_SQ_NAME_TAG = "SN"; +const std::string SAM_SQ_SPECIES_TAG = "SP"; +const std::string SAM_SQ_URI_TAG = "UR"; + +// RG entries +const std::string SAM_RG_BEGIN_TOKEN = "@RG"; +const std::string SAM_RG_DESCRIPTION_TAG = "DS"; +const std::string SAM_RG_FLOWORDER_TAG = "FO"; +const std::string SAM_RG_ID_TAG = "ID"; +const std::string SAM_RG_KEYSEQUENCE_TAG = "KS"; +const std::string SAM_RG_LIBRARY_TAG = "LB"; +const std::string SAM_RG_PLATFORMUNIT_TAG = "PU"; +const std::string SAM_RG_PREDICTEDINSERTSIZE_TAG = "PI"; +const std::string SAM_RG_PRODUCTIONDATE_TAG = "DT"; +const std::string SAM_RG_PROGRAM_TAG = "PG"; +const std::string 
SAM_RG_SAMPLE_TAG = "SM"; +const std::string SAM_RG_SEQCENTER_TAG = "CN"; +const std::string SAM_RG_SEQTECHNOLOGY_TAG = "PL"; + +// PG entries +const std::string SAM_PG_BEGIN_TOKEN = "@PG"; +const std::string SAM_PG_COMMANDLINE_TAG = "CL"; +const std::string SAM_PG_ID_TAG = "ID"; +const std::string SAM_PG_NAME_TAG = "PN"; +const std::string SAM_PG_PREVIOUSPROGRAM_TAG = "PP"; +const std::string SAM_PG_VERSION_TAG = "VN"; + +// CO entries +const std::string SAM_CO_BEGIN_TOKEN = "@CO"; + +// HD:SO values +const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate"; +const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname"; +const std::string SAM_HD_SORTORDER_UNKNOWN = "unknown"; +const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted"; + +// HD:GO values +const std::string SAM_HD_GROUPORDER_NONE = "none"; +const std::string SAM_HD_GROUPORDER_QUERY = "query"; +const std::string SAM_HD_GROUPORDER_REFERENCE = "reference"; + +// SQ:LN values +const unsigned int SAM_SQ_LENGTH_MIN = 1; +const unsigned int SAM_SQ_LENGTH_MAX = 536870911; // 2^29 - 1 + +// RG:PL values +const std::string SAM_RG_SEQTECHNOLOGY_CAPILLARY = "CAPILLARY"; +const std::string SAM_RG_SEQTECHNOLOGY_HELICOS = "HELICOS"; +const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA = "ILLUMINA"; +const std::string SAM_RG_SEQTECHNOLOGY_IONTORRENT = "IONTORRENT"; +const std::string SAM_RG_SEQTECHNOLOGY_LS454 = "LS454"; +const std::string SAM_RG_SEQTECHNOLOGY_PACBIO = "PACBIO"; +const std::string SAM_RG_SEQTECHNOLOGY_SOLID = "SOLID"; + +} // namespace Constants +} // namespace BamTools + +#endif // SAM_CONSTANTS_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,185 @@ +// *************************************************************************** +// SamHeader.cpp (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM header data fields. +// *************************************************************************** + +#include <api/SamConstants.h> +#include <api/SamHeader.h> +#include <api/internal/SamFormatParser_p.h> +#include <api/internal/SamFormatPrinter_p.h> +#include <api/internal/SamHeaderValidator_p.h> +using namespace BamTools; +using namespace BamTools::Internal; +using namespace std; + +/*! \struct BamTools::SamHeader + \brief Represents the SAM-formatted text header that is part of the BAM file header. + + Provides direct read/write access to the SAM header data fields. + + \sa \samSpecURL +*/ +/*! \var SamHeader::Version + \brief corresponds to \@HD VN:\<Version\> + + Required for valid SAM header, if @HD record is present. +*/ +/*! \var SamHeader::SortOrder + \brief corresponds to \@HD SO:\<SortOrder\> +*/ +/*! \var SamHeader::GroupOrder + \brief corresponds to \@HD GO:\<GroupOrder\> +*/ +/*! \var SamHeader::Sequences + \brief corresponds to \@SQ entries + \sa SamSequence, SamSequenceDictionary +*/ +/*! \var SamHeader::ReadGroups + \brief corresponds to \@RG entries + \sa SamReadGroup, SamReadGroupDictionary +*/ +/*! \var SamHeader::ProgramName + \brief corresponds to \@PG ID:\<ProgramName\> +*/ +/*! \var SamHeader::ProgramVersion + \brief corresponds to \@PG VN:\<ProgramVersion\> +*/ +/*! \var SamHeader::ProgramCommandLine + \brief corresponds to \@PG CL:\<ProgramCommandLine\> +*/ +/*! \var SamHeader::Comments + \brief corresponds to \@CO entries +*/ + +/*! 
\fn SamHeader::SamHeader(const std::string& headerText = "") + \brief constructor +*/ +SamHeader::SamHeader(const std::string& headerText) + : Version("") + , SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN) + , GroupOrder("") +{ + SamFormatParser parser(*this); + parser.Parse(headerText); +} + +/*! \fn SamHeader::SamHeader(const SamHeader& other) + \brief copy constructor +*/ +SamHeader::SamHeader(const SamHeader& other) + : Version(other.Version) + , SortOrder(other.SortOrder) + , GroupOrder(other.GroupOrder) + , Sequences(other.Sequences) + , ReadGroups(other.ReadGroups) + , Programs(other.Programs) +{ } + +/*! \fn SamHeader::~SamHeader(void) + \brief destructor +*/ +SamHeader::~SamHeader(void) { } + +/*! \fn void SamHeader::Clear(void) + \brief Clears all header contents. +*/ +void SamHeader::Clear(void) { + Version.clear(); + SortOrder.clear(); + GroupOrder.clear(); + Sequences.Clear(); + ReadGroups.Clear(); + Programs.Clear(); + Comments.clear(); +} + +/*! \fn bool SamHeader::HasVersion(void) const + \brief Returns \c true if header contains \@HD ID:\<Version\> +*/ +bool SamHeader::HasVersion(void) const { + return (!Version.empty()); +} + +/*! \fn bool SamHeader::HasSortOrder(void) const + \brief Returns \c true if header contains \@HD SO:\<SortOrder\> +*/ +bool SamHeader::HasSortOrder(void) const { + return (!SortOrder.empty()); +} + +/*! \fn bool SamHeader::HasGroupOrder(void) const + \brief Returns \c true if header contains \@HD GO:\<GroupOrder\> +*/ +bool SamHeader::HasGroupOrder(void) const { + return (!GroupOrder.empty()); +} + +/*! \fn bool SamHeader::HasSequences(void) const + \brief Returns \c true if header contains any \@SQ entries +*/ +bool SamHeader::HasSequences(void) const { + return (!Sequences.IsEmpty()); +} + +/*! \fn bool SamHeader::HasReadGroups(void) const + \brief Returns \c true if header contains any \@RG entries +*/ +bool SamHeader::HasReadGroups(void) const { + return (!ReadGroups.IsEmpty()); +} + +/*! 
\fn bool SamHeader::HasPrograms(void) const + \brief Returns \c true if header contains any \@PG entries +*/ +bool SamHeader::HasPrograms(void) const { + return (!Programs.IsEmpty()); +} + +/*! \fn bool SamHeader::HasComments(void) const + \brief Returns \c true if header contains any \@CO entries +*/ +bool SamHeader::HasComments(void) const { + return (!Comments.empty()); +} + +/*! \fn bool SamHeader::IsValid(bool verbose = false) const + \brief Checks header contents for required data and proper formatting. + \param verbose If set to true, validation errors & warnings will be printed to stderr. + Otherwise, output is suppressed and only validation check occurs. + \return \c true if SAM header is well-formed +*/ +bool SamHeader::IsValid(bool verbose) const { + SamHeaderValidator validator(*this); + return validator.Validate(verbose); +} + +/*! \fn void SamHeader::SetHeaderText(const std::string& headerText) + \brief Replaces header contents with \a headerText. + \param headerText SAM formatted-text that will be parsed into data fields +*/ +void SamHeader::SetHeaderText(const std::string& headerText) { + + // clear prior data + Clear(); + + // parse header text into data + SamFormatParser parser(*this); + parser.Parse(headerText); +} + +/*! \fn std::string SamHeader::ToString(void) const + \brief Converts data fields to SAM-formatted text. + + Applies any local modifications made since creating this object or calling SetHeaderText(). + + \return SAM-formatted header text +*/ +string SamHeader::ToString(void) const { + SamFormatPrinter printer(*this); + return printer.ToString(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,69 @@ +// *************************************************************************** +// SamHeader.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM header data fields. +// *************************************************************************** + +#ifndef SAM_HEADER_H +#define SAM_HEADER_H + +#include <api/api_global.h> +#include <api/SamProgramChain.h> +#include <api/SamReadGroupDictionary.h> +#include <api/SamSequenceDictionary.h> +#include <string> +#include <vector> + +namespace BamTools { + +struct API_EXPORT SamHeader { + + // ctor & dtor + SamHeader(const std::string& headerText = ""); + SamHeader(const SamHeader& other); + ~SamHeader(void); + + // query/modify entire SamHeader + void Clear(void); // clears all header contents + bool IsValid(bool verbose = false) const; // returns true if SAM header is well-formed + void SetHeaderText(const std::string& headerText); // replaces data fields with contents of SAM-formatted text + std::string ToString(void) const; // returns the printable, SAM-formatted header text + + // convenience query methods + bool HasVersion(void) const; // returns true if header contains format version entry + bool HasSortOrder(void) const; // returns true if header contains sort order entry + bool HasGroupOrder(void) const; // returns true if header contains group order entry + bool HasSequences(void) const; // returns true if header contains any sequence entries + bool HasReadGroups(void) const; // returns true if header contains any read group entries + bool HasPrograms(void) const; // returns true if header contains any program record entries + bool HasComments(void) const; // returns true if header contains comments + + // 
-------------- + // data members + // -------------- + + // header metadata (@HD line) + std::string Version; // VN:<Version> *Required for valid SAM header, if @HD record is present* + std::string SortOrder; // SO:<SortOrder> + std::string GroupOrder; // GO:<GroupOrder> + + // header sequences (@SQ entries) + SamSequenceDictionary Sequences; + + // header read groups (@RG entries) + SamReadGroupDictionary ReadGroups; + + // header program data (@PG entries) + SamProgramChain Programs; + + // header comments (@CO entries) + std::vector<std::string> Comments; +}; + +} // namespace BamTools + +#endif // SAM_HEADER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,140 @@ +// *************************************************************************** +// SamProgram.cpp (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM header program records. +// *************************************************************************** + +#include <api/SamProgram.h> +using namespace BamTools; +using namespace std; + +/*! \struct BamTools::SamProgram + \brief Represents a SAM program record. + + Provides direct read/write access to the SAM header program records. + + \sa \samSpecURL +*/ +/*! \var SamProgram::CommandLine + \brief corresponds to \@PG CL:\<CommandLine\> +*/ +/*! \var SamProgram::ID + \brief corresponds to \@PG ID:\<ID\> + + Required for valid SAM header. +*/ +/*! \var SamProgram::Name + \brief corresponds to \@PG PN:\<Name\> +*/ +/*! \var SamProgram::PreviousProgramID + \brief corresponds to \@PG PP:\<PreviousProgramID\> +*/ +/*! \var SamProgram::Version + \brief corresponds to \@PG VN:\<Version\> +*/ +/*! \var SamProgram::NextProgramID + \internal + Holds ID of the "next" program record in a SamProgramChain +*/ + +/*! \fn SamProgram::SamProgram(void) + \brief default constructor +*/ +SamProgram::SamProgram(void) + : CommandLine("") + , ID("") + , Name("") + , PreviousProgramID("") + , Version("") + , NextProgramID("") +{ } + +/*! \fn SamProgram::SamProgram(const std::string& id) + \brief constructs program record with \a id + + \param id desired program record ID +*/ +SamProgram::SamProgram(const std::string& id) + : CommandLine("") + , ID(id) + , Name("") + , PreviousProgramID("") + , Version("") + , NextProgramID("") +{ } + +/*! 
\fn SamProgram::SamProgram(const SamProgram& other) + \brief copy constructor +*/ +SamProgram::SamProgram(const SamProgram& other) + : CommandLine(other.CommandLine) + , ID(other.ID) + , Name(other.Name) + , PreviousProgramID(other.PreviousProgramID) + , Version(other.Version) + , NextProgramID(other.NextProgramID) +{ } + +/*! \fn SamProgram::~SamProgram(void) + \brief destructor +*/ +SamProgram::~SamProgram(void) { } + +/*! \fn void SamProgram::Clear(void) + \brief Clears all data fields. +*/ +void SamProgram::Clear(void) { + CommandLine.clear(); + ID.clear(); + Name.clear(); + PreviousProgramID.clear(); + Version.clear(); + NextProgramID.clear(); +} + +/*! \fn bool SamProgram::HasCommandLine(void) const + \brief Returns \c true if program record contains \@PG CL:\<CommandLine\> +*/ +bool SamProgram::HasCommandLine(void) const { + return (!CommandLine.empty()); +} + +/*! \fn bool SamProgram::HasID(void) const + \brief Returns \c true if program record contains \@PG ID:\<ID\> +*/ +bool SamProgram::HasID(void) const { + return (!ID.empty()); +} + +/*! \fn bool SamProgram::HasName(void) const + \brief Returns \c true if program record contains \@PG PN:\<Name\> +*/ +bool SamProgram::HasName(void) const { + return (!Name.empty()); +} + +/*! \fn bool SamProgram::HasNextProgramID(void) const + \internal + \return true if program has a "next" record in a SamProgramChain +*/ +bool SamProgram::HasNextProgramID(void) const { + return (!NextProgramID.empty()); +} + +/*! \fn bool SamProgram::HasPreviousProgramID(void) const + \brief Returns \c true if program record contains \@PG PP:\<PreviousProgramID\> +*/ +bool SamProgram::HasPreviousProgramID(void) const { + return (!PreviousProgramID.empty()); +} + +/*! \fn bool SamProgram::HasVersion(void) const + \brief Returns \c true if program record contains \@PG VN:\<Version\> +*/ +bool SamProgram::HasVersion(void) const { + return (!Version.empty()); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,62 @@ +// *************************************************************************** +// SamProgram.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM header program records. +// *************************************************************************** + +#ifndef SAM_PROGRAM_H +#define SAM_PROGRAM_H + +#include "api/api_global.h" +#include <string> + +namespace BamTools { + +class SamProgramChain; + +struct API_EXPORT SamProgram { + + // ctor & dtor + SamProgram(void); + SamProgram(const std::string& id); + SamProgram(const SamProgram& other); + ~SamProgram(void); + + // query/modify entire program record + void Clear(void); // clears all data fields + + // convenience query methods + bool HasCommandLine(void) const; // returns true if program record has a command line entry + bool HasID(void) const; // returns true if program record has an ID + bool HasName(void) const; // returns true if program record has a name + bool HasPreviousProgramID(void) const; // returns true if program record has a 'previous program ID' + bool HasVersion(void) const; // returns true if program record has a version + + // data members + std::string CommandLine; // CL:<CommandLine> + std::string ID; // ID:<ID> *Required for valid SAM header* + std::string Name; // PN:<Name> + std::string PreviousProgramID; // PP:<PreviousProgramID> + std::string Version; // VN:<Version> + + // internal (non-standard) methods & fields + private: + bool HasNextProgramID(void) const; + std::string NextProgramID; + friend class BamTools::SamProgramChain; +}; + +/*! 
\fn bool operator==(const SamProgram& lhs, const SamProgram& rhs) + \brief tests equality by comparing program IDs +*/ +API_EXPORT inline bool operator==(const SamProgram& lhs, const SamProgram& rhs) { + return lhs.ID == rhs.ID; +} + +} // namespace BamTools + +#endif // SAM_PROGRAM_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,352 @@\n+// ***************************************************************************\n+// SamProgramChain.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 19 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a SamProgram record "chain"\n+// ***************************************************************************\n+\n+#include <api/SamProgramChain.h>\n+using namespace BamTools;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <cstdlib>\n+using namespace std;\n+\n+/*! \\class BamTools::SamProgramChain\n+ \\brief Sorted container "chain" of SamProgram records.\n+\n+ Provides methods for operating on a collection of SamProgram records.\n+\n+ N.B. - Underlying container is *NOT* ordered by linkage, but by order of\n+ appearance in SamHeader and subsequent Add() calls. Using the current\n+ iterators will not allow you to step through the header\'s program history.\n+ Instead use First()/Last() to access oldest/newest records, respectively.\n+*/\n+\n+/*! \\fn SamProgramChain::SamProgramChain(void)\n+ \\brief constructor\n+*/\n+SamProgramChain::SamProgramChain(void) { }\n+\n+/*! \\fn SamProgramChain::SamProgramChain(const SamProgramChain& other)\n+ \\brief copy constructor\n+*/\n+SamProgramChain::SamProgramChain(const SamProgramChain& other)\n+ : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamProgramChain::~SamProgramChain(void)\n+ \\brief destructor\n+*/\n+SamProgramChain::~SamProgramChain(void) { }\n+\n+/*! \\fn void SamProgramChain::Add(SamProgram& program)\n+ \\brief Appends a program to program chain.\n+\n+ Duplicate entries are silently discarded.\n+\n+ N.B. - Underlying container is *NOT* ordered by linkage, but by order of\n+ appearance in SamHeader and subsequent Add() calls. 
Using the current\n+ iterators will not allow you to step through the header\'s program history.\n+ Instead use First()/Last() to access oldest/newest records, respectively.\n+\n+ \\param program entry to be appended\n+*/\n+void SamProgramChain::Add(SamProgram& program) {\n+\n+ // ignore duplicated records\n+ if ( Contains(program) )\n+ return;\n+\n+ // if other programs already in chain, try to find the "next" record\n+ // tries to match another record\'s PPID with @program\'s ID\n+ if ( !IsEmpty() )\n+ program.NextProgramID = NextIdFor(program.ID);\n+\n+ // store program record\n+ m_data.push_back(program);\n+}\n+\n+/*! \\fn void SamProgramChain::Add(const std::vector<SamProgram>& programs)\n+ \\brief Appends a batch of programs to the end of the chain.\n+\n+ This is an overloaded function.\n+\n+ \\param programs batch of program records to append\n+ \\sa Add()\n+*/\n+void SamProgramChain::Add(std::vector<SamProgram>& programs) {\n+ vector<SamProgram>::iterator pgIter = programs.begin();\n+ vector<SamProgram>::iterator pgEnd = programs.end();\n+ for ( ; pgIter != pgEnd; ++pgIter )\n+ Add(*pgIter);\n+}\n+\n+/*! \\fn SamProgramIterator SamProgramChain::Begin(void)\n+ \\return an STL iterator pointing to the first (oldest) program record\n+ \\sa ConstBegin(), End(), First()\n+*/\n+SamProgramIterator SamProgramChain::Begin(void) {\n+ return m_data.begin();\n+}\n+\n+/*! \\fn SamProgramConstIterator SamProgramChain::Begin(void) const\n+ \\return an STL const_iterator pointing to the first (oldest) program record\n+\n+ This is an overloaded function.\n+\n+ \\sa ConstBegin(), End(), First()\n+*/\n+SamProgramConstIterator SamProgramChain::Begin(void) const {\n+ return m_data.begin();\n+}\n+\n+/*! \\fn void SamProgramChain::Clear(void)\n+ \\brief Clears all program records.\n+*/\n+void SamProgramChain::Clear(void) {\n+ m_data.clear();\n+}\n+\n+/*! 
\\fn SamProgramConstIterator SamProgramChain::ConstBegin(void) const\n+ \\return an STL const_iterator pointing to the first (oldest) program record\n+ '..b'!= end; ++iter ) {\n+ const SamProgram& current = (*iter);\n+ if ( current.ID == programId )\n+ break;\n+ }\n+ return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamProgramChain::IsEmpty(void) const\n+ \\brief Returns \\c true if chain contains no records\n+ \\sa Size()\n+*/\n+bool SamProgramChain::IsEmpty(void) const {\n+ return m_data.empty();\n+}\n+\n+/*! \\fn SamProgram& SamProgramChain::Last(void)\n+ \\brief Fetches last (newest) record in the chain.\n+\n+ N.B. - This function will fail if the chain is empty. If this is possible,\n+ check the result of IsEmpty() before calling this function.\n+\n+ \\return a modifiable reference to the last (newest) program entry\n+ \\sa End(), First()\n+*/\n+SamProgram& SamProgramChain::Last(void) {\n+ // find first record in container that has no NextProgramID entry\n+ SamProgramIterator iter = Begin();\n+ SamProgramIterator end = End();\n+ for ( ; iter != end; ++iter ) {\n+ SamProgram& current = (*iter);\n+ if ( !current.HasNextProgramID() )\n+ return current;\n+ }\n+\n+ // otherwise error\n+ cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n+ exit(1);\n+}\n+\n+/*! \\fn const SamProgram& SamProgramChain::Last(void) const\n+ \\brief Fetches last (newest) record in the chain.\n+\n+ This is an overloaded function.\n+\n+ N.B. - This function will fail if the chain is empty. 
If this is possible,\n+ check the result of IsEmpty() before calling this function.\n+\n+ \\return a read-only reference to the last (newest) program entry\n+ \\sa End(), ConstEnd(), First()\n+*/\n+const SamProgram& SamProgramChain::Last(void) const {\n+ // find first record in container that has no NextProgramID entry\n+ SamProgramConstIterator iter = ConstBegin();\n+ SamProgramConstIterator end = ConstEnd();\n+ for ( ; iter != end; ++iter ) {\n+ const SamProgram& current = (*iter);\n+ if ( !current.HasNextProgramID() )\n+ return current;\n+ }\n+\n+ // otherwise error\n+ cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n+ exit(1);\n+}\n+\n+/*! \\fn const std::string SamProgramChain::NextIdFor(const std::string& programId) const\n+ \\internal\n+ \\return ID of program record, whose PreviousProgramID matches \\a programId.\n+ Otherwise, returns empty string if none found.\n+*/\n+const std::string SamProgramChain::NextIdFor(const std::string& programId) const {\n+\n+ // find first record in container whose PreviousProgramID matches @programId\n+ SamProgramConstIterator iter = ConstBegin();\n+ SamProgramConstIterator end = ConstEnd();\n+ for ( ; iter != end; ++iter ) {\n+ const SamProgram& current = (*iter);\n+ if ( !current.HasPreviousProgramID() &&\n+ current.PreviousProgramID == programId\n+ )\n+ {\n+ return current.ID;\n+ }\n+ }\n+\n+ // none found\n+ return string();\n+}\n+\n+/*! \\fn int SamProgramChain::Size(void) const\n+ \\brief Returns number of program records in the chain.\n+ \\sa IsEmpty()\n+*/\n+int SamProgramChain::Size(void) const {\n+ return m_data.size();\n+}\n+\n+/*! 
\\fn SamProgram& SamProgramChain::operator[](const std::string& programId)\n+ \\brief Retrieves the modifiable SamProgram record that matches \\a programId.\n+\n+ NOTE - If the chain contains no program record matching this ID, this function will\n+ print an error and terminate.\n+\n+ \\param programId ID of program record to retrieve\n+ \\return a modifiable reference to the SamProgram associated with the ID\n+*/\n+SamProgram& SamProgramChain::operator[](const std::string& programId) {\n+\n+ // look up program record matching this ID\n+ int index = IndexOf(programId);\n+\n+ // if record not found (IndexOf() yields the container size when nothing matches)\n+ if ( index == (int)m_data.size() ) {\n+ cerr << "SamProgramChain ERROR - unknown programId: " << programId << endl;\n+ exit(1);\n+ }\n+\n+ // otherwise return program record at index\n+ return m_data.at(index);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,86 @@ +// *************************************************************************** +// SamProgramChain.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides methods for operating on a SamProgram record "chain" +// *************************************************************************** + +#ifndef SAM_PROGRAMCHAIN_H +#define SAM_PROGRAMCHAIN_H + +#include <api/api_global.h> +#include <api/SamProgram.h> +#include <string> +#include <vector> + +namespace BamTools { + +// chain is *NOT* sorted in any order +// use First()/Last() to retrieve oldest/newest programs, respectively +typedef std::vector<SamProgram> SamProgramContainer; +typedef SamProgramContainer::iterator SamProgramIterator; +typedef SamProgramContainer::const_iterator SamProgramConstIterator; + +class API_EXPORT SamProgramChain { + + // ctor & dtor + public: + SamProgramChain(void); + SamProgramChain(const SamProgramChain& other); + ~SamProgramChain(void); + + // query/modify program data + public: + // appends a program record to the chain + void Add(SamProgram& program); + void Add(std::vector<SamProgram>& programs); + + // clears all read group entries + void Clear(void); + + // returns true if chain contains this program record (matches on ID) + bool Contains(const SamProgram& program) const; + bool Contains(const std::string& programId) const; + + // returns the first (oldest) program in the chain + SamProgram& First(void); + const SamProgram& First(void) const; + + // returns true if chain is empty + bool IsEmpty(void) const; + + // returns last (most recent) program in the chain + SamProgram& Last(void); + const SamProgram& Last(void) const; + + // returns number of program records in the chain + int Size(void) const; + + // 
retrieves a modifiable reference to the SamProgram object associated with this ID + SamProgram& operator[](const std::string& programId); + + // retrieve STL-compatible iterators + public: + SamProgramIterator Begin(void); // returns iterator to begin() + SamProgramConstIterator Begin(void) const; // returns const_iterator to begin() + SamProgramConstIterator ConstBegin(void) const; // returns const_iterator to begin() + SamProgramIterator End(void); // returns iterator to end() + SamProgramConstIterator End(void) const; // returns const_iterator to end() + SamProgramConstIterator ConstEnd(void) const; // returns const_iterator to end() + + // internal methods + private: + int IndexOf(const std::string& programId) const; + const std::string NextIdFor(const std::string& programId) const; + + // data members + private: + SamProgramContainer m_data; +}; + +} // namespace BamTools + +#endif // SAM_PROGRAMCHAIN_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,222 @@ +// *************************************************************************** +// SamReadGroup.cpp (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM read group data fields. +// *************************************************************************** + +#include <api/SamReadGroup.h> +using namespace BamTools; +using namespace std; + +/*! \struct BamTools::SamReadGroup + \brief Represents a SAM read group entry. + + Provides direct read/write access to the SAM read group data fields. + + \sa \samSpecURL +*/ +/*! \var SamReadGroup::Description + \brief corresponds to \@RG DS:\<Description\> +*/ +/*! \var SamReadGroup::FlowOrder + \brief corresponds to \@RG FO:\<FlowOrder\> +*/ +/*! \var SamReadGroup::ID + \brief corresponds to \@RG ID:\<ID\> + + Required for valid SAM header. +*/ +/*! \var SamReadGroup::KeySequence + \brief corresponds to \@RG KS:\<KeySequence\> +*/ +/*! \var SamReadGroup::Library + \brief corresponds to \@RG LB:\<Library\> +*/ +/*! \var SamReadGroup::PlatformUnit + \brief corresponds to \@RG PU:\<PlatformUnit\> +*/ +/*! \var SamReadGroup::PredictedInsertSize + \brief corresponds to \@RG PI:\<PredictedInsertSize\> +*/ +/*! \var SamReadGroup::ProductionDate + \brief corresponds to \@RG DT:\<ProductionDate\> +*/ +/*! \var SamReadGroup::Program + \brief corresponds to \@RG PG:\<Program\> +*/ +/*! \var SamReadGroup::Sample + \brief corresponds to \@RG SM:\<Sample\> +*/ +/*! \var SamReadGroup::SequencingCenter + \brief corresponds to \@RG CN:\<SequencingCenter\> +*/ +/*! \var SamReadGroup::SequencingTechnology + \brief corresponds to \@RG PL:\<SequencingTechnology\> +*/ + +/*! 
\fn SamReadGroup::SamReadGroup(void) + \brief default constructor +*/ +SamReadGroup::SamReadGroup(void) + : Description("") + , FlowOrder("") + , ID("") + , KeySequence("") + , Library("") + , PlatformUnit("") + , PredictedInsertSize("") + , ProductionDate("") + , Program("") + , Sample("") + , SequencingCenter("") + , SequencingTechnology("") +{ } + +/*! \fn SamReadGroup::SamReadGroup(const std::string& id) + \brief constructs read group with \a id + + \param id desired read group ID +*/ +SamReadGroup::SamReadGroup(const std::string& id) + : Description("") + , FlowOrder("") + , ID(id) + , KeySequence("") + , Library("") + , PlatformUnit("") + , PredictedInsertSize("") + , ProductionDate("") + , Program("") + , Sample("") + , SequencingCenter("") + , SequencingTechnology("") +{ } + +/*! \fn SamReadGroup::SamReadGroup(const SamReadGroup& other) + \brief copy constructor +*/ +SamReadGroup::SamReadGroup(const SamReadGroup& other) + : Description(other.Description) + , FlowOrder(other.FlowOrder) + , ID(other.ID) + , KeySequence(other.KeySequence) + , Library(other.Library) + , PlatformUnit(other.PlatformUnit) + , PredictedInsertSize(other.PredictedInsertSize) + , ProductionDate(other.ProductionDate) + , Program(other.Program) + , Sample(other.Sample) + , SequencingCenter(other.SequencingCenter) + , SequencingTechnology(other.SequencingTechnology) +{ } + +/*! \fn SamReadGroup::~SamReadGroup(void) + \brief destructor +*/ +SamReadGroup::~SamReadGroup(void) { } + +/*! \fn void SamReadGroup::Clear(void) + \brief Clears all data fields. +*/ +void SamReadGroup::Clear(void) { + Description.clear(); + FlowOrder.clear(); + ID.clear(); + KeySequence.clear(); + Library.clear(); + PlatformUnit.clear(); + PredictedInsertSize.clear(); + ProductionDate.clear(); + Program.clear(); + Sample.clear(); + SequencingCenter.clear(); + SequencingTechnology.clear(); +} + +/*! 
\fn bool SamReadGroup::HasDescription(void) const + \brief Returns \c true if read group contains \@RG DS:\<Description\> +*/ +bool SamReadGroup::HasDescription(void) const { + return (!Description.empty()); +} + +/*! \fn bool SamReadGroup::HasFlowOrder(void) const + \brief Returns \c true if read group contains \@RG FO:\<FlowOrder\> +*/ +bool SamReadGroup::HasFlowOrder(void) const { + return (!FlowOrder.empty()); +} + +/*! \fn bool SamReadGroup::HasID(void) const + \brief Returns \c true if read group contains \@RG: ID:\<ID\> +*/ +bool SamReadGroup::HasID(void) const { + return (!ID.empty()); +} + +/*! \fn bool SamReadGroup::HasKeySequence(void) const + \brief Returns \c true if read group contains \@RG KS:\<KeySequence\> +*/ +bool SamReadGroup::HasKeySequence(void) const { + return (!KeySequence.empty()); +} + +/*! \fn bool SamReadGroup::HasLibrary(void) const + \brief Returns \c true if read group contains \@RG LB:\<Library\> +*/ +bool SamReadGroup::HasLibrary(void) const { + return (!Library.empty()); +} + +/*! \fn bool SamReadGroup::HasPlatformUnit(void) const + \brief Returns \c true if read group contains \@RG PU:\<PlatformUnit\> +*/ +bool SamReadGroup::HasPlatformUnit(void) const { + return (!PlatformUnit.empty()); +} + +/*! \fn bool SamReadGroup::HasPredictedInsertSize(void) const + \brief Returns \c true if read group contains \@RG PI:\<PredictedInsertSize\> +*/ +bool SamReadGroup::HasPredictedInsertSize(void) const { + return (!PredictedInsertSize.empty()); +} + +/*! \fn bool SamReadGroup::HasProductionDate(void) const + \brief Returns \c true if read group contains \@RG DT:\<ProductionDate\> +*/ +bool SamReadGroup::HasProductionDate(void) const { + return (!ProductionDate.empty()); +} + +/*! \fn bool SamReadGroup::HasProgram(void) const + \brief Returns \c true if read group contains \@RG PG:\<Program\> +*/ +bool SamReadGroup::HasProgram(void) const { + return (!Program.empty()); +} + +/*! 
\fn bool SamReadGroup::HasSample(void) const + \brief Returns \c true if read group contains \@RG SM:\<Sample\> +*/ +bool SamReadGroup::HasSample(void) const { + return (!Sample.empty()); +} + +/*! \fn bool SamReadGroup::HasSequencingCenter(void) const + \brief Returns \c true if read group contains \@RG CN:\<SequencingCenter\> +*/ +bool SamReadGroup::HasSequencingCenter(void) const { + return (!SequencingCenter.empty()); +} + +/*! \fn bool SamReadGroup::HasSequencingTechnology(void) const + \brief Returns \c true if read group contains \@RG PL:\<SequencingTechnology\> +*/ +bool SamReadGroup::HasSequencingTechnology(void) const { + return (!SequencingTechnology.empty()); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,69 @@ +// *************************************************************************** +// SamReadGroup.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM read group data fields. +// *************************************************************************** + +#ifndef SAM_READGROUP_H +#define SAM_READGROUP_H + +#include "api/api_global.h" +#include <string> + +namespace BamTools { + +struct API_EXPORT SamReadGroup { + + // ctor & dtor + SamReadGroup(void); + SamReadGroup(const std::string& id); + SamReadGroup(const SamReadGroup& other); + ~SamReadGroup(void); + + // query/modify entire read group + void Clear(void); // clears all data fields + + // convenience query methods + bool HasDescription(void) const; // returns true if read group has a description + bool HasFlowOrder(void) const; // returns true if read group has a flow order entry + bool HasID(void) const; // returns true if read group has a group ID + bool HasKeySequence(void) const; // returns true if read group has a key sequence + bool HasLibrary(void) const; // returns true if read group has a library name + bool HasPlatformUnit(void) const; // returns true if read group has a platform unit ID + bool HasPredictedInsertSize(void) const; // returns true if read group has a predicted insert size + bool HasProductionDate(void) const; // returns true if read group has a production date + bool HasProgram(void) const; // returns true if read group has a program entry + bool HasSample(void) const; // returns true if read group has a sample name + bool HasSequencingCenter(void) const; // returns true if read group has a sequencing center ID + bool HasSequencingTechnology(void) const; // returns true if 
read group has a sequencing technology ID + + + // data fields + std::string Description; // DS:<Description> + std::string FlowOrder; // FO:<FlowOrder> + std::string ID; // ID:<ID> *Required for valid SAM header* + std::string KeySequence; // KS:<KeySequence> + std::string Library; // LB:<Library> + std::string PlatformUnit; // PU:<PlatformUnit> + std::string PredictedInsertSize; // PI:<PredictedInsertSize> + std::string ProductionDate; // DT:<ProductionDate> + std::string Program; // PG:<Program> + std::string Sample; // SM:<Sample> + std::string SequencingCenter; // CN:<SequencingCenter> + std::string SequencingTechnology; // PL:<SequencingTechnology> +}; + +/*! \fn bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) + \brief tests equality by comparing read group IDs +*/ +API_EXPORT inline bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) { + return lhs.ID == rhs.ID; +} + +} // namespace BamTools + +#endif // SAM_READGROUP_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,290 @@\n+// ***************************************************************************\n+// SamReadGroupDictionary.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a collection of SamReadGroup entries.\n+// ***************************************************************************\n+\n+#include <api/SamReadGroupDictionary.h>\n+using namespace BamTools;\n+\n+#include <algorithm>\n+#include <iostream>\n+using namespace std;\n+\n+/*! \\class BamTools::SamReadGroupDictionary\n+ \\brief Container of SamReadGroup entries.\n+\n+ Provides methods for operating on a collection of SamReadGroup entries.\n+*/\n+\n+/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(void)\n+ \\brief constructor\n+*/\n+SamReadGroupDictionary::SamReadGroupDictionary(void) { }\n+\n+/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n+ \\brief copy constructor\n+*/\n+SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n+ : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamReadGroupDictionary::~SamReadGroupDictionary(void)\n+ \\brief destructor\n+*/\n+SamReadGroupDictionary::~SamReadGroupDictionary(void) { }\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const SamReadGroup& readGroup)\n+ \\brief Adds a read group to the dictionary.\n+\n+ Duplicate entries are silently discarded.\n+\n+ \\param readGroup entry to be added\n+*/\n+void SamReadGroupDictionary::Add(const SamReadGroup& readGroup) {\n+\n+ // TODO: report error on attempted duplicate?\n+\n+ if ( IsEmpty() || !Contains(readGroup) )\n+ m_data.push_back(readGroup);\n+}\n+\n+/*! 
\\fn void SamReadGroupDictionary::Add(const std::string& readGroupId)\n+ \\brief Adds a read group to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param readGroupId ID of read group to be added\n+ \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::string& readGroupId) {\n+ Add( SamReadGroup(readGroupId) );\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups)\n+ \\brief Adds multiple read groups to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param readGroups entries to be added\n+ \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups) {\n+ vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n+ vector<SamReadGroup>::const_iterator rgEnd = readGroups.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Add(*rgIter);\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds)\n+ \\brief Adds multiple read groups to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param readGroupIds IDs of read groups to be added\n+ \\sa Add()\n+*/\n+void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds) {\n+ vector<string>::const_iterator rgIter = readGroupIds.begin();\n+ vector<string>::const_iterator rgEnd = readGroupIds.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Add(*rgIter);\n+}\n+\n+/*! \\fn SamReadGroupIterator SamReadGroupDictionary::Begin(void)\n+ \\return an STL iterator pointing to the first read group\n+ \\sa ConstBegin(), End()\n+*/\n+SamReadGroupIterator SamReadGroupDictionary::Begin(void) {\n+ return m_data.begin();\n+}\n+\n+/*! \\fn SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const\n+ \\return an STL const_iterator pointing to the first read group\n+\n+ This is an overloaded function.\n+\n+ \\sa ConstBegin(), End()\n+*/\n+SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const {\n+ return m_data.begin();\n+}\n+\n+/*! 
\\fn void SamReadGroupDictionary::Clear(void)\n+ \\brief Clears all read group entries.\n+*/\n+void SamReadGroupDicti'..b' return m_data.end();\n+}\n+\n+/*! \\fn int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const\n+ \\internal\n+ \\return index of read group if found. Otherwise, returns vector::size() (invalid index).\n+*/\n+int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const {\n+ SamReadGroupConstIterator begin = ConstBegin();\n+ SamReadGroupConstIterator iter = begin;\n+ SamReadGroupConstIterator end = ConstEnd();\n+ for ( ; iter != end; ++iter ) {\n+ const SamReadGroup& current = (*iter);\n+ if ( current.ID == readGroupId )\n+ break;\n+ }\n+ return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamReadGroupDictionary::IsEmpty(void) const\n+ \\brief Returns \\c true if dictionary contains no read groups\n+ \\sa Size()\n+*/\n+bool SamReadGroupDictionary::IsEmpty(void) const {\n+ return m_data.empty();\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup)\n+ \\brief Removes read group from dictionary, if found (matching on ID).\n+\n+ This is an overloaded function.\n+\n+ \\param readGroup read group to remove (matches on ID)\n+*/\n+void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup) {\n+ Remove( readGroup.ID );\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const std::string& readGroupId)\n+ \\brief Removes read group from dictionary, if found.\n+ \\param readGroupId ID of read group to remove\n+ \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::string& readGroupId) {\n+ if ( Contains(readGroupId) )\n+ m_data.erase( m_data.begin() + IndexOf(readGroupId) );\n+}\n+\n+/*! 
\\fn void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups)\n+ \\brief Removes multiple read groups from dictionary (matching on ID).\n+\n+ This is an overloaded function.\n+\n+ \\param readGroups read groups to remove\n+ \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups) {\n+ vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n+ vector<SamReadGroup>::const_iterator rgEnd = readGroups.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Remove(*rgIter);\n+}\n+\n+/*! \\fn void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds)\n+ \\brief Removes multiple read groups from dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param readGroupIds IDs of the read groups to remove\n+ \\sa Remove()\n+*/\n+void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds) {\n+ vector<string>::const_iterator rgIter = readGroupIds.begin();\n+ vector<string>::const_iterator rgEnd = readGroupIds.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Remove(*rgIter);\n+}\n+\n+/*! \\fn int SamReadGroupDictionary::Size(void) const\n+ \\brief Returns number of read groups in dictionary.\n+ \\sa IsEmpty()\n+*/\n+int SamReadGroupDictionary::Size(void) const {\n+ return m_data.size();\n+}\n+\n+/*! 
\\fn SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId)\n+ \\brief Retrieves the modifiable SamReadGroup that matches \\a readGroupId.\n+\n+ NOTE - If the dictionary contains no read group matching this ID, this function inserts\n+ a new one with this ID, and returns a reference to it.\n+\n+ If you want to avoid this insertion behavior, check the result of Contains() before\n+ using this operator.\n+\n+ \\param readGroupId ID of read group to retrieve\n+ \\return a modifiable reference to the SamReadGroup associated with the ID\n+*/\n+SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId) {\n+\n+ // look up read group ID\n+ int index = IndexOf(readGroupId);\n+\n+ // if found, return read group at index\n+ if ( index != (int)m_data.size() )\n+ return m_data[index];\n+\n+ // otherwise, append new read group and return reference\n+ else {\n+ SamReadGroup rg(readGroupId);\n+ m_data.push_back(rg);\n+ return m_data.back();\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,87 @@ +// *************************************************************************** +// SamReadGroupDictionary.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides methods for operating on a collection of SamReadGroup entries. +// *************************************************************************** + +#ifndef SAM_READGROUP_DICTIONARY_H +#define SAM_READGROUP_DICTIONARY_H + +#include <api/api_global.h> +#include <api/SamReadGroup.h> +#include <string> +#include <vector> + +namespace BamTools { + +typedef std::vector<SamReadGroup> SamReadGroupContainer; +typedef SamReadGroupContainer::iterator SamReadGroupIterator; +typedef SamReadGroupContainer::const_iterator SamReadGroupConstIterator; + +class API_EXPORT SamReadGroupDictionary { + + // ctor & dtor + public: + SamReadGroupDictionary(void); + SamReadGroupDictionary(const SamReadGroupDictionary& other); + ~SamReadGroupDictionary(void); + + // query/modify read group data + public: + // adds a read group + void Add(const SamReadGroup& readGroup); + void Add(const std::string& readGroupId); + + // adds multiple read groups + void Add(const std::vector<SamReadGroup>& readGroups); + void Add(const std::vector<std::string>& readGroupIds); + + // clears all read group entries + void Clear(void); + + // returns true if dictionary contains this read group + bool Contains(const SamReadGroup& readGroup) const; + bool Contains(const std::string& readGroupId) const; + + // returns true if dictionary is empty + bool IsEmpty(void) const; + + // removes read group, if found + void Remove(const SamReadGroup& readGroup); + void Remove(const std::string& readGroupId); + + // removes multiple read groups + void Remove(const std::vector<SamReadGroup>& 
readGroups); + void Remove(const std::vector<std::string>& readGroupIds); + + // returns number of read groups in dictionary + int Size(void) const; + + // retrieves a modifiable reference to the SamReadGroup object associated with this ID + SamReadGroup& operator[](const std::string& readGroupId); + + // retrieve STL-compatible iterators + public: + SamReadGroupIterator Begin(void); // returns iterator to begin() + SamReadGroupConstIterator Begin(void) const; // returns const_iterator to begin() + SamReadGroupConstIterator ConstBegin(void) const; // returns const_iterator to begin() + SamReadGroupIterator End(void); // returns iterator to end() + SamReadGroupConstIterator End(void) const; // returns const_iterator to end() + SamReadGroupConstIterator ConstEnd(void) const; // returns const_iterator to end() + + // internal methods + private: + int IndexOf(const std::string& readGroupId) const; + + // data members + private: + SamReadGroupContainer m_data; +}; + +} // namespace BamTools + +#endif // SAM_READGROUP_DICTIONARY_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,162 @@ +// *************************************************************************** +// SamSequence.cpp (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM sequence data fields. +// *************************************************************************** + +#include <api/SamSequence.h> +#include <sstream> +using namespace BamTools; +using namespace std; + +/*! \struct BamTools::SamSequence + \brief Represents a SAM sequence entry. + + Provides direct read/write access to the SAM sequence data fields. + + \sa \samSpecURL +*/ +/*! \var SamSequence::AssemblyID + \brief corresponds to \@SQ AS:\<AssemblyID\> +*/ +/*! \var SamSequence::Checksum + \brief corresponds to \@SQ M5:\<Checksum\> +*/ +/*! \var SamSequence::Length + \brief corresponds to \@SQ LN:\<Length\> + + Required for valid SAM header. +*/ +/*! \var SamSequence::Name + \brief corresponds to \@SQ SN:\<Name\> + + Required for valid SAM header. +*/ +/*! \var SamSequence::Species + \brief corresponds to \@SQ SP:\<Species\> +*/ +/*! \var SamSequence::URI + \brief corresponds to \@SQ UR:\<URI\> +*/ + +/*! \fn SamSequence::SamSequence(void) + \brief default constructor +*/ +SamSequence::SamSequence(void) + : AssemblyID("") + , Checksum("") + , Length("") + , Name("") + , Species("") + , URI("") +{ } + +/*! 
\fn SamSequence::SamSequence(const std::string& name, const int& length) + \brief constructs sequence with \a name and \a length + + \param name desired sequence name + \param length desired sequence length (numeric value) +*/ +SamSequence::SamSequence(const std::string& name, + const int& length) + : AssemblyID("") + , Checksum("") + , Name(name) + , Species("") + , URI("") +{ + stringstream s(""); + s << length; + Length = s.str(); +} + +/*! \fn SamSequence::SamSequence(const std::string& name, const std::string& length) + \brief constructs sequence with \a name and \a length + + \param name desired sequence name + \param length desired sequence length (string value) +*/ +SamSequence::SamSequence(const std::string& name, + const std::string& length) + : AssemblyID("") + , Checksum("") + , Length(length) + , Name(name) + , Species("") + , URI("") +{ } + +/*! \fn SamSequence::SamSequence(const SamSequence& other) + \brief copy constructor +*/ +SamSequence::SamSequence(const SamSequence& other) + : AssemblyID(other.AssemblyID) + , Checksum(other.Checksum) + , Length(other.Length) + , Name(other.Name) + , Species(other.Species) + , URI(other.URI) +{ } + +/*! \fn SamSequence::~SamSequence(void) + \brief destructor +*/ +SamSequence::~SamSequence(void) { } + +/*! \fn void SamSequence::Clear(void) + \brief Clears all data fields. +*/ +void SamSequence::Clear(void) { + AssemblyID.clear(); + Checksum.clear(); + Length.clear(); + Name.clear(); + Species.clear(); + URI.clear(); +} + +/*! \fn bool SamSequence::HasAssemblyID(void) const + \brief Returns \c true if sequence contains \@SQ AS:\<AssemblyID\> +*/ +bool SamSequence::HasAssemblyID(void) const { + return (!AssemblyID.empty()); +} + +/*! \fn bool SamSequence::HasChecksum(void) const + \brief Returns \c true if sequence contains \@SQ M5:\<Checksum\> +*/ +bool SamSequence::HasChecksum(void) const { + return (!Checksum.empty()); +} + +/*! 
\fn bool SamSequence::HasLength(void) const + \brief Returns \c true if sequence contains \@SQ LN:\<Length\> +*/ +bool SamSequence::HasLength(void) const { + return (!Length.empty()); +} + +/*! \fn bool SamSequence::HasName(void) const + \brief Returns \c true if sequence contains \@SQ SN:\<Name\> +*/ +bool SamSequence::HasName(void) const { + return (!Name.empty()); +} + +/*! \fn bool SamSequence::HasSpecies(void) const + \brief Returns \c true if sequence contains \@SQ SP:\<Species\> +*/ +bool SamSequence::HasSpecies(void) const { + return (!Species.empty()); +} + +/*! \fn bool SamSequence::HasURI(void) const + \brief Returns \c true if sequence contains \@SQ UR:\<URI\> +*/ +bool SamSequence::HasURI(void) const { + return (!URI.empty()); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,61 @@ +// *************************************************************************** +// SamSequence.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides direct read/write access to the SAM sequence data fields. +// *************************************************************************** + +#ifndef SAM_SEQUENCE_H +#define SAM_SEQUENCE_H + +#include <api/api_global.h> +#include <string> + +namespace BamTools { + +struct API_EXPORT SamSequence { + + // ctor & dtor + SamSequence(void); + SamSequence(const std::string& name, const int& length); + SamSequence(const std::string& name, const std::string& length); + SamSequence(const SamSequence& other); + ~SamSequence(void); + + // query/modify entire sequence + void Clear(void); // clears all contents + + // convenience query methods + bool HasAssemblyID(void) const; // returns true if sequence has an assembly ID + bool HasChecksum(void) const; // returns true if sequence has an MD5 checksum + bool HasLength(void) const; // returns true if sequence has a length + bool HasName(void) const; // returns true if sequence has a name + bool HasSpecies(void) const; // returns true if sequence has a species ID + bool HasURI(void) const; // returns true if sequence has a URI + + // data members + std::string AssemblyID; // AS:<AssemblyID> + std::string Checksum; // M5:<Checksum> + std::string Length; // LN:<Length> *Required for valid SAM header* + std::string Name; // SN:<Name> *Required for valid SAM header* + std::string Species; // SP:<Species> + std::string URI; // UR:<URI> +}; + +/*! 
\fn bool operator==(const SamSequence& lhs, const SamSequence& rhs) + \brief tests equality by comparing sequence names, lengths, & checksums (if available) +*/ +API_EXPORT inline bool operator==(const SamSequence& lhs, const SamSequence& rhs) { + if ( lhs.Name != rhs.Name ) return false; + if ( lhs.Length != rhs.Length ) return false; + if ( lhs.HasChecksum() && rhs.HasChecksum() ) + return (lhs.Checksum == rhs.Checksum); + else return true; +} + +} // namespace BamTools + +#endif // SAM_SEQUENCE_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,293 @@\n+// ***************************************************************************\n+// SamSequenceDictionary.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides methods for operating on a collection of SamSequence entries.\n+// *************************************************************************\n+\n+#include <api/SamSequenceDictionary.h>\n+using namespace BamTools;\n+\n+#include <iostream>\n+using namespace std;\n+\n+/*! \\class BamTools::SamSequenceDictionary\n+ \\brief Container of SamSequence entries.\n+\n+ Provides methods for operating on a collection of SamSequence entries.\n+*/\n+\n+/*! \\fn SamSequenceDictionary::SamSequenceDictionary(void)\n+ \\brief constructor\n+*/\n+SamSequenceDictionary::SamSequenceDictionary(void) { }\n+\n+/*! \\fn SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n+ \\brief copy constructor\n+*/\n+SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n+ : m_data(other.m_data)\n+{ }\n+\n+/*! \\fn SamSequenceDictionary::~SamSequenceDictionary(void)\n+ \\brief destructor\n+*/\n+SamSequenceDictionary::~SamSequenceDictionary(void) { }\n+\n+/*! \\fn void SamSequenceDictionary::Add(const SamSequence& sequence)\n+ \\brief Adds a sequence to the dictionary.\n+\n+ Duplicate entries are silently discarded.\n+\n+ \\param sequence entry to be added\n+*/\n+void SamSequenceDictionary::Add(const SamSequence& sequence) {\n+\n+ // TODO: report error on attempted duplicate?\n+\n+ if ( IsEmpty() || !Contains(sequence) )\n+ m_data.push_back(sequence);\n+}\n+\n+/*! 
\\fn void SamSequenceDictionary::Add(const std::string& name, const int& length)\n+ \\brief Adds a sequence to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param name name of sequence entry to be added\n+ \\param length length of sequence entry to be added\n+ \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::string& name, const int& length) {\n+ Add( SamSequence(name, length) );\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences)\n+ \\brief Adds multiple sequences to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param sequences entries to be added\n+ \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences) {\n+ vector<SamSequence>::const_iterator seqIter = sequences.begin();\n+ vector<SamSequence>::const_iterator seqEnd = sequences.end();\n+ for ( ; seqIter!= seqEnd; ++seqIter )\n+ Add(*seqIter);\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap)\n+ \\brief Adds multiple sequences to the dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param sequenceMap map of sequence entries (name => length) to be added\n+ \\sa Add()\n+*/\n+void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap) {\n+ map<string, int>::const_iterator seqIter = sequenceMap.begin();\n+ map<string, int>::const_iterator seqEnd = sequenceMap.end();\n+ for ( ; seqIter != seqEnd; ++seqIter ) {\n+ const string& name = (*seqIter).first;\n+ const int& length = (*seqIter).second;\n+ Add( SamSequence(name, length) );\n+ }\n+}\n+\n+/*! \\fn SamSequenceIterator SamSequenceDictionary::Begin(void)\n+ \\return an STL iterator pointing to the first sequence\n+ \\sa ConstBegin(), End()\n+*/\n+SamSequenceIterator SamSequenceDictionary::Begin(void) {\n+ return m_data.begin();\n+}\n+\n+/*! 
\\fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const\n+ \\return an STL const_iterator pointing to the first sequence\n+\n+ This is an overloaded function.\n+\n+ \\sa ConstBegin(), End()\n+*/\n+SamSequenceConstIterator SamSequenceDictionary::Begin(void) const {\n'..b'r SamSequenceDictionary::End(void) const {\n+ return m_data.end();\n+}\n+\n+/*! \\fn int SamSequenceDictionary::IndexOf(const std::string& name) const\n+ \\internal\n+ \\return index of sequence if found (matching on name). Otherwise, returns vector::size() (invalid index).\n+*/\n+int SamSequenceDictionary::IndexOf(const std::string& name) const {\n+ SamSequenceConstIterator begin = ConstBegin();\n+ SamSequenceConstIterator iter = begin;\n+ SamSequenceConstIterator end = ConstEnd();\n+ for ( ; iter != end; ++iter ) {\n+ const SamSequence& currentSeq = (*iter);\n+ if ( currentSeq.Name == name )\n+ break;\n+ }\n+ return distance( begin, iter );\n+}\n+\n+/*! \\fn bool SamSequenceDictionary::IsEmpty(void) const\n+ \\brief Returns \\c true if dictionary contains no sequences\n+ \\sa Size()\n+*/\n+bool SamSequenceDictionary::IsEmpty(void) const {\n+ return m_data.empty();\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const SamSequence& sequence)\n+ \\brief Removes sequence from dictionary, if found (matches on name).\n+\n+ This is an overloaded function.\n+\n+ \\param sequence SamSequence to remove (matching on name)\n+*/\n+void SamSequenceDictionary::Remove(const SamSequence& sequence) {\n+ Remove( sequence.Name );\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const std::string& sequenceName)\n+ \\brief Removes sequence from dictionary, if found.\n+\n+ \\param sequenceName name of sequence to remove\n+ \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::string& sequenceName) {\n+ if ( Contains(sequenceName) )\n+ m_data.erase( m_data.begin() + IndexOf(sequenceName) );\n+}\n+\n+/*! 
\\fn void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences)\n+ \\brief Removes multiple sequences from dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param sequences sequences to remove\n+ \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences) {\n+ vector<SamSequence>::const_iterator rgIter = sequences.begin();\n+ vector<SamSequence>::const_iterator rgEnd = sequences.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Remove(*rgIter);\n+}\n+\n+/*! \\fn void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames)\n+ \\brief Removes multiple sequences from dictionary.\n+\n+ This is an overloaded function.\n+\n+ \\param sequenceNames names of the sequences to remove\n+ \\sa Remove()\n+*/\n+void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames) {\n+ vector<string>::const_iterator rgIter = sequenceNames.begin();\n+ vector<string>::const_iterator rgEnd = sequenceNames.end();\n+ for ( ; rgIter!= rgEnd; ++rgIter )\n+ Remove(*rgIter);\n+}\n+\n+/*! \\fn int SamSequenceDictionary::Size(void) const\n+ \\brief Returns number of sequences in dictionary.\n+ \\sa IsEmpty()\n+*/\n+int SamSequenceDictionary::Size(void) const {\n+ return m_data.size();\n+}\n+\n+/*! 
\\fn SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName)\n+ \\brief Retrieves the modifiable SamSequence that matches \\a sequenceName.\n+\n+ NOTE - If the dictionary contains no sequence matching this name, this function inserts\n+ a new one with this name (length:0), and returns a reference to it.\n+\n+ If you want to avoid this insertion behavior, check the result of Contains() before\n+ using this operator.\n+\n+ \\param sequenceName name of sequence to retrieve\n+ \\return a modifiable reference to the SamSequence associated with the name\n+*/\n+SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) {\n+\n+ // look up sequence ID\n+ int index = IndexOf(sequenceName);\n+\n+ // if found, return sequence at index\n+ if ( index != (int)m_data.size() )\n+ return m_data[index];\n+\n+ // otherwise, append new sequence and return reference\n+ else {\n+ m_data.push_back( SamSequence(sequenceName, 0) );\n+ return m_data.back();\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,89 @@ +// *************************************************************************** +// SamSequenceDictionary.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 18 April 2011 +// --------------------------------------------------------------------------- +// Provides methods for operating on a collection of SamSequence entries. +// *************************************************************************** + +#ifndef SAM_SEQUENCE_DICTIONARY_H +#define SAM_SEQUENCE_DICTIONARY_H + +#include <api/api_global.h> +#include <api/SamSequence.h> +#include <string> +#include <map> +#include <vector> + +namespace BamTools { + +typedef std::vector<SamSequence> SamSequenceContainer; +typedef SamSequenceContainer::iterator SamSequenceIterator; +typedef SamSequenceContainer::const_iterator SamSequenceConstIterator; + +class API_EXPORT SamSequenceDictionary { + + // ctor & dtor + public: + SamSequenceDictionary(void); + SamSequenceDictionary(const SamSequenceDictionary& other); + ~SamSequenceDictionary(void); + + // query/modify sequence data + public: + // adds a sequence + void Add(const SamSequence& sequence); + void Add(const std::string& name, const int& length); + + // adds multiple sequences + void Add(const std::vector<SamSequence>& sequences); + void Add(const std::map<std::string, int>& sequenceMap); + + // clears all sequence entries + void Clear(void); + + // returns true if dictionary contains this sequence + bool Contains(const SamSequence& sequence) const; + bool Contains(const std::string& sequenceName) const; + + // returns true if dictionary is empty + bool IsEmpty(void) const; + + // removes sequence, if found + void Remove(const SamSequence& sequence); + void Remove(const std::string& sequenceName); + + // removes multiple sequences + void Remove(const std::vector<SamSequence>& sequences); + void 
Remove(const std::vector<std::string>& sequenceNames); + + // returns number of sequences in dictionary + int Size(void) const; + + // retrieves a modifiable reference to the SamSequence object associated with this name + SamSequence& operator[](const std::string& sequenceName); + + // retrieve STL-compatible iterators + public: + SamSequenceIterator Begin(void); // returns iterator to begin() + SamSequenceConstIterator Begin(void) const; // returns const_iterator to begin() + SamSequenceConstIterator ConstBegin(void) const; // returns const_iterator to begin() + SamSequenceIterator End(void); // returns iterator to end() + SamSequenceConstIterator End(void) const; // returns const_iterator to end() + SamSequenceConstIterator ConstEnd(void) const; // returns const_iterator to end() + + // internal methods + private: + int IndexOf(const std::string& name) const; + + // data members + private: + SamSequenceContainer m_data; +}; + +} // namespace BamTools + +#endif // SAM_SEQUENCE_DICTIONARY_H + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,22 @@ +// *************************************************************************** +// api_global.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 November 2010 (DB) +// --------------------------------------------------------------------------- +// Provides macros for exporting & importing BamTools API library symbols +// *************************************************************************** + +#ifndef API_GLOBAL_H +#define API_GLOBAL_H + +#include "shared/bamtools_global.h" + +#ifdef BAMTOOLS_API_LIBRARY +# define API_EXPORT BAMTOOLS_LIBRARY_EXPORT +#else +# define API_EXPORT BAMTOOLS_LIBRARY_IMPORT +#endif + +#endif // API_GLOBAL_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,133 @@ +// *************************************************************************** +// BamHeader_p.cpp (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 21 March 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for handling BAM headers. +// *************************************************************************** + +#include <api/BamAux.h> +#include <api/BamConstants.h> +#include <api/internal/BamHeader_p.h> +#include <api/internal/BgzfStream_p.h> +using namespace BamTools; +using namespace BamTools::Internal; + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <iostream> +using namespace std; + +// ctor +BamHeader::BamHeader(void) { } + +// dtor +BamHeader::~BamHeader(void) { } + +// reads magic number from BGZF stream, returns true if valid +bool BamHeader::CheckMagicNumber(BgzfStream* stream) { + + // try to read magic number + char buffer[Constants::BAM_HEADER_MAGIC_LENGTH]; + if ( stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH) != (int)Constants::BAM_HEADER_MAGIC_LENGTH ) { + fprintf(stderr, "BamHeader ERROR: could not read magic number\n"); + return false; + } + + // validate magic number + if ( strncmp(buffer, Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH) != 0 ) { + fprintf(stderr, "BamHeader ERROR: invalid magic number\n"); + return false; + } + + // all checks out + return true; +} + +// clear SamHeader data +void BamHeader::Clear(void) { + m_header.Clear(); +} + +// return true if SamHeader data is valid +bool BamHeader::IsValid(void) const { + return m_header.IsValid(); +} + +// load BAM header ('magic number' and SAM header text) from BGZF stream +// returns true if all OK +bool BamHeader::Load(BgzfStream* stream) { + + // cannot load if invalid stream + if ( stream 
== 0 ) + return false; + + // cannot load if magic number is invalid + if ( !CheckMagicNumber(stream) ) + return false; + + // cannot load header if cannot read header length + uint32_t length(0); + if ( !ReadHeaderLength(stream, length) ) + return false; + + // cannot load header if cannot read header text + if ( !ReadHeaderText(stream, length) ) + return false; + + // otherwise, everything OK + return true; +} + +// reads SAM header text length from BGZF stream, stores it in @length +// returns read success/fail status +bool BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) { + + // attempt to read BAM header text length + char buffer[sizeof(uint32_t)]; + if ( stream->Read(buffer, sizeof(uint32_t)) != sizeof(uint32_t) ) { + fprintf(stderr, "BamHeader ERROR: could not read header length\n"); + return false; + } + + // convert char buffer to length, return success + length = BamTools::UnpackUnsignedInt(buffer); + if ( BamTools::SystemIsBigEndian() ) + BamTools::SwapEndian_32(length); + return true; +} + +// reads SAM header text from BGZF stream, stores in SamHeader object +// returns read success/fail status +bool BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) { + + // set up destination buffer + char* headerText = (char*)calloc(length + 1, 1); + + // attempt to read header text + const unsigned bytesRead = stream->Read(headerText, length); + const bool readOk = ( bytesRead == length ); + if ( readOk ) + m_header.SetHeaderText( (string)((const char*)headerText) ); + else + fprintf(stderr, "BamHeader ERROR: could not read header text\n"); + + // clean up calloc-ed temp variable (on success or fail) + free(headerText); + + // return read success + return readOk; +} + +// returns *copy* of SamHeader data object +SamHeader BamHeader::ToSamHeader(void) const { + return m_header; +} + +// returns SAM-formatted string of header data +string BamHeader::ToString(void) const { + return m_header.ToString(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,72 @@ +// *************************************************************************** +// BamHeader_p.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 26 January 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for handling BAM headers. +// *************************************************************************** + +#ifndef BAMHEADER_P_H +#define BAMHEADER_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include <api/SamHeader.h> +#include <string> + +namespace BamTools { +namespace Internal { + +class BgzfStream; + +class BamHeader { + + // ctor & dtor + public: + BamHeader(void); + ~BamHeader(void); + + // BamHeader interface + public: + // clear SamHeader data + void Clear(void); + // return true if SamHeader data is valid + bool IsValid(void) const; + // load BAM header ('magic number' and SAM header text) from BGZF stream + // returns true if all OK + bool Load(BgzfStream* stream); + // returns (editable) copy of SamHeader data object + SamHeader ToSamHeader(void) const; + // returns SAM-formatted string of header data + std::string ToString(void) const; + + // internal methods + private: + // reads magic number from BGZF stream, returns true if valid + bool CheckMagicNumber(BgzfStream* stream); + // reads SAM header length from BGZF stream, stores it in @length + // returns read success/fail status + bool ReadHeaderLength(BgzfStream* stream, uint32_t& length); + // reads SAM header text from BGZF stream, stores in SamHeader object + // returns read success/fail status + bool 
ReadHeaderText(BgzfStream* stream, const uint32_t& length); + + // data members + private: + SamHeader m_header; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMHEADER_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,113 @@ +// *************************************************************************** +// BamIndexFactory_p.cpp (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides interface for generating BamIndex implementations +// *************************************************************************** + +#include <api/BamAux.h> +#include <api/internal/BamIndexFactory_p.h> +#include <api/internal/BamStandardIndex_p.h> +#include <api/internal/BamToolsIndex_p.h> +using namespace BamTools; +using namespace BamTools::Internal; + +#include <cstdio> +using namespace std; + +// generates index filename from BAM filename (depending on requested type) +// if type is unknown, returns empty string +const string BamIndexFactory::CreateIndexFilename(const string& bamFilename, + const BamIndex::IndexType& type) +{ + switch ( type ) { + case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() ); + case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() ); + default : + cerr << "BamIndexFactory ERROR: unknown index type" << type << endl; + return string(); + } +} + +// creates a new BamIndex object, depending on extension of @indexFilename +BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) { + + // if file doesn't exist, return null index + if ( !BamTools::FileExists(indexFilename) ) + return 0; + + // get file extension from index filename, including dot (".EXT") + // if can't get file extension, return null index + const string extension = FileExtension(indexFilename); + if ( extension.empty() ) + return 0; + + // create index based on extension + if ( extension == BamStandardIndex::Extension() ) return new 
BamStandardIndex(reader); + else if ( extension == BamToolsIndex::Extension() ) return new BamToolsIndex(reader); + else + return 0; +} + +// creates a new BamIndex, object of requested @type +BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type, + BamReaderPrivate* reader) +{ + switch ( type ) { + case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader); + case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader); + default : + cerr << "BamIndexFactory ERROR: unknown index type " << type << endl; + return 0; + } +} + +// retrieves file extension (including '.') +const string BamIndexFactory::FileExtension(const string& filename) { + + // if filename cannot contain valid path + extension, return empty string + if ( filename.empty() || filename.length() <= 4 ) + return string(); + + // look for last dot in filename + size_t lastDotPosition = filename.find_last_of('.'); + + // if none found, return empty string + if ( lastDotPosition == string::npos ) + return string(); + + // return substring from last dot position + return filename.substr(lastDotPosition); +} + +// returns name of existing index file that corresponds to @bamFilename +// will defer to @preferredType if possible, if not will attempt to load any supported type +// returns empty string if not found +const string BamIndexFactory::FindIndexFilename(const string& bamFilename, + const BamIndex::IndexType& preferredType) +{ + // try to find index of preferred type first + // return index filename if found + string indexFilename = CreateIndexFilename(bamFilename, preferredType); + if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) + return indexFilename; + + // couldn't find preferred type, try the other supported types + // return index filename if found + if ( preferredType != BamIndex::STANDARD ) { + indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD); + if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) + return 
indexFilename; + } + if ( preferredType != BamIndex::BAMTOOLS ) { + indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS); + if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) + return indexFilename; + } + + // otherwise couldn't find any index matching this filename + return string(); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,50 @@ +// *************************************************************************** +// BamIndexFactory_p.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides interface for generating BamIndex implementations +// *************************************************************************** + +#ifndef BAMINDEX_FACTORY_P_H +#define BAMINDEX_FACTORY_P_H + +#include <api/BamIndex.h> +#include <string> + +namespace BamTools { +namespace Internal { + +class BamIndexFactory { + + // static interface methods + public: + // creates a new BamIndex object, depending on extension of @indexFilename + static BamIndex* CreateIndexFromFilename(const std::string& indexFilename, + BamReaderPrivate* reader); + // creates a new BamIndex object, of requested @type + static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type, + BamReaderPrivate* reader); + // returns name of existing index file that corresponds to @bamFilename + // will defer to @preferredType if possible + // if @preferredType not found, will attempt to load any supported index type + // returns empty string if no index file (of any type) is found + static const std::string FindIndexFilename(const std::string& bamFilename, + const BamIndex::IndexType& preferredType); + + // internal methods + public: + // generates index filename from BAM filename (depending on requested type) + // if type is unknown, returns empty string + static const std::string CreateIndexFilename(const std::string& bamFilename, + const BamIndex::IndexType& type); + // retrieves file extension (including '.') + static const std::string FileExtension(const std::string& filename); +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMINDEX_FACTORY_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,295 @@\n+// ***************************************************************************\n+// BamMultiMerger_p.h (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 March 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides merging functionality for BamMultiReader. At this point, supports\n+// sorting results by (refId, position) or by read name.\n+// ***************************************************************************\n+\n+#ifndef BAMMULTIMERGER_P_H\n+#define BAMMULTIMERGER_P_H\n+\n+// -------------\n+// W A R N I N G\n+// -------------\n+//\n+// This file is not part of the BamTools API. It exists purely as an\n+// implementation detail. This header file may change from version to version\n+// without notice, or even be removed.\n+//\n+// We mean it.\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamReader.h>\n+#include <map>\n+#include <queue>\n+#include <string>\n+#include <utility>\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;\n+\n+// generic MultiMerger interface\n+class IBamMultiMerger {\n+\n+ public:\n+ IBamMultiMerger(void) { }\n+ virtual ~IBamMultiMerger(void) { }\n+\n+ public:\n+ virtual void Add(const ReaderAlignment& value) =0;\n+ virtual void Clear(void) =0;\n+ virtual const ReaderAlignment& First(void) const =0;\n+ virtual bool IsEmpty(void) const =0;\n+ virtual void Remove(BamReader* reader) =0;\n+ virtual int Size(void) const =0;\n+ virtual ReaderAlignment TakeFirst(void) =0;\n+};\n+\n+// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position)\n+class PositionMultiMerger : public IBamMultiMerger {\n+\n+ public:\n+ PositionMultiMerger(void) : IBamMultiMerger() { }\n+ ~PositionMultiMerger(void) { }\n+\n+ public:\n+ void Add(const 
ReaderAlignment& value);\n+ void Clear(void);\n+ const ReaderAlignment& First(void) const;\n+ bool IsEmpty(void) const;\n+ void Remove(BamReader* reader);\n+ int Size(void) const;\n+ ReaderAlignment TakeFirst(void);\n+\n+ private:\n+ typedef std::pair<int, int> KeyType;\n+ typedef ReaderAlignment ValueType;\n+ typedef std::pair<KeyType, ValueType> ElementType;\n+\n+ typedef std::multimap<KeyType, ValueType> ContainerType;\n+ typedef ContainerType::iterator DataIterator;\n+ typedef ContainerType::const_iterator DataConstIterator;\n+\n+ ContainerType m_data;\n+};\n+\n+// IBamMultiMerger implementation - sorted on BamAlignment: Name\n+class ReadNameMultiMerger : public IBamMultiMerger {\n+\n+ public:\n+ ReadNameMultiMerger(void) : IBamMultiMerger() { }\n+ ~ReadNameMultiMerger(void) { }\n+\n+ public:\n+ void Add(const ReaderAlignment& value);\n+ void Clear(void);\n+ const ReaderAlignment& First(void) const;\n+ bool IsEmpty(void) const;\n+ void Remove(BamReader* reader);\n+ int Size(void) const;\n+ ReaderAlignment TakeFirst(void);\n+\n+ private:\n+ typedef std::string KeyType;\n+ typedef ReaderAlignment ValueType;\n+ typedef std::pair<KeyType, ValueType> ElementType;\n+\n+ typedef std::multimap<KeyType, ValueType> ContainerType;\n+ typedef ContainerType::iterator DataIterator;\n+ typedef ContainerType::const_iterator DataConstIterator;\n+\n+ ContainerType m_data;\n+};\n+\n+// IBamMultiMerger implementation - unsorted BAM file(s)\n+class UnsortedMultiMerger : public IBamMultiMerger {\n+\n+ public:\n+ UnsortedMultiMerger(void) : IBamMultiMerger() { }\n+ ~UnsortedMultiMerger(void) { }\n+\n+ public:\n+ void Add(const ReaderAlignment& value);\n+ void Clear(void);\n+ con'..b't {\n+ return m_data.empty();\n+}\n+\n+inline void PositionMultiMerger::Remove(BamReader* reader) {\n+\n+ if ( reader == 0 ) return;\n+ const std::string filenameToRemove = reader->GetFilename();\n+\n+ // iterate over readers in cache\n+ DataIterator dataIter = m_data.begin();\n+ DataIterator dataEnd = 
m_data.end();\n+ for ( ; dataIter != dataEnd; ++dataIter ) {\n+ const ValueType& entry = (*dataIter).second;\n+ const BamReader* entryReader = entry.first;\n+ if ( entryReader == 0 ) continue;\n+\n+ // remove iterator on match\n+ if ( entryReader->GetFilename() == filenameToRemove ) {\n+ m_data.erase(dataIter);\n+ return;\n+ }\n+ }\n+}\n+\n+inline int PositionMultiMerger::Size(void) const {\n+ return m_data.size();\n+}\n+\n+inline ReaderAlignment PositionMultiMerger::TakeFirst(void) {\n+ DataIterator first = m_data.begin();\n+ ReaderAlignment next = (*first).second;\n+ m_data.erase(first);\n+ return next;\n+}\n+\n+// ------------------------------------------\n+// ReadNameMultiMerger implementation\n+\n+inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {\n+ const KeyType key(value.second->Name);\n+ m_data.insert( ElementType(key, value) );\n+}\n+\n+inline void ReadNameMultiMerger::Clear(void) {\n+ m_data.clear();\n+}\n+\n+inline const ReaderAlignment& ReadNameMultiMerger::First(void) const {\n+ const ElementType& entry = (*m_data.begin());\n+ return entry.second;\n+}\n+\n+inline bool ReadNameMultiMerger::IsEmpty(void) const {\n+ return m_data.empty();\n+}\n+\n+inline void ReadNameMultiMerger::Remove(BamReader* reader) {\n+\n+ if ( reader == 0 ) return;\n+ const std::string filenameToRemove = reader->GetFilename();\n+\n+ // iterate over readers in cache\n+ DataIterator dataIter = m_data.begin();\n+ DataIterator dataEnd = m_data.end();\n+ for ( ; dataIter != dataEnd; ++dataIter ) {\n+ const ValueType& entry = (*dataIter).second;\n+ const BamReader* entryReader = entry.first;\n+ if ( entryReader == 0 ) continue;\n+\n+ // remove iterator on match\n+ if ( entryReader->GetFilename() == filenameToRemove ) {\n+ m_data.erase(dataIter);\n+ return;\n+ }\n+ }\n+\n+}\n+\n+inline int ReadNameMultiMerger::Size(void) const {\n+ return m_data.size();\n+}\n+\n+inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) {\n+ DataIterator first = m_data.begin();\n+ 
ReaderAlignment next = (*first).second;\n+ m_data.erase(first);\n+ return next;\n+}\n+\n+// ------------------------------------------\n+// UnsortedMultiMerger implementation\n+\n+inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) {\n+ m_data.push_back(value);\n+}\n+\n+inline void UnsortedMultiMerger::Clear(void) {\n+ for (size_t i = 0; i < m_data.size(); ++i )\n+ m_data.pop_back();\n+}\n+\n+inline const ReaderAlignment& UnsortedMultiMerger::First(void) const {\n+ return m_data.front();\n+}\n+\n+inline bool UnsortedMultiMerger::IsEmpty(void) const {\n+ return m_data.empty();\n+}\n+\n+inline void UnsortedMultiMerger::Remove(BamReader* reader) {\n+\n+ if ( reader == 0 ) return;\n+ const std::string filenameToRemove = reader->GetFilename();\n+\n+ // iterate over readers in cache\n+ DataIterator dataIter = m_data.begin();\n+ DataIterator dataEnd = m_data.end();\n+ for ( ; dataIter != dataEnd; ++dataIter ) {\n+ const BamReader* entryReader = (*dataIter).first;\n+ if ( entryReader == 0 ) continue;\n+\n+ // remove iterator on match\n+ if ( entryReader->GetFilename() == filenameToRemove ) {\n+ m_data.erase(dataIter);\n+ return;\n+ }\n+ }\n+}\n+\n+inline int UnsortedMultiMerger::Size(void) const {\n+ return m_data.size();\n+}\n+\n+inline ReaderAlignment UnsortedMultiMerger::TakeFirst(void) {\n+ ReaderAlignment first = m_data.front();\n+ m_data.erase( m_data.begin() );\n+ return first;\n+}\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+#endif // BAMMULTIMERGER_P_H\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b"@@ -0,0 +1,802 @@\n+// ***************************************************************************\n+// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Functionality for simultaneously reading multiple BAM files\n+// *************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamMultiReader.h>\n+#include <api/internal/BamMultiMerger_p.h>\n+#include <api/internal/BamMultiReader_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <fstream>\n+#include <iostream>\n+#include <iterator>\n+#include <sstream>\n+using namespace std;\n+\n+// ctor\n+BamMultiReaderPrivate::BamMultiReaderPrivate(void)\n+ : m_alignments(0)\n+ , m_isCoreMode(false)\n+ , m_sortOrder(BamMultiReader::SortedByPosition)\n+{ }\n+\n+// dtor\n+BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {\n+\n+ // close all open BAM readers\n+ Close();\n+\n+ // clean up alignment cache\n+ delete m_alignments;\n+ m_alignments = 0;\n+}\n+\n+// close all BAM files\n+void BamMultiReaderPrivate::Close(void) {\n+ CloseFiles( Filenames() );\n+}\n+\n+// close requested BAM file\n+void BamMultiReaderPrivate::CloseFile(const string& filename) { \n+ vector<string> filenames(1, filename);\n+ CloseFiles(filenames);\n+}\n+\n+// close requested BAM files\n+void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {\n+\n+ // iterate over filenames\n+ vector<string>::const_iterator filesIter = filenames.begin();\n+ vector<string>::const_iterator filesEnd = filenames.end();\n+ for ( ; filesIter != filesEnd; ++filesIter ) {\n+ const string& filename = (*filesIter);\n+ if ( filename.empty() ) continue;\n+\n+ 
// iterate over readers\n+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();\n+ for ( ; readerIter != readerEnd; ++readerIter ) {\n+ BamReader* reader = (*readerIter).first;\n+ if ( reader == 0 ) continue;\n+\n+ // if reader matches requested filename\n+ if ( reader->GetFilename() == filename ) {\n+\n+ // remove reader/alignment from alignment cache\n+ m_alignments->Remove(reader);\n+\n+ // close & delete reader\n+ reader->Close();\n+ delete reader;\n+ reader = 0;\n+\n+ // delete reader's alignment entry\n+ BamAlignment* alignment = (*readerIter).second;\n+ delete alignment;\n+ alignment = 0;\n+\n+ // remove reader from container\n+ m_readers.erase(readerIter);\n+\n+ // on match, just go on to next filename\n+ // (no need to keep looking and iterator is invalid now anyway)\n+ break;\n+ }\n+ }\n+ }\n+\n+ // make sure alignment cache is cleared if all readers are now closed\n+ if ( m_readers.empty() && m_alignments != 0 )\n+ m_alignments->Clear();\n+}\n+\n+// creates index files for BAM files that don't have them\n+bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {\n+\n+ bool result = true;\n+\n+ // iterate over readers\n+ vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n+ vector<ReaderAlignment>::iterator readerEnd = m_readers.end();\n+ for ( ; readerIter != readerEnd; ++readerIter ) {\n+ BamReader* reader = (*readerIter).first;\n+ if ( reader == 0 ) continue;\n+\n+ // if reader doesn't have an index, create one\n+ if ( !reader->HasIndex() )\n+ result &= reader->CreateIndex(type);\n+ }\n+\n+ return result;\n+}\n+\n+I"..b'= (*readerIter).second;\n+ if ( reader == 0 || alignment == 0 ) continue;\n+\n+ // save next alignment from each reader in cache\n+ SaveNextAlignment(reader, alignment);\n+ }\n+}\n+\n+// ValidateReaders checks that all the readers point to BAM files representing\n+// alignments against the same set of reference sequences, and 
that the\n+// sequences are identically ordered. If these checks fail the operation of\n+// the multireader is undefined, so we force program exit.\n+void BamMultiReaderPrivate::ValidateReaders(void) const {\n+\n+ // retrieve first reader data\n+ const BamReader* firstReader = m_readers.front().first;\n+ if ( firstReader == 0 ) return;\n+ const RefVector firstReaderRefData = firstReader->GetReferenceData();\n+ const int firstReaderRefCount = firstReader->GetReferenceCount();\n+ const int firstReaderRefSize = firstReaderRefData.size();\n+\n+ // iterate over all readers\n+ vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();\n+ vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();\n+ for ( ; readerIter != readerEnd; ++readerIter ) {\n+\n+ // get current reader data\n+ BamReader* reader = (*readerIter).first;\n+ if ( reader == 0 ) continue;\n+ const RefVector currentReaderRefData = reader->GetReferenceData();\n+ const int currentReaderRefCount = reader->GetReferenceCount();\n+ const int currentReaderRefSize = currentReaderRefData.size();\n+\n+ // init container iterators\n+ RefVector::const_iterator firstRefIter = firstReaderRefData.begin();\n+ RefVector::const_iterator firstRefEnd = firstReaderRefData.end();\n+ RefVector::const_iterator currentRefIter = currentReaderRefData.begin();\n+\n+ // compare reference counts from BamReader ( & container size, in case of BR error)\n+ if ( (currentReaderRefCount != firstReaderRefCount) ||\n+ (firstReaderRefSize != currentReaderRefSize) )\n+ {\n+ cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename()\n+ << " expected " << firstReaderRefCount\n+ << " reference sequences but only found " << currentReaderRefCount << endl;\n+ exit(1);\n+ }\n+\n+ // this will be ok; we just checked above that we have identically-sized sets of references\n+ // here we simply check if they are all, in fact, equal in content\n+ while ( firstRefIter != firstRefEnd ) {\n+ const 
RefData& firstRef = (*firstRefIter);\n+ const RefData& currentRef = (*currentRefIter);\n+\n+ // compare reference name & length\n+ if ( (firstRef.RefName != currentRef.RefName) ||\n+ (firstRef.RefLength != currentRef.RefLength) )\n+ {\n+ cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename()\n+ << " expected: " << endl;\n+\n+ // print first reader\'s reference data\n+ RefVector::const_iterator refIter = firstReaderRefData.begin();\n+ RefVector::const_iterator refEnd = firstReaderRefData.end();\n+ for ( ; refIter != refEnd; ++refIter ) {\n+ const RefData& entry = (*refIter);\n+ cerr << entry.RefName << " " << entry.RefLength << endl;\n+ }\n+\n+ cerr << "but found: " << endl;\n+\n+ // print current reader\'s reference data\n+ refIter = currentReaderRefData.begin();\n+ refEnd = currentReaderRefData.end();\n+ for ( ; refIter != refEnd; ++refIter ) {\n+ const RefData& entry = (*refIter);\n+ cerr << entry.RefName << " " << entry.RefLength << endl;\n+ }\n+\n+ exit(1);\n+ }\n+\n+ // update iterators\n+ ++firstRefIter;\n+ ++currentRefIter;\n+ }\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,102 @@ +// *************************************************************************** +// BamMultiReader_p.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 13 March 2011 (DB) +// --------------------------------------------------------------------------- +// Functionality for simultaneously reading multiple BAM files +// ************************************************************************* + +#ifndef BAMMULTIREADER_P_H +#define BAMMULTIREADER_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include <api/SamHeader.h> +#include <api/BamMultiReader.h> +#include <string> +#include <vector> + +namespace BamTools { +namespace Internal { + +class IBamMultiMerger; + +class BamMultiReaderPrivate { + + // constructor / destructor + public: + BamMultiReaderPrivate(void); + ~BamMultiReaderPrivate(void); + + // public interface + public: + + // file operations + void Close(void); + void CloseFile(const std::string& filename); + void CloseFiles(const std::vector<std::string>& filenames); + const std::vector<std::string> Filenames(void) const; + bool Jump(int refID, int position = 0); + bool Open(const std::vector<std::string>& filenames); + bool OpenFile(const std::string& filename); + void PrintFilenames(void) const; + bool Rewind(void); + bool SetRegion(const BamRegion& region); + + // access alignment data + bool GetNextAlignment(BamAlignment& al); + bool GetNextAlignmentCore(BamAlignment& al); + bool HasOpenReaders(void); + void SetSortOrder(const BamMultiReader::SortOrder& order); + + // access auxiliary data + SamHeader GetHeader(void) const; + std::string GetHeaderText(void) const; 
+ int GetReferenceCount(void) const; + const BamTools::RefVector GetReferenceData(void) const; + int GetReferenceID(const std::string& refName) const; + + // BAM index operations + bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD); + bool HasIndexes(void) const; + bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); + bool OpenIndexes(const std::vector<std::string>& indexFilenames); + void SetIndexCacheMode(const BamIndex::IndexCacheMode mode); + + // 'internal' methods + public: + IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; + const std::string ExtractReadGroup(const std::string& headerLine) const; + bool HasAlignmentData(void) const; + bool LoadNextAlignment(BamAlignment& al); + BamTools::BamReader* OpenReader(const std::string& filename); + bool RewindReaders(void); + void SaveNextAlignment(BamTools::BamReader* reader, BamTools::BamAlignment* alignment); + const std::vector<std::string> SplitHeaderText(const std::string& headerText) const; + void UpdateAlignmentCache(void); + void ValidateReaders(void) const; + + // data members + public: + typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment; + std::vector<ReaderAlignment> m_readers; + + IBamMultiMerger* m_alignments; + bool m_isCoreMode; + BamMultiReader::SortOrder m_sortOrder; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMMULTIREADER_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,273 @@\n+// ***************************************************************************\n+// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011(DB)\n+// ---------------------------------------------------------------------------\n+// Manages random access operations in a BAM file\n+// **************************************************************************\n+\n+#include <api/BamIndex.h>\n+#include <api/internal/BamRandomAccessController_p.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamIndexFactory_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <iostream>\n+using namespace std;\n+\n+BamRandomAccessController::BamRandomAccessController(void)\n+ : m_index(0)\n+ , m_indexCacheMode(BamIndex::LimitedIndexCaching)\n+ , m_hasAlignmentsInRegion(true)\n+{ }\n+\n+BamRandomAccessController::~BamRandomAccessController(void) {\n+ Close();\n+}\n+\n+void BamRandomAccessController::AdjustRegion(const int& referenceCount) {\n+\n+ // skip if no index available\n+ if ( m_index == 0 )\n+ return;\n+\n+ // see if any references in region have alignments\n+ m_hasAlignmentsInRegion = false;\n+ int currentId = m_region.LeftRefID;\n+ const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? 
m_region.RightRefID : referenceCount - 1 );\n+ while ( currentId <= rightBoundRefId ) {\n+ m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);\n+ if ( m_hasAlignmentsInRegion ) break;\n+ ++currentId;\n+ }\n+\n+ // if no data found on any reference in region\n+ if ( !m_hasAlignmentsInRegion )\n+ return;\n+\n+ // if left bound of desired region had no data, use first reference that had data\n+ // otherwise, leave requested region as-is\n+ if ( currentId != m_region.LeftRefID ) {\n+ m_region.LeftRefID = currentId;\n+ m_region.LeftPosition = 0;\n+ }\n+}\n+\n+// returns alignments\' "RegionState": { Before|Overlaps|After } current region\n+BamRandomAccessController::RegionState\n+BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {\n+\n+ // if region has no left bound at all\n+ if ( !m_region.isLeftBoundSpecified() )\n+ return OverlapsRegion;\n+\n+ // handle unmapped reads - return AFTER region to halt processing\n+ if ( alignment.RefID == -1 )\n+ return AfterRegion;\n+\n+ // if alignment is on any reference before left bound reference\n+ if ( alignment.RefID < m_region.LeftRefID )\n+ return BeforeRegion;\n+\n+ // if alignment is on left bound reference\n+ else if ( alignment.RefID == m_region.LeftRefID ) {\n+\n+ // if alignment starts at or after left bound position\n+ if ( alignment.Position >= m_region.LeftPosition) {\n+\n+ if ( m_region.isRightBoundSpecified() && // right bound is specified AND\n+ m_region.LeftRefID == m_region.RightRefID && // left & right bounds on same reference AND\n+ alignment.Position > m_region.RightPosition ) // alignment starts after right bound position\n+ return AfterRegion;\n+\n+ // otherwise, alignment overlaps region\n+ else return OverlapsRegion;\n+ }\n+\n+ // alignment starts before left bound position\n+ else {\n+\n+ // if alignment overlaps left bound position\n+ if ( alignment.GetEndPosition() >= m_region.LeftPosition )\n+ return OverlapsRegion;\n+ else\n+ return BeforeRegion;\n+ }\n+ }\n+\n+ 
// otherwise alignment is on a reference after left bound reference\n+ else {\n+\n+ // if region has a right bound\n+ if ( m_region.isRightBoundSpecified() ) {\n+\n+ // alignment is on any reference between boundaries\n+ if ( alignment.RefID < m_region.Ri'..b' "BamRandomAccessController ERROR: could not create index for BAM file: "\n+ << reader->Filename() << endl;\n+ return false;\n+ }\n+\n+ // save new index\n+ SetIndex(newIndex);\n+\n+ // set new index\'s cache mode & return success\n+ newIndex->SetCacheMode(m_indexCacheMode);\n+ return true;\n+}\n+\n+bool BamRandomAccessController::HasIndex(void) const {\n+ return ( m_index != 0 );\n+}\n+\n+bool BamRandomAccessController::HasRegion(void) const {\n+ return ( !m_region.isNull() );\n+}\n+\n+bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {\n+ return m_index->HasAlignments(refId);\n+}\n+\n+bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,\n+ const BamIndex::IndexType& preferredType)\n+{\n+ // look up index filename, deferring to preferredType if possible\n+ const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);\n+\n+ // if no index file found (of any type)\n+ if ( indexFilename.empty() ) {\n+ cerr << "BamRandomAccessController WARNING: "\n+ << "could not find index file for BAM: "\n+ << reader->Filename() << endl;\n+ return false;\n+ }\n+\n+ // otherwise open & use index file that was found\n+ return OpenIndex(indexFilename, reader);\n+}\n+\n+bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {\n+\n+ // attempt create new index of type based on filename\n+ BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);\n+ if ( index == 0 ) {\n+ cerr << "BamRandomAccessController ERROR: could not create index for file: " << indexFilename << endl;\n+ return false;\n+ }\n+\n+ // set cache mode\n+ index->SetCacheMode(m_indexCacheMode);\n+\n+ // attempt to 
load data from index file\n+ if ( !index->Load(indexFilename) ) {\n+ cerr << "BamRandomAccessController ERROR: could not load index data from file: " << indexFilename << endl;\n+ return false;\n+ }\n+\n+ // save new index & return success\n+ SetIndex(index);\n+ return true;\n+}\n+\n+bool BamRandomAccessController::RegionHasAlignments(void) const {\n+ return m_hasAlignmentsInRegion;\n+}\n+\n+void BamRandomAccessController::SetIndex(BamIndex* index) {\n+ if ( m_index )\n+ ClearIndex();\n+ m_index = index;\n+}\n+\n+void BamRandomAccessController::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+ m_indexCacheMode = mode;\n+ if ( m_index )\n+ m_index->SetCacheMode(mode);\n+}\n+\n+bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,\n+ const BamRegion& region,\n+ const int& referenceCount)\n+{\n+ // store region\n+ m_region = region;\n+\n+ // cannot jump when no index is available\n+ if ( !HasIndex() )\n+ return false;\n+\n+ // adjust region as necessary to reflect where data actually begins\n+ AdjustRegion(referenceCount);\n+\n+ // if no data present, return true\n+ // * Not an error, but future attempts to access alignments in this region will not return data\n+ // Returning true is useful in a BamMultiReader setting where some BAM files may\n+ // lack alignments in regions where other BAMs do have data.\n+ if ( !m_hasAlignmentsInRegion )\n+ return true;\n+\n+ // return success/failure of jump to specified region,\n+ //\n+ // * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag\n+ // This covers \'corner case\' where a region is requested that lies beyond the last\n+ // alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]\n+ // will not return data. BamMultiReader will still be able to successfully pull alignments\n+ // from a region from multiple files even if one or more have no data.\n+ return m_index->Jump(m_region, &m_hasAlignmentsInRegion);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,94 @@ +// *************************************************************************** +// BamRandomAccessController_p.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 24 February 2011(DB) +// --------------------------------------------------------------------------- +// Manages random access operations in a BAM file +// *************************************************************************** + +#ifndef BAMRACONTROLLER_P_H +#define BAMRACONTROLLER_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include <api/BamAux.h> +#include <api/BamIndex.h> + +namespace BamTools { + +class BamAlignment; + +namespace Internal { + +class BamReaderPrivate; + +class BamRandomAccessController { + + // enums + public: enum RegionState { BeforeRegion = 0 + , OverlapsRegion + , AfterRegion + }; + + // ctor & dtor + public: + BamRandomAccessController(void); + ~BamRandomAccessController(void); + + // general interface + public: + void Close(void); + + // index operations + public: + // + void ClearIndex(void); + bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type); + bool HasIndex(void) const; + bool IndexHasAlignmentsForReference(const int& refId); + bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType); + bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader); + void SetIndex(BamIndex* index); + void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); + + // region operations + public: + void ClearRegion(void); + bool HasRegion(void) const; + RegionState AlignmentState(const BamAlignment& alignment) const; + bool 
RegionHasAlignments(void) const; + bool SetRegion(BamReaderPrivate* reader, + const BamRegion& region, + const int& referenceCount); + + // 'internal' methods + public: + // adjusts requested region if necessary (depending on where data actually begins) + void AdjustRegion(const int& referenceCount); + + // data members + private: + + // index data + BamIndex* m_index; // owns index, not a copy - responsible for deleting + BamIndex::IndexCacheMode m_indexCacheMode; + + // region data + BamRegion m_region; + bool m_hasAlignmentsInRegion; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMRACONTROLLER_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,381 @@\n+// ***************************************************************************\n+// BamReader_p.cpp (c) 2009 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 10 May 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the basic functionality for reading BAM files\n+// ***************************************************************************\n+\n+#include <api/BamConstants.h>\n+#include <api/BamReader.h>\n+#include <api/internal/BamHeader_p.h>\n+#include <api/internal/BamRandomAccessController_p.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamStandardIndex_p.h>\n+#include <api/internal/BamToolsIndex_p.h>\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <vector>\n+using namespace std;\n+\n+// constructor\n+BamReaderPrivate::BamReaderPrivate(BamReader* parent)\n+ : m_alignmentsBeginOffset(0)\n+ , m_parent(parent)\n+{\n+ m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// destructor\n+BamReaderPrivate::~BamReaderPrivate(void) {\n+ Close();\n+}\n+\n+// closes the BAM file\n+void BamReaderPrivate::Close(void) {\n+\n+ // clear header & reference data\n+ m_references.clear();\n+ m_header.Clear();\n+\n+ // close internal\n+ m_randomAccessController.Close();\n+ m_stream.Close();\n+\n+ // clear filename\n+ m_filename.clear();\n+}\n+\n+// creates an index file of requested type on current BAM file\n+bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {\n+ if ( !IsOpen() ) return false;\n+ return m_randomAccessController.CreateIndex(this, type);\n+}\n+\n+// return path & filename of current BAM file\n+const string BamReaderPrivate::Filename(void) const {\n+ return 
m_filename;\n+}\n+\n+// return header data as std::string\n+string BamReaderPrivate::GetHeaderText(void) const {\n+ return m_header.ToString();\n+}\n+\n+// return header data as SamHeader object\n+SamHeader BamReaderPrivate::GetSamHeader(void) const {\n+ return m_header.ToSamHeader();\n+}\n+\n+// get next alignment (with character data fully parsed)\n+bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {\n+\n+ // if valid alignment found\n+ if ( GetNextAlignmentCore(alignment) ) {\n+\n+ // store alignment\'s "source" filename\n+ alignment.Filename = m_filename;\n+\n+ // return success/failure of parsing char data\n+ return alignment.BuildCharData();\n+ }\n+\n+ // no valid alignment found\n+ return false;\n+}\n+\n+// retrieves next available alignment core data (returns success/fail)\n+// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)\n+// these can be accessed, if necessary, from the supportData\n+// useful for operations requiring ONLY positional or other alignment-related information\n+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {\n+\n+ // skip if region is set but has no alignments\n+ if ( m_randomAccessController.HasRegion() &&\n+ !m_randomAccessController.RegionHasAlignments() )\n+ {\n+ return false;\n+ }\n+\n+ // if can\'t read next alignment\n+ if ( !LoadNextAlignment(alignment) )\n+ return false;\n+\n+ // check alignment\'s region-overlap state\n+ BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);\n+\n+ // if alignment starts after region, no need to keep reading\n+ if ( state == BamRandomAccessController::AfterRegion )\n+ return false;\n+\n+ // read until overlap is found\n+ while ( state != BamRandomAccessController::OverlapsRegion ) {\n+\n+ // if can\'t read next alignment\n+ if ( !LoadNextAlignment(alignment) )\n+ return false;\n+\n+ // check alignment\'s region-overlap sta'..b'n readCharDataOK;\n+}\n+\n+// loads 
reference data from BAM file\n+bool BamReaderPrivate::LoadReferenceData(void) {\n+\n+ // get number of reference sequences\n+ char buffer[sizeof(uint32_t)];\n+ m_stream.Read(buffer, sizeof(uint32_t));\n+ uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);\n+ m_references.reserve((int)numberRefSeqs);\n+\n+ // iterate over all references in header\n+ for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {\n+\n+ // get length of reference name\n+ m_stream.Read(buffer, sizeof(uint32_t));\n+ uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);\n+ char* refName = (char*)calloc(refNameLength, 1);\n+\n+ // get reference name and reference sequence length\n+ m_stream.Read(refName, refNameLength);\n+ m_stream.Read(buffer, sizeof(int32_t));\n+ int32_t refLength = BamTools::UnpackSignedInt(buffer);\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);\n+\n+ // store data for reference\n+ RefData aReference;\n+ aReference.RefName = (string)((const char*)refName);\n+ aReference.RefLength = refLength;\n+ m_references.push_back(aReference);\n+\n+ // clean up calloc-ed temp variable\n+ free(refName);\n+ }\n+\n+ // return success\n+ return true;\n+}\n+\n+bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {\n+ return m_randomAccessController.LocateIndex(this, preferredType);\n+}\n+\n+// opens BAM file (and index)\n+bool BamReaderPrivate::Open(const string& filename) {\n+\n+ // close current BAM file if open\n+ if ( m_stream.IsOpen )\n+ Close();\n+\n+ // attempt to open BgzfStream for reading\n+ if ( !m_stream.Open(filename, "rb") ) {\n+ cerr << "BamReader ERROR: Could not open BGZF stream for " << filename << endl;\n+ return false;\n+ }\n+\n+ // attempt to load header data\n+ if ( !LoadHeaderData() ) {\n+ cerr << "BamReader ERROR: Could not load header data for " << filename << endl;\n+ Close();\n+ return 
false;\n+ }\n+\n+ // attempt to load reference data\n+ if ( !LoadReferenceData() ) {\n+ cerr << "BamReader ERROR: Could not load reference data for " << filename << endl;\n+ Close();\n+ return false;\n+ }\n+\n+ // if all OK, store filename & offset of first alignment\n+ m_filename = filename;\n+ m_alignmentsBeginOffset = m_stream.Tell();\n+\n+ // return success\n+ return true;\n+}\n+\n+bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {\n+ return m_randomAccessController.OpenIndex(indexFilename, this);\n+}\n+\n+// returns BAM file pointer to beginning of alignment data\n+bool BamReaderPrivate::Rewind(void) {\n+\n+ // attempt rewind to first alignment\n+ if ( !m_stream.Seek(m_alignmentsBeginOffset) )\n+ return false;\n+\n+ // verify that we can read first alignment\n+ BamAlignment al;\n+ if ( !LoadNextAlignment(al) )\n+ return false;\n+\n+ // reset region\n+ m_randomAccessController.ClearRegion();\n+\n+ // rewind back to beginning of first alignment\n+ // return success/fail of seek\n+ return m_stream.Seek(m_alignmentsBeginOffset);\n+}\n+\n+bool BamReaderPrivate::Seek(const int64_t& position) {\n+ return m_stream.Seek(position);\n+}\n+\n+void BamReaderPrivate::SetIndex(BamIndex* index) {\n+ m_randomAccessController.SetIndex(index);\n+}\n+\n+// change the index caching behavior\n+void BamReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n+ m_randomAccessController.SetIndexCacheMode(mode);\n+}\n+\n+// sets current region & attempts to jump to it\n+// returns success/failure\n+bool BamReaderPrivate::SetRegion(const BamRegion& region) {\n+ return m_randomAccessController.SetRegion(this, region, m_references.size());\n+}\n+\n+int64_t BamReaderPrivate::Tell(void) const {\n+ return m_stream.Tell();\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,113 @@ +// *************************************************************************** +// BamReader_p.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for reading BAM files +// *************************************************************************** + +#ifndef BAMREADER_P_H +#define BAMREADER_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include <api/BamAlignment.h> +#include <api/BamIndex.h> +#include <api/BamReader.h> +#include <api/SamHeader.h> +#include <api/internal/BamHeader_p.h> +#include <api/internal/BamRandomAccessController_p.h> +#include <api/internal/BgzfStream_p.h> +#include <string> + +namespace BamTools { +namespace Internal { + +class BamReaderPrivate { + + // ctor & dtor + public: + BamReaderPrivate(BamReader* parent); + ~BamReaderPrivate(void); + + // BamReader interface + public: + + // file operations + void Close(void); + const std::string Filename(void) const; + bool IsOpen(void) const; + bool Open(const std::string& filename); + bool Rewind(void); + bool SetRegion(const BamRegion& region); + + // access alignment data + bool GetNextAlignment(BamAlignment& alignment); + bool GetNextAlignmentCore(BamAlignment& alignment); + + // access auxiliary data + std::string GetHeaderText(void) const; + SamHeader GetSamHeader(void) const; + int GetReferenceCount(void) const; + const RefVector& GetReferenceData(void) const; + int GetReferenceID(const std::string& refName) const; + + // index operations + bool CreateIndex(const 
BamIndex::IndexType& type); + bool HasIndex(void) const; + bool LocateIndex(const BamIndex::IndexType& preferredType); + bool OpenIndex(const std::string& indexFilename); + void SetIndex(BamIndex* index); + void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); + + // internal methods, but available as a BamReaderPrivate 'interface' + // + // these methods should only be used by BamTools::Internal classes + // (currently only used by the BamIndex subclasses) + public: + // retrieves header text from BAM file + bool LoadHeaderData(void); + // retrieves BAM alignment under file pointer + // (does no overlap checking or character data parsing) + bool LoadNextAlignment(BamAlignment& alignment); + // builds reference data structure from BAM file + bool LoadReferenceData(void); + // seek reader to file position + bool Seek(const int64_t& position); + // return reader's file position + int64_t Tell(void) const; + + // data members + public: + + // general BAM file data + int64_t m_alignmentsBeginOffset; + std::string m_filename; + RefVector m_references; + + // system data + bool m_isBigEndian; + + // parent BamReader + BamReader* m_parent; + + // BamReaderPrivate components + BamHeader m_header; + BamRandomAccessController m_randomAccessController; + BgzfStream m_stream; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMREADER_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,974 @@\n+// ***************************************************************************\n+// BamStandardIndex.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 16 June 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the standardized BAM index format (".bai")\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamStandardIndex_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <algorithm>\n+#include <iostream>\n+using namespace std;\n+\n+// static BamStandardIndex constants\n+const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1\n+const int BamStandardIndex::BAM_LIDX_SHIFT = 14;\n+const string BamStandardIndex::BAI_EXTENSION = ".bai";\n+const char* const BamStandardIndex::BAI_MAGIC = "BAI\\1";\n+const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;\n+const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);\n+const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);\n+\n+// ctor\n+BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)\n+ : BamIndex(reader)\n+ , m_indexStream(0)\n+ , m_cacheMode(BamIndex::LimitedIndexCaching)\n+ , m_buffer(0)\n+ , m_bufferLength(0)\n+{\n+ m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// dtor\n+BamStandardIndex::~BamStandardIndex(void) {\n+ CloseFile();\n+}\n+\n+bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {\n+\n+ // retrieve references from reader\n+ const RefVector& references = m_reader->GetReferenceData();\n+\n+ // make sure left-bound 
position is valid\n+ if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )\n+ return false;\n+\n+ // set region \'begin\'\n+ begin = (unsigned int)region.LeftPosition;\n+\n+ // if right bound specified AND left&right bounds are on same reference\n+ // OK to use right bound position as region \'end\'\n+ if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )\n+ end = (unsigned int)region.RightPosition;\n+\n+ // otherwise, set region \'end\' to last reference base\n+ else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;\n+\n+ // return success\n+ return true;\n+}\n+\n+void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,\n+ const uint32_t& end,\n+ set<uint16_t>& candidateBins)\n+{\n+ // initialize list, bin \'0\' is always a valid bin\n+ candidateBins.insert(0);\n+\n+ // get rest of bins that contain this region\n+ unsigned int k;\n+ for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { candidateBins.insert(k); }\n+ for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { candidateBins.insert(k); }\n+ for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { candidateBins.insert(k); }\n+ for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { candidateBins.insert(k); }\n+ for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }\n+}\n+\n+bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n+ const uint64_t& minOffset,\n+ set<uint16_t>& candidateBins,\n+ vector<int64_t>& offsets)\n+{\n+ // attempt seek to first bin\n+ if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )\n+ return false;\n+\n+ // iterate over reference bins\n+ uint32_t bi'..b"ctor& chunks) {\n+\n+ // make sure chunks are merged (simplified) before writing & saving summary\n+ MergeAlignmentChunks(chunks);\n+\n+ size_t elementsWritten = 0;\n+\n+ // write chunks\n+ int32_t chunkCount = chunks.size();\n+ if ( m_isBigEndian ) SwapEndian_32(chunkCount);\n+ elementsWritten 
+= fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);\n+\n+ // iterate over chunks\n+ bool chunksOk = true;\n+ BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();\n+ BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();\n+ for ( ; chunkIter != chunkEnd; ++chunkIter )\n+ chunksOk &= WriteAlignmentChunk( (*chunkIter) );\n+\n+ // return success/failure of write\n+ return ( (elementsWritten == 1) && chunksOk );\n+}\n+\n+bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {\n+\n+ size_t elementsWritten = 0;\n+\n+ // write BAM bin ID\n+ uint32_t binKey = binId;\n+ if ( m_isBigEndian ) SwapEndian_32(binKey);\n+ elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);\n+\n+ // write bin's alignment chunks\n+ bool chunksOk = WriteAlignmentChunks(chunks);\n+\n+ // return success/failure of write\n+ return ( (elementsWritten == 1) && chunksOk );\n+}\n+\n+bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {\n+\n+ size_t elementsWritten = 0;\n+\n+ // write number of bins\n+ int32_t binCount = bins.size();\n+ if ( m_isBigEndian ) SwapEndian_32(binCount);\n+ elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);\n+\n+ // save summary for reference's bins\n+ SaveBinsSummary(refId, bins.size());\n+\n+ // iterate over bins\n+ bool binsOk = true;\n+ BaiBinMap::iterator binIter = bins.begin();\n+ BaiBinMap::iterator binEnd = bins.end();\n+ for ( ; binIter != binEnd; ++binIter )\n+ binsOk &= WriteBin( (*binIter).first, (*binIter).second );\n+\n+ // return success/failure of write\n+ return ( (elementsWritten == 1) && binsOk );\n+}\n+\n+bool BamStandardIndex::WriteHeader(void) {\n+\n+ size_t elementsWritten = 0;\n+\n+ // write magic number\n+ elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);\n+\n+ // write number of reference sequences\n+ int32_t numReferences = m_indexFileSummary.size();\n+ if ( m_isBigEndian ) 
SwapEndian_32(numReferences);\n+ elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n+\n+ // return success/failure of write\n+ return (elementsWritten == 5);\n+}\n+\n+bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {\n+\n+ // make sure linear offsets are sorted before writing & saving summary\n+ SortLinearOffsets(linearOffsets);\n+\n+ size_t elementsWritten = 0;\n+\n+ // write number of linear offsets\n+ int32_t offsetCount = linearOffsets.size();\n+ if ( m_isBigEndian ) SwapEndian_32(offsetCount);\n+ elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);\n+\n+ // save summary for reference's linear offsets\n+ SaveLinearOffsetsSummary(refId, linearOffsets.size());\n+\n+ // iterate over linear offsets\n+ BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();\n+ BaiLinearOffsetVector::const_iterator offsetEnd = linearOffsets.end();\n+ for ( ; offsetIter != offsetEnd; ++offsetIter ) {\n+\n+ // write linear offset\n+ uint64_t linearOffset = (*offsetIter);\n+ if ( m_isBigEndian ) SwapEndian_64(linearOffset);\n+ elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);\n+ }\n+\n+ // return success/failure of write\n+ return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );\n+}\n+\n+bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {\n+ bool refOk = true;\n+ refOk &= WriteBins(refEntry.ID, refEntry.Bins);\n+ refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);\n+ return refOk;\n+}\n" |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,237 @@\n+// ***************************************************************************\n+// BamStandardIndex.h (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the standardized BAM index format (".bai")\n+// ***************************************************************************\n+\n+#ifndef BAM_STANDARD_INDEX_FORMAT_H\n+#define BAM_STANDARD_INDEX_FORMAT_H\n+\n+// -------------\n+// W A R N I N G\n+// -------------\n+//\n+// This file is not part of the BamTools API. It exists purely as an\n+// implementation detail. This header file may change from version to\n+// version without notice, or even be removed.\n+//\n+// We mean it.\n+\n+#include <api/BamAux.h>\n+#include <api/BamIndex.h>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <vector>\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+// -----------------------------------------------------------------------------\n+// BamStandardIndex data structures\n+\n+// defines start and end of a contiguous run of alignments\n+struct BaiAlignmentChunk {\n+\n+ // data members\n+ uint64_t Start;\n+ uint64_t Stop;\n+\n+ // constructor\n+ BaiAlignmentChunk(const uint64_t& start = 0,\n+ const uint64_t& stop = 0)\n+ : Start(start)\n+ , Stop(stop)\n+ { }\n+};\n+\n+// comparison operator (for sorting)\n+inline\n+bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {\n+ return lhs.Start < rhs.Start;\n+}\n+\n+// convenience typedef for a list of all alignment \'chunks\' in a BAI bin\n+typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;\n+\n+// convenience typedef for a map of all BAI bins in a reference (ID => chunks)\n+typedef std::map<uint32_t, BaiAlignmentChunkVector> 
BaiBinMap;\n+\n+// convenience typedef for a list of all \'linear offsets\' in a reference\n+typedef std::vector<uint64_t> BaiLinearOffsetVector;\n+\n+// contains all fields necessary for building, loading, & writing\n+// full BAI index data for a single reference\n+struct BaiReferenceEntry {\n+\n+ // data members\n+ int32_t ID;\n+ BaiBinMap Bins;\n+ BaiLinearOffsetVector LinearOffsets;\n+\n+ // ctor\n+ BaiReferenceEntry(const int32_t& id = -1)\n+ : ID(id)\n+ { }\n+};\n+\n+// provides (persistent) summary of BaiReferenceEntry\'s index data\n+struct BaiReferenceSummary {\n+\n+ // data members\n+ int NumBins;\n+ int NumLinearOffsets;\n+ uint64_t FirstBinFilePosition;\n+ uint64_t FirstLinearOffsetFilePosition;\n+\n+ // ctor\n+ BaiReferenceSummary(void)\n+ : NumBins(0)\n+ , NumLinearOffsets(0)\n+ , FirstBinFilePosition(0)\n+ , FirstLinearOffsetFilePosition(0)\n+ { }\n+};\n+\n+// convenience typedef for describing a full BAI index file summary\n+typedef std::vector<BaiReferenceSummary> BaiFileSummary;\n+\n+// end BamStandardIndex data structures\n+// -----------------------------------------------------------------------------\n+\n+class BamStandardIndex : public BamIndex {\n+\n+ // ctor & dtor\n+ public:\n+ BamStandardIndex(Internal::BamReaderPrivate* reader);\n+ ~BamStandardIndex(void);\n+\n+ // BamIndex implementation\n+ public:\n+ // builds index from associated BAM file & writes out to index file\n+ bool Create(void);\n+ // returns whether reference has alignments or no\n+ bool HasAlignments(const int& referenceID) const;\n+ // attempts to use index data to jump to @region, returns success/fail\n+ // a "successful" jump indicates no error, but not whether this region has data\n+ // * thus, the method sets a flag to indicate whether there are alignments\n+ // available after the jump position\n+ bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegio'..b'ds\n+ private:\n+ bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);\n+ 
void CalculateCandidateBins(const uint32_t& begin,\n+ const uint32_t& end,\n+ std::set<uint16_t>& candidateBins);\n+ bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n+ const uint64_t& minOffset,\n+ std::set<uint16_t>& candidateBins,\n+ std::vector<int64_t>& offsets);\n+ uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);\n+ bool GetOffsets(const BamRegion& region, std::vector<int64_t>& offsets);\n+ uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);\n+\n+ // internal BAI summary (create/load) methods\n+ private:\n+ void ReserveForSummary(const int& numReferences);\n+ void SaveBinsSummary(const int& refId, const int& numBins);\n+ void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);\n+ bool SkipBins(const int& numBins);\n+ bool SkipLinearOffsets(const int& numLinearOffsets);\n+ bool SummarizeBins(BaiReferenceSummary& refSummary);\n+ bool SummarizeIndexFile(void);\n+ bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);\n+ bool SummarizeReference(BaiReferenceSummary& refSummary);\n+\n+ // internal BAI full index input methods\n+ private:\n+ bool ReadBinID(uint32_t& binId);\n+ bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);\n+ bool ReadIntoBuffer(const unsigned int& bytesRequested);\n+ bool ReadLinearOffset(uint64_t& linearOffset);\n+ bool ReadNumAlignmentChunks(int& numAlignmentChunks);\n+ bool ReadNumBins(int& numBins);\n+ bool ReadNumLinearOffsets(int& numLinearOffsets);\n+ bool ReadNumReferences(int& numReferences);\n+\n+ // internal BAI full index output methods\n+ private:\n+ void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);\n+ void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);\n+ bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);\n+ bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);\n+ bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);\n+ bool WriteBins(const 
int& refId, BaiBinMap& bins);\n+ bool WriteHeader(void);\n+ bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);\n+ bool WriteReferenceEntry(BaiReferenceEntry& refEntry);\n+\n+ // data members\n+ private:\n+ FILE* m_indexStream;\n+ bool m_isBigEndian;\n+ BamIndex::IndexCacheMode m_cacheMode;\n+ BaiFileSummary m_indexFileSummary;\n+\n+ // our input buffer\n+ char* m_buffer;\n+ unsigned int m_bufferLength;\n+\n+ // static methods\n+ private:\n+ // checks if the buffer is large enough to accomodate the requested size\n+ static void CheckBufferSize(char*& buffer,\n+ unsigned int& bufferLength,\n+ const unsigned int& requestedBytes);\n+ // checks if the buffer is large enough to accomodate the requested size\n+ static void CheckBufferSize(unsigned char*& buffer,\n+ unsigned int& bufferLength,\n+ const unsigned int& requestedBytes);\n+ // static constants\n+ private:\n+ static const int MAX_BIN;\n+ static const int BAM_LIDX_SHIFT;\n+ static const std::string BAI_EXTENSION;\n+ static const char* const BAI_MAGIC;\n+ static const int SIZEOF_ALIGNMENTCHUNK;\n+ static const int SIZEOF_BINCORE;\n+ static const int SIZEOF_LINEAROFFSET;\n+};\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+#endif // BAM_STANDARD_INDEX_FORMAT_H\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,642 @@\n+// ***************************************************************************\n+// BamToolsIndex.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 27 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides index operations for the BamTools index format (".bti")\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/internal/BamReader_p.h>\n+#include <api/internal/BamToolsIndex_p.h>\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+#include <algorithm>\n+#include <iostream>\n+#include <iterator>\n+#include <map>\n+using namespace std;\n+\n+// static BamToolsIndex constants\n+const int BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;\n+const string BamToolsIndex::BTI_EXTENSION = ".bti";\n+const char* const BamToolsIndex::BTI_MAGIC = "BTI\\1";\n+const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t)*2 + sizeof(int64_t);\n+\n+// ctor\n+BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)\n+ : BamIndex(reader)\n+ , m_indexStream(0)\n+ , m_cacheMode(BamIndex::LimitedIndexCaching)\n+ , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)\n+ , m_inputVersion(0)\n+ , m_outputVersion(BTI_1_2) // latest version - used for writing new index files\n+{\n+ m_isBigEndian = BamTools::SystemIsBigEndian();\n+}\n+\n+// dtor\n+BamToolsIndex::~BamToolsIndex(void) {\n+ CloseFile();\n+}\n+\n+bool BamToolsIndex::CheckMagicNumber(void) {\n+\n+ // check \'magic number\' to see if file is BTI index\n+ char magic[4];\n+ size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);\n+ if ( elementsRead != 4 ) {\n+ cerr << "BamToolsIndex ERROR: could not 
read format \'magic\' number" << endl;\n+ return false;\n+ }\n+\n+ if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 ) {\n+ cerr << "BamToolsIndex ERROR: invalid format" << endl;\n+ return false;\n+ }\n+\n+ // otherwise ok\n+ return true;\n+}\n+\n+// check index file version, return true if OK\n+bool BamToolsIndex::CheckVersion(void) {\n+\n+ // read version from file\n+ size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);\n+ if ( elementsRead != 1 ) return false;\n+ if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);\n+\n+ // if version is negative, or zero\n+ if ( m_inputVersion <= 0 ) {\n+ cerr << "BamToolsIndex ERROR: could not load index file: invalid version."\n+ << endl;\n+ return false;\n+ }\n+\n+ // if version is newer than can be supported by this version of bamtools\n+ else if ( m_inputVersion > m_outputVersion ) {\n+ cerr << "BamToolsIndex ERROR: could not load index file. This version of BamTools does not recognize new index file version"\n+ << endl\n+ << "Please update BamTools to a more recent version to support this index file."\n+ << endl;\n+ return false;\n+ }\n+\n+ // ------------------------------------------------------------------\n+ // check for deprecated, unsupported versions\n+ // (typically whose format did not accomodate a particular bug fix)\n+\n+ else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) {\n+ cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to accessing data near reference ends."\n+ << endl << endl\n+ << "Please run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date, fixed BTI file."\n+ << endl << endl;\n+ return false;\n+ }\n+\n+ else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) {\n+ cerr << "BamToolsIndex ERROR: could not load index file. 
'..b"efSummary.FirstBlockFilePosition << endl;\n+ return false;\n+ }\n+\n+ // read & store block entries\n+ bool readOk = true;\n+ BtiBlock block;\n+ for ( int i = 0; i < refSummary.NumBlocks; ++i ) {\n+ readOk &= ReadBlock(block);\n+ blocks.push_back(block);\n+ }\n+ return readOk;\n+}\n+\n+bool BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {\n+\n+ // return false if refId not valid index in file summary structure\n+ if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )\n+ return false;\n+\n+ // use index summary to assist reading the reference's BTI blocks\n+ const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);\n+ return ReadBlocks(refSummary, refEntry.Blocks);\n+}\n+\n+bool BamToolsIndex::Seek(const int64_t& position, const int& origin) {\n+ return ( fseek64(m_indexStream, position, origin) == 0 );\n+}\n+\n+// change the index caching behavior\n+void BamToolsIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {\n+ m_cacheMode = mode;\n+ // do nothing else here ? 
cache mode will be ignored from now on, most likely\n+}\n+\n+bool BamToolsIndex::SkipBlocks(const int& numBlocks) {\n+ return Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );\n+}\n+\n+int64_t BamToolsIndex::Tell(void) const {\n+ return ftell64(m_indexStream);\n+}\n+\n+bool BamToolsIndex::WriteBlock(const BtiBlock& block) {\n+\n+ // copy entry data\n+ int32_t maxEndPosition = block.MaxEndPosition;\n+ int64_t startOffset = block.StartOffset;\n+ int32_t startPosition = block.StartPosition;\n+\n+ // swap endian-ness if necessary\n+ if ( m_isBigEndian ) {\n+ SwapEndian_32(maxEndPosition);\n+ SwapEndian_64(startOffset);\n+ SwapEndian_32(startPosition);\n+ }\n+\n+ // write the reference index entry\n+ size_t elementsWritten = 0;\n+ elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);\n+ elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, m_indexStream);\n+ elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, m_indexStream);\n+ return ( elementsWritten == 3 );\n+}\n+\n+bool BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {\n+ bool writtenOk = true;\n+ BtiBlockVector::const_iterator blockIter = blocks.begin();\n+ BtiBlockVector::const_iterator blockEnd = blocks.end();\n+ for ( ; blockIter != blockEnd; ++blockIter )\n+ writtenOk &= WriteBlock(*blockIter);\n+ return writtenOk;\n+}\n+\n+bool BamToolsIndex::WriteHeader(void) {\n+\n+ size_t elementsWritten = 0;\n+\n+ // write BTI index format 'magic number'\n+ elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, m_indexStream);\n+\n+ // write BTI index format version\n+ int32_t currentVersion = (int32_t)m_outputVersion;\n+ if ( m_isBigEndian ) SwapEndian_32(currentVersion);\n+ elementsWritten += fwrite(¤tVersion, sizeof(currentVersion), 1, m_indexStream);\n+\n+ // write block size\n+ int32_t blockSize = m_blockSize;\n+ if ( m_isBigEndian ) SwapEndian_32(blockSize);\n+ elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);\n+\n+ 
// write number of references\n+ int32_t numReferences = m_indexFileSummary.size();\n+ if ( m_isBigEndian ) SwapEndian_32(numReferences);\n+ elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n+\n+ // return success/failure of write\n+ return ( elementsWritten == 7 );\n+}\n+\n+bool BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {\n+\n+ size_t elementsWritten = 0;\n+\n+ // write number of blocks this reference\n+ uint32_t numBlocks = refEntry.Blocks.size();\n+ if ( m_isBigEndian ) SwapEndian_32(numBlocks);\n+ elementsWritten += fwrite(&numBlocks, sizeof(numBlocks), 1, m_indexStream);\n+\n+ // write actual block entries\n+ const bool blocksOk = WriteBlocks(refEntry.Blocks);\n+\n+ // return success/fail\n+ return ( elementsWritten == 1) && blocksOk;\n+}\n" |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,188 @@ +// *************************************************************************** +// BamToolsIndex.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides index operations for the BamTools index format (".bti") +// *************************************************************************** + +#ifndef BAMTOOLS_INDEX_FORMAT_H +#define BAMTOOLS_INDEX_FORMAT_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. + +#include <api/BamAux.h> +#include <api/BamIndex.h> +#include <map> +#include <string> +#include <vector> + +namespace BamTools { +namespace Internal { + +// contains data for each 'block' in a BTI index +struct BtiBlock { + + // data members + int32_t MaxEndPosition; + int64_t StartOffset; + int32_t StartPosition; + + // ctor + BtiBlock(const int32_t& maxEndPosition = 0, + const int64_t& startOffset = 0, + const int32_t& startPosition = 0) + : MaxEndPosition(maxEndPosition) + , StartOffset(startOffset) + , StartPosition(startPosition) + { } +}; + +// convenience typedef for describing a a list of BTI blocks on a reference +typedef std::vector<BtiBlock> BtiBlockVector; + +// contains all fields necessary for building, loading, & writing +// full BTI index data for a single reference +struct BtiReferenceEntry { + + // data members + int32_t ID; + BtiBlockVector Blocks; + + // ctor + BtiReferenceEntry(const int& id = -1) + : ID(id) + { } +}; + +// provides (persistent) summary of BtiReferenceEntry's index data +struct BtiReferenceSummary { + + // data members + int NumBlocks; + 
uint64_t FirstBlockFilePosition; + + // ctor + BtiReferenceSummary(void) + : NumBlocks(0) + , FirstBlockFilePosition(0) + { } +}; + +// convenience typedef for describing a full BTI index file summary +typedef std::vector<BtiReferenceSummary> BtiFileSummary; + +class BamToolsIndex : public BamIndex { + + // keep a list of any supported versions here + // (might be useful later to handle any 'legacy' versions if the format changes) + // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on + // + // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by: + // + // if ( indexVersion >= BTI_1_2 ) + // do something new + // else + // do the old thing + enum Version { BTI_1_0 = 1 + , BTI_1_1 + , BTI_1_2 + }; + + // ctor & dtor + public: + BamToolsIndex(Internal::BamReaderPrivate* reader); + ~BamToolsIndex(void); + + // BamIndex implementation + public: + // builds index from associated BAM file & writes out to index file + bool Create(void); + // returns whether reference has alignments or no + bool HasAlignments(const int& referenceID) const; + // attempts to use index data to jump to @region, returns success/fail + // a "successful" jump indicates no error, but not whether this region has data + // * thus, the method sets a flag to indicate whether there are alignments + // available after the jump position + bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); + // loads existing data from file into memory + bool Load(const std::string& filename); + // change the index caching behavior + void SetCacheMode(const BamIndex::IndexCacheMode& mode); + public: + // returns format's file extension + static const std::string Extension(void); + + // internal file ops + private: + bool CheckMagicNumber(void); + bool CheckVersion(void); + void CloseFile(void); + bool IsFileOpen(void) const; + bool OpenFile(const std::string& filename, const char* mode); + bool Seek(const int64_t& position, const 
int& origin); + int64_t Tell(void) const; + + // internal BTI index building methods + private: + void ClearReferenceEntry(BtiReferenceEntry& refEntry); + + // internal random-access methods + private: + bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion); + + // internal BTI summary data methods + private: + void InitializeFileSummary(const int& numReferences); + bool LoadFileSummary(void); + bool LoadHeader(void); + bool LoadNumBlocks(int& numBlocks); + bool LoadNumReferences(int& numReferences); + bool LoadReferenceSummary(BtiReferenceSummary& refSummary); + bool SkipBlocks(const int& numBlocks); + + // internal BTI full index input methods + private: + bool ReadBlock(BtiBlock& block); + bool ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks); + bool ReadReferenceEntry(BtiReferenceEntry& refEntry); + + // internal BTI full index output methods + private: + bool WriteBlock(const BtiBlock& block); + bool WriteBlocks(const BtiBlockVector& blocks); + bool WriteHeader(void); + bool WriteReferenceEntry(const BtiReferenceEntry& refEntry); + + // data members + private: + FILE* m_indexStream; + bool m_isBigEndian; + BamIndex::IndexCacheMode m_cacheMode; + BtiFileSummary m_indexFileSummary; + int m_blockSize; + int32_t m_inputVersion; // Version is serialized as int + Version m_outputVersion; + + // static constants + private: + static const int DEFAULT_BLOCK_LENGTH; + static const std::string BTI_EXTENSION; + static const char* const BTI_MAGIC; + static const int SIZEOF_BLOCK; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAMTOOLS_INDEX_FORMAT_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,425 @@\n+// ***************************************************************************\n+// BamWriter_p.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 16 June 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides the basic functionality for producing BAM files\n+// ***************************************************************************\n+\n+#include <api/BamAlignment.h>\n+#include <api/BamConstants.h>\n+#include <api/internal/BamWriter_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstdio>\n+#include <cstdlib>\n+#include <cstring>\n+using namespace std;\n+\n+// ctor\n+BamWriterPrivate::BamWriterPrivate(void)\n+ : m_isBigEndian( BamTools::SystemIsBigEndian() )\n+{ }\n+\n+// dtor\n+BamWriterPrivate::~BamWriterPrivate(void) {\n+ m_stream.Close();\n+}\n+\n+// calculates minimum bin for a BAM alignment interval\n+unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {\n+ --end;\n+ if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);\n+ if ( (begin >> 17) == (end >> 17) ) return 585 + (begin >> 17);\n+ if ( (begin >> 20) == (end >> 20) ) return 73 + (begin >> 20);\n+ if ( (begin >> 23) == (end >> 23) ) return 9 + (begin >> 23);\n+ if ( (begin >> 26) == (end >> 26) ) return 1 + (begin >> 26);\n+ return 0;\n+}\n+\n+// closes the alignment archive\n+void BamWriterPrivate::Close(void) {\n+ m_stream.Close();\n+}\n+\n+// creates a cigar string from the supplied alignment\n+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {\n+\n+ // initialize\n+ const unsigned int numCigarOperations = cigarOperations.size();\n+ packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);\n+\n+ // pack the cigar data into the 
string\n+ unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();\n+\n+ // iterate over cigar operations\n+ vector<CigarOp>::const_iterator coIter = cigarOperations.begin();\n+ vector<CigarOp>::const_iterator coEnd = cigarOperations.end();\n+ for ( ; coIter != coEnd; ++coIter ) {\n+\n+ // store op in packedCigar\n+ unsigned int cigarOp;\n+ switch ( coIter->Type ) {\n+ case (Constants::BAM_CIGAR_MATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MATCH; break;\n+ case (Constants::BAM_CIGAR_INS_CHAR) : cigarOp = Constants::BAM_CIGAR_INS; break;\n+ case (Constants::BAM_CIGAR_DEL_CHAR) : cigarOp = Constants::BAM_CIGAR_DEL; break;\n+ case (Constants::BAM_CIGAR_REFSKIP_CHAR) : cigarOp = Constants::BAM_CIGAR_REFSKIP; break;\n+ case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;\n+ case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;\n+ case (Constants::BAM_CIGAR_PAD_CHAR) : cigarOp = Constants::BAM_CIGAR_PAD; break;\n+ case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;\n+ case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;\n+ default:\n+ fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\\n", coIter->Type);\n+ exit(1);\n+ }\n+\n+ *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;\n+ pPackedCigar++;\n+ }\n+}\n+\n+// encodes the supplied query sequence into 4-bit notation\n+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {\n+\n+ // prepare the encoded query string\n+ const unsigned int queryLen = query.size();\n+ const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);\n+ encodedQuery.resize(encodedQueryLen);\n+ char* pEncode'..b' ++i;\n+ break;\n+ case (Constants::BAM_TAG_TYPE_INT16) :\n+ case (Constants::BAM_TAG_TYPE_UINT16) :\n+ BamTools::SwapEndian_16p(&tagData[i]);\n+ i += sizeof(uint16_t);\n+ break;\n+ case 
(Constants::BAM_TAG_TYPE_FLOAT) :\n+ case (Constants::BAM_TAG_TYPE_INT32) :\n+ case (Constants::BAM_TAG_TYPE_UINT32) :\n+ BamTools::SwapEndian_32p(&tagData[i]);\n+ i += sizeof(uint32_t);\n+ break;\n+ default:\n+ // error case\n+ fprintf(stderr,\n+ "BamWriter ERROR: unknown binary array type encountered: [%c]\\n",\n+ arrayType);\n+ exit(1);\n+ }\n+ }\n+\n+ break;\n+ }\n+\n+ default :\n+ fprintf(stderr, "BamWriter ERROR: invalid tag value type\\n"); // shouldn\'t get here\n+ free(tagData);\n+ exit(1);\n+ }\n+ }\n+ m_stream.Write(tagData, tagDataLength);\n+ free(tagData);\n+ }\n+ else\n+ m_stream.Write(al.TagData.data(), tagDataLength);\n+ }\n+}\n+\n+void BamWriterPrivate::SetWriteCompressed(bool ok) {\n+\n+ // warn if BAM file is already open\n+ // modifying compression is not allowed in this case\n+ if ( IsOpen() ) {\n+ cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "\n+ << "Ignoring request." << endl;\n+ return;\n+ }\n+\n+ // set BgzfStream compression mode\n+ m_stream.SetWriteCompressed(ok);\n+}\n+\n+void BamWriterPrivate::WriteMagicNumber(void) {\n+ // write BAM file \'magic number\'\n+ m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);\n+}\n+\n+void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {\n+\n+ // write the number of reference sequences\n+ uint32_t numReferenceSequences = referenceSequences.size();\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);\n+ m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);\n+\n+ // foreach reference sequence\n+ RefVector::const_iterator rsIter = referenceSequences.begin();\n+ RefVector::const_iterator rsEnd = referenceSequences.end();\n+ for ( ; rsIter != rsEnd; ++rsIter ) {\n+\n+ // write the reference sequence name length\n+ uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;\n+ if ( m_isBigEndian ) 
BamTools::SwapEndian_32(referenceSequenceNameLen);\n+ m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);\n+\n+ // write the reference sequence name\n+ m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);\n+\n+ // write the reference sequence length\n+ int32_t referenceLength = rsIter->RefLength;\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);\n+ m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);\n+ }\n+}\n+\n+void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {\n+\n+ // write the SAM header text length\n+ uint32_t samHeaderLen = samHeaderText.size();\n+ if ( m_isBigEndian ) BamTools::SwapEndian_32(samHeaderLen);\n+ m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);\n+\n+ // write the SAM header text\n+ if ( samHeaderLen > 0 )\n+ m_stream.Write(samHeaderText.data(), samHeaderLen);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,67 @@ +// *************************************************************************** +// BamWriter_p.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 24 February 2011 (DB) +// --------------------------------------------------------------------------- +// Provides the basic functionality for producing BAM files +// *************************************************************************** + +#ifndef BAMWRITER_P_H +#define BAMWRITER_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. + +#include <api/BamAux.h> +#include <api/internal/BgzfStream_p.h> +#include <string> +#include <vector> + +namespace BamTools { +namespace Internal { + +class BamWriterPrivate { + + // ctor & dtor + public: + BamWriterPrivate(void); + ~BamWriterPrivate(void); + + // interface methods + public: + void Close(void); + bool IsOpen(void) const; + bool Open(const std::string& filename, + const std::string& samHeaderText, + const BamTools::RefVector& referenceSequences); + void SaveAlignment(const BamAlignment& al); + void SetWriteCompressed(bool ok); + + // 'internal' methods + public: + unsigned int CalculateMinimumBin(const int begin, int end) const; + void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar); + void EncodeQuerySequence(const std::string& query, std::string& encodedQuery); + void WriteMagicNumber(void); + void WriteReferences(const BamTools::RefVector& referenceSequences); + void WriteSamHeaderText(const std::string& samHeaderText); + + // data members + private: + BgzfStream m_stream; + bool m_isBigEndian; +}; + +} // namespace Internal +} // 
namespace BamTools + +#endif // BAMWRITER_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,439 @@\n+// ***************************************************************************\n+// BgzfStream_p.cpp (c) 2011 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 5 April 2011(DB)\n+// ---------------------------------------------------------------------------\n+// Based on BGZF routines developed at the Broad Institute.\n+// Provides the basic functionality for reading & writing BGZF files\n+// Replaces the old BGZF.* files to avoid clashing with other toolkits\n+// ***************************************************************************\n+\n+#include <api/internal/BgzfStream_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cstring>\n+#include <algorithm>\n+using namespace std;\n+\n+// constructor\n+BgzfStream::BgzfStream(void)\n+ : UncompressedBlockSize(Constants::BGZF_DEFAULT_BLOCK_SIZE)\n+ , CompressedBlockSize(Constants::BGZF_MAX_BLOCK_SIZE)\n+ , BlockLength(0)\n+ , BlockOffset(0)\n+ , BlockAddress(0)\n+ , IsOpen(false)\n+ , IsWriteOnly(false)\n+ , IsWriteCompressed(true)\n+ , Stream(NULL)\n+ , UncompressedBlock(NULL)\n+ , CompressedBlock(NULL)\n+{\n+ try {\n+ CompressedBlock = new char[CompressedBlockSize];\n+ UncompressedBlock = new char[UncompressedBlockSize];\n+ } catch( std::bad_alloc& ba ) {\n+ fprintf(stderr, "BgzfStream ERROR: unable to allocate memory\\n");\n+ exit(1);\n+ }\n+}\n+\n+// destructor\n+BgzfStream::~BgzfStream(void) {\n+ if( CompressedBlock ) delete[] CompressedBlock;\n+ if( UncompressedBlock ) delete[] UncompressedBlock;\n+}\n+\n+// closes BGZF file\n+void BgzfStream::Close(void) {\n+\n+ // skip if file not open\n+ if ( !IsOpen ) return;\n+\n+ // if writing to file, flush the current BGZF block,\n+ // then write an empty block (as EOF marker)\n+ if ( IsWriteOnly ) {\n+ FlushBlock();\n+ int blockLength = DeflateBlock();\n+ 
fwrite(CompressedBlock, 1, blockLength, Stream);\n+ }\n+\n+ // flush and close stream\n+ fflush(Stream);\n+ fclose(Stream);\n+\n+ // reset flags\n+ IsWriteCompressed = true;\n+ IsOpen = false;\n+}\n+\n+// compresses the current block\n+int BgzfStream::DeflateBlock(void) {\n+\n+ // initialize the gzip header\n+ char* buffer = CompressedBlock;\n+ memset(buffer, 0, 18);\n+ buffer[0] = Constants::GZIP_ID1;\n+ buffer[1] = (char)Constants::GZIP_ID2;\n+ buffer[2] = Constants::CM_DEFLATE;\n+ buffer[3] = Constants::FLG_FEXTRA;\n+ buffer[9] = (char)Constants::OS_UNKNOWN;\n+ buffer[10] = Constants::BGZF_XLEN;\n+ buffer[12] = Constants::BGZF_ID1;\n+ buffer[13] = Constants::BGZF_ID2;\n+ buffer[14] = Constants::BGZF_LEN;\n+\n+ // set compression level\n+ const int compressionLevel = ( IsWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );\n+\n+ // loop to retry for blocks that do not compress enough\n+ int inputLength = BlockOffset;\n+ int compressedLength = 0;\n+ unsigned int bufferSize = CompressedBlockSize;\n+\n+ while ( true ) {\n+\n+ // initialize zstream values\n+ z_stream zs;\n+ zs.zalloc = NULL;\n+ zs.zfree = NULL;\n+ zs.next_in = (Bytef*)UncompressedBlock;\n+ zs.avail_in = inputLength;\n+ zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];\n+ zs.avail_out = bufferSize - Constants::BGZF_BLOCK_HEADER_LENGTH - Constants::BGZF_BLOCK_FOOTER_LENGTH;\n+\n+ // initialize the zlib compression algorithm\n+ if ( deflateInit2(&zs,\n+ compressionLevel,\n+ Z_DEFLATED,\n+ Constants::GZIP_WINDOW_BITS,\n+ Constants::Z_DEFAULT_MEM_LEVEL,\n+ Z_DEFAULT_STRATEGY) != Z_OK )\n+ {\n+ fprintf(stderr, "BgzfStream ERROR: zlib deflate initialization failed\\n");\n+ exit(1);\n+ }\n+\n+ /'..b'Read += copyLength;\n+ }\n+\n+ // update block data\n+ if ( BlockOffset == BlockLength ) {\n+ BlockAddress = ftell64(Stream);\n+ BlockOffset = 0;\n+ BlockLength = 0;\n+ }\n+\n+ return numBytesRead;\n+}\n+\n+// reads a BGZF block\n+bool BgzfStream::ReadBlock(void) {\n+\n+ char 
header[Constants::BGZF_BLOCK_HEADER_LENGTH];\n+ int64_t blockAddress = ftell64(Stream);\n+\n+ // read block header from file\n+ int count = fread(header, 1, sizeof(header), Stream);\n+\n+ // if block header empty\n+ if ( count == 0 ) {\n+ BlockLength = 0;\n+ return true;\n+ }\n+\n+ // if block header invalid size\n+ if ( count != sizeof(header) ) {\n+ fprintf(stderr, "BgzfStream ERROR: read block failed - could not read block header\\n");\n+ return false;\n+ }\n+\n+ // validate block header contents\n+ if ( !BgzfStream::CheckBlockHeader(header) ) {\n+ fprintf(stderr, "BgzfStream ERROR: read block failed - invalid block header\\n");\n+ return false;\n+ }\n+\n+ // copy header contents to compressed buffer\n+ int blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;\n+ char* compressedBlock = CompressedBlock;\n+ memcpy(compressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);\n+ int remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;\n+\n+ // read remainder of block\n+ count = fread(&compressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Stream);\n+ if ( count != remaining ) {\n+ fprintf(stderr, "BgzfStream ERROR: read block failed - could not read data from block\\n");\n+ return false;\n+ }\n+\n+ // decompress block data\n+ count = InflateBlock(blockLength);\n+ if ( count < 0 ) {\n+ fprintf(stderr, "BgzfStream ERROR: read block failed - could not decompress block data\\n");\n+ return false;\n+ }\n+\n+ // update block data\n+ if ( BlockLength != 0 )\n+ BlockOffset = 0;\n+ BlockAddress = blockAddress;\n+ BlockLength = count;\n+\n+ // return success\n+ return true;\n+}\n+\n+// seek to position in BGZF file\n+bool BgzfStream::Seek(const int64_t& position) {\n+\n+ // skip if not open\n+ if ( !IsOpen ) return false;\n+\n+ // determine adjusted offset & address\n+ int blockOffset = (position & 0xFFFF);\n+ int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\n+\n+ // attempt seek in file\n+ if ( fseek64(Stream, blockAddress, 
SEEK_SET) != 0 ) {\n+ fprintf(stderr, "BgzfStream ERROR: unable to seek in file\\n");\n+ return false;\n+ }\n+\n+ // update block data & return success\n+ BlockLength = 0;\n+ BlockAddress = blockAddress;\n+ BlockOffset = blockOffset;\n+ return true;\n+}\n+\n+void BgzfStream::SetWriteCompressed(bool ok) {\n+ IsWriteCompressed = ok;\n+}\n+\n+// get file position in BGZF file\n+int64_t BgzfStream::Tell(void) const {\n+ if ( !IsOpen )\n+ return 0;\n+ return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\n+}\n+\n+// writes the supplied data into the BGZF buffer\n+unsigned int BgzfStream::Write(const char* data, const unsigned int dataLen) {\n+\n+ // skip if file not open for writing\n+ if ( !IsOpen || !IsWriteOnly ) return false;\n+\n+ // write blocks as needed til all data is written\n+ unsigned int numBytesWritten = 0;\n+ const char* input = data;\n+ unsigned int blockLength = UncompressedBlockSize;\n+ while ( numBytesWritten < dataLen ) {\n+\n+ // copy data contents to uncompressed output buffer\n+ unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\n+ char* buffer = UncompressedBlock;\n+ memcpy(buffer + BlockOffset, input, copyLength);\n+\n+ // update counter\n+ BlockOffset += copyLength;\n+ input += copyLength;\n+ numBytesWritten += copyLength;\n+\n+ // flush (& compress) output buffer when full\n+ if ( BlockOffset == blockLength ) FlushBlock();\n+ }\n+\n+ // return result\n+ return numBytesWritten;\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,109 @@ +// *************************************************************************** +// BgzfStream_p.h (c) 2011 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 April 2011(DB) +// --------------------------------------------------------------------------- +// Based on BGZF routines developed at the Broad Institute. +// Provides the basic functionality for reading & writing BGZF files +// Replaces the old BGZF.* files to avoid clashing with other toolkits +// *************************************************************************** + +#ifndef BGZFSTREAM_P_H +#define BGZFSTREAM_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. 
+ +#include <api/BamAux.h> +#include <api/BamConstants.h> +#include "zlib.h" +#include <cstdio> +#include <string> + +namespace BamTools { +namespace Internal { + +class BgzfStream { + + // constructor & destructor + public: + BgzfStream(void); + ~BgzfStream(void); + + // main interface methods + public: + // closes BGZF file + void Close(void); + // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing) + bool Open(const std::string& filename, const char* mode); + // reads BGZF data into a byte buffer + int Read(char* data, const unsigned int dataLength); + // seek to position in BGZF file + bool Seek(const int64_t& position); + // enable/disable compressed output + void SetWriteCompressed(bool ok); + // get file position in BGZF file + int64_t Tell(void) const; + // writes the supplied data into the BGZF buffer + unsigned int Write(const char* data, const unsigned int dataLen); + + // internal methods + private: + // compresses the current block + int DeflateBlock(void); + // flushes the data in the BGZF block + void FlushBlock(void); + // de-compresses the current block + int InflateBlock(const int& blockLength); + // reads a BGZF block + bool ReadBlock(void); + + // static 'utility' methods + public: + // checks BGZF block header + static inline bool CheckBlockHeader(char* header); + + // data members + public: + unsigned int UncompressedBlockSize; + unsigned int CompressedBlockSize; + unsigned int BlockLength; + unsigned int BlockOffset; + uint64_t BlockAddress; + bool IsOpen; + bool IsWriteOnly; + bool IsWriteCompressed; + FILE* Stream; + char* UncompressedBlock; + char* CompressedBlock; +}; + +// ------------------------------------------------------------- +// static 'utility' method implementations + +// checks BGZF block header +inline +bool BgzfStream::CheckBlockHeader(char* header) { + return (header[0] == Constants::GZIP_ID1 && + header[1] == (char)Constants::GZIP_ID2 && + header[2] == Z_DEFLATED && + (header[3] & 
Constants::FLG_FEXTRA) != 0 && + BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN && + header[12] == Constants::BGZF_ID1 && + header[13] == Constants::BGZF_ID2 && + BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN ); +} + +} // namespace Internal +} // namespace BamTools + +#endif // BGZFSTREAM_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,231 @@\n+// ***************************************************************************\n+// SamFormatParser.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 19 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides functionality for parsing SAM header text into SamHeader object\n+// ***************************************************************************\n+\n+#include <api/SamConstants.h>\n+#include <api/SamHeader.h>\n+#include <api/internal/SamFormatParser_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <iostream>\n+#include <sstream>\n+#include <vector>\n+using namespace std;\n+\n+SamFormatParser::SamFormatParser(SamHeader& header)\n+ : m_header(header)\n+{ }\n+\n+SamFormatParser::~SamFormatParser(void) { }\n+\n+void SamFormatParser::Parse(const string& headerText) {\n+\n+ // clear header\'s prior contents\n+ m_header.Clear();\n+\n+ // empty header is OK, but skip processing\n+ if ( headerText.empty() )\n+ return;\n+\n+ // other wise parse SAM lines\n+ istringstream headerStream(headerText);\n+ string headerLine("");\n+ while ( getline(headerStream, headerLine) )\n+ ParseSamLine(headerLine);\n+}\n+\n+void SamFormatParser::ParseSamLine(const string& line) {\n+\n+ // skip if line is not long enough to contain true values\n+ if (line.length() < 5 ) return;\n+\n+ // determine token at beginning of line\n+ const string firstToken = line.substr(0,3);\n+ string restOfLine = line.substr(4);\n+ if ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);\n+ else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);\n+ else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);\n+ else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) 
ParsePGLine(restOfLine);\n+ else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);\n+ else\n+ cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;\n+}\n+\n+void SamFormatParser::ParseHDLine(const string& line) {\n+\n+ // split HD lines into tokens\n+ vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+ // iterate over tokens\n+ vector<string>::const_iterator tokenIter = tokens.begin();\n+ vector<string>::const_iterator tokenEnd = tokens.end();\n+ for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+ // get tag/value\n+ const string tokenTag = (*tokenIter).substr(0,2);\n+ const string tokenValue = (*tokenIter).substr(3);\n+\n+ // set header contents\n+ if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;\n+ else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;\n+ else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;\n+ else\n+ cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;\n+ }\n+\n+ // if @HD line exists, VN must be provided\n+ if ( !m_header.HasVersion() )\n+ cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;\n+}\n+\n+void SamFormatParser::ParseSQLine(const string& line) {\n+\n+ SamSequence seq;\n+\n+ // split SQ line into tokens\n+ vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+ // iterate over tokens\n+ vector<string>::const_iterator tokenIter = tokens.begin();\n+ vector<string>::const_iterator tokenEnd = tokens.end();\n+ for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n+\n+ // get tag/value\n+ const string tokenTag = (*tokenIter).substr(0,2);\n+ const string tokenValue = (*tokenIter).substr(3);\n+\n+ // set sequence contents\n+ if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue;\n+ else if ( toke'..b'r tokenIter = tokens.begin();\n+ vector<string>::const_iterator tokenEnd = tokens.end();\n+ for ( ; tokenIter != tokenEnd; ++tokenIter ) 
{\n+\n+ // get token tag/value\n+ const string tokenTag = (*tokenIter).substr(0,2);\n+ const string tokenValue = (*tokenIter).substr(3);\n+\n+ // set read group contents\n+ if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;\n+ else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;\n+ else\n+ cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;\n+ }\n+\n+ bool isMissingRequiredFields = false;\n+\n+ // if @RG line exists, ID must be provided\n+ if ( !rg.HasID() ) {\n+ isMissingRequiredFields = true;\n+ cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;\n+ }\n+\n+ // store SAM read group entry\n+ if ( !isMissingRequiredFields )\n+ m_header.ReadGroups.Add(rg);\n+}\n+\n+void SamFormatParser::ParsePGLine(const string& line) {\n+\n+ SamProgram pg;\n+\n+ // split string into tokens\n+ vector<string> tokens = Split(line, Constants::SAM_TAB);\n+\n+ // iterate over tokens\n+ vector<string>::const_iterator tokenIter = tokens.begin();\n+ vector<string>::const_iterator tokenEnd = tokens.end();\n+ for ( ; 
tokenIter != tokenEnd; ++tokenIter ) {\n+\n+ // get token tag/value\n+ const string tokenTag = (*tokenIter).substr(0,2);\n+ const string tokenValue = (*tokenIter).substr(3);\n+\n+ // set program record contents\n+ if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue;\n+ else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue;\n+ else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;\n+ else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;\n+ else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;\n+ else\n+ cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;\n+ }\n+\n+ bool isMissingRequiredFields = false;\n+\n+ // if @PG line exists, ID must be provided\n+ if ( !pg.HasID() ) {\n+ isMissingRequiredFields = true;\n+ cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;\n+ }\n+\n+ // store SAM program record\n+ if ( !isMissingRequiredFields )\n+ m_header.Programs.Add(pg);\n+}\n+\n+void SamFormatParser::ParseCOLine(const string& line) {\n+ // simply add line to comments list\n+ m_header.Comments.push_back(line);\n+}\n+\n+const vector<string> SamFormatParser::Split(const string& line, const char delim) {\n+ vector<string> tokens;\n+ stringstream lineStream(line);\n+ string token;\n+ while ( getline(lineStream, token, delim) )\n+ tokens.push_back(token);\n+ return tokens;\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
// ***************************************************************************
// SamFormatParser.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 23 December 2010 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for parsing SAM header text into SamHeader object
// ***************************************************************************

#ifndef SAM_FORMAT_PARSER_H
#define SAM_FORMAT_PARSER_H

// -------------
// W A R N I N G
// -------------
//
// This file is not part of the BamTools API.  It exists purely as an
// implementation detail. This header file may change from version to version
// without notice, or even be removed.
//
// We mean it.

#include <string>
#include <vector>

namespace BamTools {

class SamHeader;

namespace Internal {

// Fills a SamHeader from SAM-formatted header text. The parser keeps a
// reference to the header handed to its constructor and writes into it.
class SamFormatParser {

    public:
        // Binds the parser to 'header'; the reference is stored, not copied.
        SamFormatParser(BamTools::SamHeader& header);
        ~SamFormatParser();

        // Clears the bound header, then parses 'headerText' one line at a time.
        // Empty text is accepted and simply leaves the header cleared.
        void Parse(const std::string& headerText);

    private:
        // Dispatches one header line to the matching Parse??Line() method based
        // on its leading three characters; lines shorter than 5 characters are
        // ignored and unknown tokens are reported on stderr.
        void ParseSamLine(const std::string& line);

        // Per-record parsers. Each receives the line with its leading record
        // token already removed, splits it on the SAM tab constant and treats
        // every token as a two-character tag followed by its value.
        void ParseHDLine(const std::string& line);  // sets Version / SortOrder / GroupOrder on the header
        void ParseSQLine(const std::string& line);  // builds a SamSequence entry
        void ParseRGLine(const std::string& line);  // adds a SamReadGroup unless its ID is missing
        void ParsePGLine(const std::string& line);  // adds a SamProgram unless its ID is missing
        void ParseCOLine(const std::string& line);  // appends the line to the header's comment list

        // Splits 'line' on 'delim' and returns the resulting tokens.
        const std::vector<std::string> Split(const std::string& line, const char delim);

        // header being populated
        SamHeader& m_header;
};

} // namespace Internal
} // namespace BamTools

#endif // SAM_FORMAT_PARSER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,211 @@ +// *************************************************************************** +// SamFormatPrinter.cpp (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 April 2011 (DB) +// --------------------------------------------------------------------------- +// Provides functionality for printing formatted SAM header to string +// *************************************************************************** + +#include <api/SamConstants.h> +#include <api/SamHeader.h> +#include <api/internal/SamFormatPrinter_p.h> +using namespace BamTools; +using namespace BamTools::Internal; + +#include <iostream> +#include <sstream> +#include <vector> +using namespace std; + +SamFormatPrinter::SamFormatPrinter(const SamHeader& header) + : m_header(header) +{ } + +SamFormatPrinter::~SamFormatPrinter(void) { } + +const string SamFormatPrinter::FormatTag(const string &tag, const string &value) const { + return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value); +} + +const string SamFormatPrinter::ToString(void) const { + + // clear out stream + stringstream out(""); + + // generate formatted header text + PrintHD(out); + PrintSQ(out); + PrintRG(out); + PrintPG(out); + PrintCO(out); + + // return result + return out.str(); +} + +void SamFormatPrinter::PrintHD(std::stringstream& out) const { + + // if header has @HD data + if ( m_header.HasVersion() ) { + + // @HD VN:<Version> + out << Constants::SAM_HD_BEGIN_TOKEN + << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version); + + // SO:<SortOrder> + if ( m_header.HasSortOrder() ) + out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder); + + // GO:<GroupOrder> + if ( m_header.HasGroupOrder() ) + out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder); + + // newline + out << endl; + } +} + +void 
SamFormatPrinter::PrintSQ(std::stringstream& out) const { + + // iterate over sequence entries + SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin(); + SamSequenceConstIterator seqEnd = m_header.Sequences.ConstEnd(); + for ( ; seqIter != seqEnd; ++seqIter ) { + const SamSequence& seq = (*seqIter); + + // @SQ SN:<Name> LN:<Length> + out << Constants::SAM_SQ_BEGIN_TOKEN + << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name) + << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length); + + // AS:<AssemblyID> + if ( seq.HasAssemblyID() ) + out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID); + + // M5:<Checksum> + if ( seq.HasChecksum() ) + out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum); + + // SP:<Species> + if ( seq.HasSpecies() ) + out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species); + + // UR:<URI> + if ( seq.HasURI() ) + out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI); + + // newline + out << endl; + } +} + +void SamFormatPrinter::PrintRG(std::stringstream& out) const { + + // iterate over read group entries + SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin(); + SamReadGroupConstIterator rgEnd = m_header.ReadGroups.ConstEnd(); + for ( ; rgIter != rgEnd; ++rgIter ) { + const SamReadGroup& rg = (*rgIter); + + // @RG ID:<ID> + out << Constants::SAM_RG_BEGIN_TOKEN + << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID); + + // CN:<SequencingCenter> + if ( rg.HasSequencingCenter() ) + out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter); + + // DS:<Description> + if ( rg.HasDescription() ) + out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description); + + // DT:<ProductionDate> + if ( rg.HasProductionDate() ) + out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate); + + // FO:<FlowOrder> + if ( rg.HasFlowOrder() ) + out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder); + + // KS:<KeySequence> + if ( rg.HasKeySequence() ) + out << 
FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence); + + // LB:<Library> + if ( rg.HasLibrary() ) + out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library); + + // PG:<Program> + if ( rg.HasProgram() ) + out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program); + + // PI:<PredictedInsertSize> + if ( rg.HasPredictedInsertSize() ) + out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize); + + // PL:<SequencingTechnology> + if ( rg.HasSequencingTechnology() ) + out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology); + + // PU:<PlatformUnit> + if ( rg.HasPlatformUnit() ) + out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit); + + // SM:<Sample> + if ( rg.HasSample() ) + out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample); + + // newline + out << endl; + } +} + +void SamFormatPrinter::PrintPG(std::stringstream& out) const { + + // iterate over program record entries + SamProgramConstIterator pgIter = m_header.Programs.ConstBegin(); + SamProgramConstIterator pgEnd = m_header.Programs.ConstEnd(); + for ( ; pgIter != pgEnd; ++pgIter ) { + const SamProgram& pg = (*pgIter); + + // @PG ID:<ID> + out << Constants::SAM_PG_BEGIN_TOKEN + << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID); + + // PN:<Name> + if ( pg.HasName() ) + out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name); + + // CL:<CommandLine> + if ( pg.HasCommandLine() ) + out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine); + + // PP:<PreviousProgramID> + if ( pg.HasPreviousProgramID() ) + out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID); + + // VN:<Version> + if ( pg.HasVersion() ) + out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version); + + // newline + out << endl; + } +} + +void SamFormatPrinter::PrintCO(std::stringstream& out) const { + + // iterate over comments + vector<string>::const_iterator commentIter = m_header.Comments.begin(); + vector<string>::const_iterator 
commentEnd = m_header.Comments.end(); + for ( ; commentIter != commentEnd; ++commentIter ) { + + // @CO <Comment> + out << Constants::SAM_CO_BEGIN_TOKEN + << Constants::SAM_TAB + << (*commentIter) + << endl; + } +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
// ***************************************************************************
// SamFormatPrinter.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 23 December 2010 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for printing formatted SAM header to string
// ***************************************************************************

#ifndef SAM_FORMAT_PRINTER_H
#define SAM_FORMAT_PRINTER_H

// -------------
// W A R N I N G
// -------------
//
// This file is not part of the BamTools API.  It exists purely as an
// implementation detail. This header file may change from version to version
// without notice, or even be removed.
//
// We mean it.

#include <sstream>
#include <string>

namespace BamTools {

class SamHeader;

namespace Internal {

// Renders a SamHeader as SAM-formatted text. The printer only reads the
// header; it holds a const reference to the object given at construction.
class SamFormatPrinter {

    public:
        // Binds the printer to 'header'; the reference is stored, not copied.
        SamFormatPrinter(const BamTools::SamHeader& header);
        ~SamFormatPrinter();

        // Generates the SAM-formatted string from the bound header's data.
        const std::string ToString() const;

    private:
        // Returns the text for a single 'tag' / 'value' field of a header line.
        const std::string FormatTag(const std::string& tag, const std::string& value) const;

        // Section writers: each appends its record type's line(s) to 'out'.
        void PrintHD(std::stringstream& out) const;  // @HD line
        void PrintSQ(std::stringstream& out) const;  // @SQ lines
        void PrintRG(std::stringstream& out) const;  // @RG lines
        void PrintPG(std::stringstream& out) const;  // @PG lines
        void PrintCO(std::stringstream& out) const;  // @CO lines

        // header being printed
        const SamHeader& m_header;
};

} // namespace Internal
} // namespace BamTools

#endif // SAM_FORMAT_PRINTER_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
b'@@ -0,0 +1,511 @@\n+// ***************************************************************************\n+// SamHeaderValidator.cpp (c) 2010 Derek Barnett\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 18 April 2011 (DB)\n+// ---------------------------------------------------------------------------\n+// Provides functionality for validating SamHeader data\n+// ***************************************************************************\n+\n+#include <api/SamConstants.h>\n+#include <api/SamHeader.h>\n+#include <api/internal/SamHeaderValidator_p.h>\n+#include <api/internal/SamHeaderVersion_p.h>\n+using namespace BamTools;\n+using namespace BamTools::Internal;\n+\n+#include <cctype>\n+#include <iostream>\n+#include <set>\n+#include <sstream>\n+using namespace std;\n+\n+namespace BamTools {\n+namespace Internal {\n+\n+bool caseInsensitiveCompare(const string& lhs, const string& rhs) {\n+\n+ // can omit checking chars if lengths not equal\n+ const int lhsLength = lhs.length();\n+ const int rhsLength = rhs.length();\n+ if ( lhsLength != rhsLength )\n+ return false;\n+\n+ // do *basic* toupper checks on each string char\'s\n+ for ( int i = 0; i < lhsLength; ++i ) {\n+ if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )\n+ return false;\n+ }\n+\n+ // otherwise OK\n+ return true;\n+}\n+\n+} // namespace Internal\n+} // namespace BamTools\n+\n+// ------------------------------------------------------------------------\n+// Allow validation rules to vary, as needed, between SAM header versions\n+//\n+// use SAM_VERSION_X_Y to tag important changes\n+//\n+// Together, they will allow for comparisons like:\n+// if ( m_version < SAM_VERSION_2_0 ) {\n+// // use some older rule\n+// else\n+// // use rule introduced with version 2.0\n+\n+static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);\n+static const SamHeaderVersion 
SAM_VERSION_1_1 = SamHeaderVersion(1,1);\n+static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);\n+static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);\n+static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);\n+\n+// TODO: This functionality is currently unused.\n+// Make validation "version-aware."\n+//\n+// ------------------------------------------------------------------------\n+\n+const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";\n+const string SamHeaderValidator::WARN_PREFIX = "WARNING: ";\n+const string SamHeaderValidator::NEWLINE = "\\n";\n+\n+SamHeaderValidator::SamHeaderValidator(const SamHeader& header)\n+ : m_header(header)\n+{ }\n+\n+SamHeaderValidator::~SamHeaderValidator(void) { }\n+\n+bool SamHeaderValidator::Validate(bool verbose) {\n+\n+ // validate header components\n+ bool isValid = true;\n+ isValid &= ValidateMetadata();\n+ isValid &= ValidateSequenceDictionary();\n+ isValid &= ValidateReadGroupDictionary();\n+ isValid &= ValidateProgramChain();\n+\n+ // report errors if desired\n+ if ( verbose ) {\n+ PrintErrorMessages();\n+ PrintWarningMessages();\n+ }\n+\n+ // return validation status\n+ return isValid;\n+}\n+\n+bool SamHeaderValidator::ValidateMetadata(void) {\n+ bool isValid = true;\n+ isValid &= ValidateVersion();\n+ isValid &= ValidateSortOrder();\n+ isValid &= ValidateGroupOrder();\n+ return isValid;\n+}\n+\n+bool SamHeaderValidator::ValidateVersion(void) {\n+\n+ const string& version = m_header.Version;\n+\n+ // warn if version not present\n+ if ( version.empty() ) {\n+ AddWarning("Version (VN) missing. 
Not required, but strongly recommended");\n+ return true;\n+ }\n+\n+ // invalid if version does not contain a period\n+ const size_t periodFound = version.find(Constants::SAM_PERIOD);\n+ if ( periodFound == string::npos ) {\n+ AddError("Invalid version (VN) format: " + version);\n+ return false;\n+ }\n+\n+ // invalid if '..b'APILLARY) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO) ||\n+ caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)\n+ )\n+ {\n+ return true;\n+ }\n+\n+ // otherwise\n+ AddError("Invalid read group sequencing platform (PL): " + technology);\n+ return false;\n+}\n+\n+bool SamHeaderValidator::ValidateProgramChain(void) {\n+ bool isValid = true;\n+ isValid &= ContainsUniqueProgramIds();\n+ isValid &= ValidatePreviousProgramIds();\n+ return isValid;\n+}\n+\n+bool SamHeaderValidator::ContainsUniqueProgramIds(void) {\n+\n+ bool isValid = true;\n+ set<string> programIds;\n+ set<string>::iterator pgIdIter;\n+\n+ // iterate over program records\n+ const SamProgramChain& programs = m_header.Programs;\n+ SamProgramConstIterator pgIter = programs.ConstBegin();\n+ SamProgramConstIterator pgEnd = programs.ConstEnd();\n+ for ( ; pgIter != pgEnd; ++pgIter ) {\n+ const SamProgram& pg = (*pgIter);\n+\n+ // lookup program ID\n+ const string& pgId = pg.ID;\n+ pgIdIter = programIds.find(pgId);\n+\n+ // error if found (duplicate entry)\n+ if ( pgIdIter != programIds.end() ) {\n+ AddError("Program ID (ID): " + pgId + " is not unique");\n+ isValid = false;\n+ }\n+\n+ // otherwise ok, store ID\n+ programIds.insert(pgId);\n+ }\n+\n+ // return validation state\n+ return isValid;\n+}\n+\n+bool 
SamHeaderValidator::ValidatePreviousProgramIds(void) {\n+\n+ bool isValid = true;\n+\n+ // iterate over program records\n+ const SamProgramChain& programs = m_header.Programs;\n+ SamProgramConstIterator pgIter = programs.ConstBegin();\n+ SamProgramConstIterator pgEnd = programs.ConstEnd();\n+ for ( ; pgIter != pgEnd; ++pgIter ) {\n+ const SamProgram& pg = (*pgIter);\n+\n+ // ignore record for validation if PreviousProgramID is empty\n+ const string& ppId = pg.PreviousProgramID;\n+ if ( ppId.empty() )\n+ continue;\n+\n+ // see if program "chain" contains an entry for ppId\n+ if ( !programs.Contains(ppId) ) {\n+ AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");\n+ isValid = false;\n+ }\n+ }\n+\n+ // return validation state\n+ return isValid;\n+}\n+void SamHeaderValidator::AddError(const string& message) {\n+ m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);\n+}\n+\n+void SamHeaderValidator::AddWarning(const string& message) {\n+ m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);\n+}\n+\n+void SamHeaderValidator::PrintErrorMessages(void) {\n+\n+ // skip if no error messages\n+ if ( m_errorMessages.empty() ) return;\n+\n+ // print error header line\n+ cerr << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;\n+\n+ // print each error message\n+ vector<string>::const_iterator errorIter = m_errorMessages.begin();\n+ vector<string>::const_iterator errorEnd = m_errorMessages.end();\n+ for ( ; errorIter != errorEnd; ++errorIter )\n+ cerr << (*errorIter);\n+}\n+\n+void SamHeaderValidator::PrintWarningMessages(void) {\n+\n+ // skip if no warning messages\n+ if ( m_warningMessages.empty() ) return;\n+\n+ // print warning header line\n+ cerr << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;\n+\n+ // print each warning message\n+ vector<string>::const_iterator warnIter = m_warningMessages.begin();\n+ vector<string>::const_iterator warnEnd = m_warningMessages.end();\n+ for ( ; warnIter != 
warnEnd; ++warnIter )\n+ cerr << (*warnIter);\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
// ***************************************************************************
// SamHeaderValidator.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 13 January 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for validating SamHeader data
// ***************************************************************************

#ifndef SAM_HEADER_VALIDATOR_P_H
#define SAM_HEADER_VALIDATOR_P_H

//  -------------
//  W A R N I N G
//  -------------
//
// This file is not part of the BamTools API.  It exists purely as an
// implementation detail. This header file may change from version to version
// without notice, or even be removed.
//
// We mean it.

#include <string>
#include <vector>

namespace BamTools {

class SamHeader;
class SamReadGroup;
class SamSequence;

namespace Internal {

// Checks a SamHeader for problems, collecting error and warning messages
// as it goes. The header is held by const reference, so it must outlive
// the validator.
class SamHeaderValidator {

    // ctor & dtor
    public:
        SamHeaderValidator(const SamHeader& header);
        ~SamHeaderValidator(void);

    // SamHeaderValidator interface
    public:
        // validates SamHeader data, returns true/false accordingly
        // prints error & warning messages to stderr when @verbose is true
        bool Validate(bool verbose = false);

    // internal methods
    private:

        // validate header metadata
        bool ValidateMetadata(void);
        bool ValidateVersion(void);
        bool ContainsOnlyDigits(const std::string& s);
        bool ValidateSortOrder(void);
        bool ValidateGroupOrder(void);

        // validate sequence dictionary
        bool ValidateSequenceDictionary(void);
        bool ContainsUniqueSequenceNames(void);
        bool CheckNameFormat(const std::string& name);
        bool ValidateSequence(const SamSequence& seq);
        bool CheckLengthInRange(const std::string& length);

        // validate read group dictionary
        bool ValidateReadGroupDictionary(void);
        bool ContainsUniqueIDsAndPlatformUnits(void);
        bool ValidateReadGroup(const SamReadGroup& rg);
        bool CheckReadGroupID(const std::string& id);
        bool CheckSequencingTechnology(const std::string& technology);

        // validate program data
        bool ValidateProgramChain(void);
        bool ContainsUniqueProgramIds(void);
        // records an error for every program record whose non-empty
        // PreviousProgramID is not an ID found in the header's program chain;
        // returns false if any such record exists
        bool ValidatePreviousProgramIds(void);

        // error reporting
        // AddError/AddWarning store the message wrapped as
        // <prefix> + message + NEWLINE in the matching message list
        void AddError(const std::string& message);
        void AddWarning(const std::string& message);
        // Print*Messages write a count line followed by each stored message
        // to stderr, and print nothing when the list is empty
        void PrintErrorMessages(void);
        void PrintWarningMessages(void);

    // data members
    private:

        // SamHeader being validated
        const SamHeader& m_header;

        // error reporting helpers
        static const std::string ERROR_PREFIX;
        static const std::string WARN_PREFIX;
        static const std::string NEWLINE;

        // error reporting messages
        std::vector<std::string> m_errorMessages;
        std::vector<std::string> m_warningMessages;
};

} // namespace Internal
} // namespace BamTools

#endif // SAM_HEADER_VALIDATOR_P_H
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,135 @@ +// *************************************************************************** +// SamHeaderVersion.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 24 February 2011 (DB) +// --------------------------------------------------------------------------- +// Provides functionality for comparing SAM header versions +// ************************************************************************* + +#ifndef SAM_HEADERVERSION_P_H +#define SAM_HEADERVERSION_P_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to version +// without notice, or even be removed. +// +// We mean it. + +#include <api/SamConstants.h> +#include <sstream> +#include <string> + +namespace BamTools { +namespace Internal { + +class SamHeaderVersion { + + // ctors & dtor + public: + SamHeaderVersion(void) + : m_majorVersion(0) + , m_minorVersion(0) + { } + + explicit SamHeaderVersion(const std::string& version) + : m_majorVersion(0) + , m_minorVersion(0) + { + SetVersion(version); + } + + SamHeaderVersion(const unsigned int& major, const unsigned int& minor) + : m_majorVersion(major) + , m_minorVersion(minor) + { } + + ~SamHeaderVersion(void) { + m_majorVersion = 0; + m_minorVersion = 0; + } + + // acess data + public: + unsigned int MajorVersion(void) const { return m_majorVersion; } + unsigned int MinorVersion(void) const { return m_minorVersion; } + + void SetVersion(const std::string& version); + std::string ToString(void) const; + + // data members + private: + unsigned int m_majorVersion; + unsigned int m_minorVersion; +}; + +inline +void SamHeaderVersion::SetVersion(const std::string& version) { + + // do nothing if version is empty + if ( !version.empty() ) { + + std::stringstream 
versionStream(""); + + // do nothing if period not found + const size_t periodFound = version.find(Constants::SAM_PERIOD); + if ( periodFound != std::string::npos ) { + + // store major version if non-empty and contains only digits + const std::string& majorVersion = version.substr(0, periodFound); + versionStream.str(majorVersion); + if ( !majorVersion.empty() ) { + const size_t nonDigitFound = majorVersion.find_first_not_of(Constants::SAM_DIGITS); + if ( nonDigitFound == std::string::npos ) + versionStream >> m_majorVersion; + } + + // store minor version if non-empty and contains only digits + const std::string& minorVersion = version.substr(periodFound + 1); + versionStream.str(minorVersion); + if ( !minorVersion.empty() ) { + const size_t nonDigitFound = minorVersion.find_first_not_of(Constants::SAM_DIGITS); + if ( nonDigitFound == std::string::npos ) + versionStream >> m_minorVersion; + } + } + } +} + +// ----------------------------------------------------- +// printing + +inline std::string SamHeaderVersion::ToString(void) const { + std::stringstream version; + version << m_majorVersion << Constants::SAM_PERIOD << m_minorVersion; + return version.str(); +} + +// ----------------------------------------------------- +// comparison operators + +inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { + return (lhs.MajorVersion() == rhs.MajorVersion()) && + (lhs.MinorVersion() == rhs.MinorVersion()); +} + +inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { + if ( lhs.MajorVersion() == rhs.MajorVersion() ) + return lhs.MinorVersion() < rhs.MinorVersion(); + else + return lhs.MajorVersion() < rhs.MajorVersion(); +} + +inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return rhs < lhs; } +inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs>rhs); } +inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { 
return !(lhs<rhs); } + +} // namespace Internal +} // namespace BamTools + +#endif // SAM_HEADERVERSION_P_H |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h Thu Nov 03 10:25:04 2011 -0400 |
b |
// ***************************************************************************
// bamtools_global.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 3 March 2011 (DB)
// ---------------------------------------------------------------------------
// Provides the basic definitions for exporting & importing library symbols.
// Also provides some platform-specific rules for definitions.
// ***************************************************************************

#ifndef BAMTOOLS_GLOBAL_H
#define BAMTOOLS_GLOBAL_H

/*! \brief Library export macro
    \internal
*/
#ifndef BAMTOOLS_LIBRARY_EXPORT
#  if defined(WIN32)
#    define BAMTOOLS_LIBRARY_EXPORT __declspec(dllexport)
#  else
#    define BAMTOOLS_LIBRARY_EXPORT __attribute__((visibility("default")))
#  endif
#endif // BAMTOOLS_LIBRARY_EXPORT

/*! \brief Library import macro
    \internal
*/
#ifndef BAMTOOLS_LIBRARY_IMPORT
#  if defined(WIN32)
#    define BAMTOOLS_LIBRARY_IMPORT __declspec(dllimport)
#  else
#    define BAMTOOLS_LIBRARY_IMPORT
#  endif
#endif // BAMTOOLS_LIBRARY_IMPORT

/*! \brief Platform-specific large-file (64-bit offset) tell/seek macros
    \internal
*/
#ifndef BAMTOOLS_LFS
#define BAMTOOLS_LFS
    #ifdef WIN32
        #define ftell64(a)     _ftelli64(a)
        #define fseek64(a,b,c) _fseeki64(a,b,c)
    #else
        #define ftell64(a)     ftello(a)
        #define fseek64(a,b,c) fseeko(a,b,c)
    #endif
#endif // BAMTOOLS_LFS

/*! \def ftell64(a)
    \brief Platform-independent tell() operation.
    \internal
*/
/*! \def fseek64(a,b,c)
    \brief Platform-independent seek() operation.
    \internal
*/

/*! \brief Platform-specific type definitions
    \internal
*/
#ifndef BAMTOOLS_TYPES
#define BAMTOOLS_TYPES
    #if defined(_MSC_VER) && (_MSC_VER < 1600)
        // Visual Studio releases before 2010 (_MSC_VER 1600) ship no
        // <stdint.h>, so the fixed-width types are spelled out by hand.
        // int8_t must be 'signed char': the signedness of plain 'char' is
        // implementation-defined (and switchable with /J).
        typedef signed char        int8_t;
        typedef unsigned char      uint8_t;
        typedef short              int16_t;
        typedef unsigned short     uint16_t;
        typedef int                int32_t;
        typedef unsigned int       uint32_t;
        typedef long long          int64_t;
        typedef unsigned long long uint64_t;
    #else
        // everything else, including Visual Studio 2010 and later, uses the
        // standard header; hand-written typedefs would clash with it
        #include <stdint.h>
    #endif
#endif // BAMTOOLS_TYPES

#endif // BAMTOOLS_GLOBAL_H
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,325 @@\n+// ***************************************************************************\n+// FastaIndex.cpp (c) 2010 Erik Garrison <erik.garrison@bc.edu>\n+// Marth Lab, Department of Biology, Boston College\n+// All rights reserved.\n+// ---------------------------------------------------------------------------\n+// Last modified: 9 February 2010 (EG)\n+// ---------------------------------------------------------------------------\n+\n+#include "Fasta.h"\n+\n+FastaIndexEntry::FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len)\n+ : name(name)\n+ , length(length)\n+ , offset(offset)\n+ , line_blen(line_blen)\n+ , line_len(line_len)\n+{}\n+\n+FastaIndexEntry::FastaIndexEntry(void) // empty constructor\n+{ clear(); }\n+\n+FastaIndexEntry::~FastaIndexEntry(void)\n+{}\n+\n+void FastaIndexEntry::clear(void)\n+{\n+ name = "";\n+ length = NULL;\n+ offset = -1; // no real offset will ever be below 0, so this allows us to\n+ // check if we have already recorded a real offset\n+ line_blen = NULL;\n+ line_len = NULL;\n+}\n+\n+ostream& operator<<(ostream& output, const FastaIndexEntry& e) {\n+ // just write the first component of the name, for compliance with other tools\n+ output << split(e.name, \' \').at(0) << "\\t" << e.length << "\\t" << e.offset << "\\t" <<\n+ e.line_blen << "\\t" << e.line_len;\n+ return output; // for multiple << operators.\n+}\n+\n+FastaIndex::FastaIndex(void) \n+{}\n+\n+void FastaIndex::readIndexFile(string fname) {\n+ string line;\n+ long long linenum = 0;\n+ indexFile.open(fname.c_str(), ifstream::in);\n+ if (indexFile.is_open()) {\n+ while (getline (indexFile, line)) {\n+ ++linenum;\n+ // the fai format defined in samtools is tab-delimited, every line being:\n+ // fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len\n+ vector<string> fields = split(line, \'\\t\');\n+ if (fields.size() == 5) { // if we don\'t get enough fields then there is a problem with the file\n+ // 
note that fields[0] is the sequence name\n+ char* end;\n+ string name = split(fields[0], " \\t").at(0); // key by first token of name\n+ sequenceNames.push_back(name);\n+ this->insert(make_pair(name, FastaIndexEntry(fields[0], atoi(fields[1].c_str()),\n+ strtoll(fields[2].c_str(), &end, 10),\n+ atoi(fields[3].c_str()),\n+ atoi(fields[4].c_str()))));\n+ } else {\n+ cerr << "Warning: malformed fasta index file " << fname << \n+ "does not have enough fields @ line " << linenum << endl;\n+ cerr << line << endl;\n+ exit(1);\n+ }\n+ }\n+ } else {\n+ cerr << "could not open index file " << fname << endl;\n+ exit(1);\n+ }\n+}\n+\n+// for consistency this should be a class method\n+bool fastaIndexEntryCompare ( FastaIndexEntry a, FastaIndexEntry b) { return (a.offset<b.offset); }\n+\n+ostream& operator<<(ostream& output, FastaIndex& fastaIndex) {\n+ vector<FastaIndexEntry> sortedIndex;\n+ for(vector<string>::const_iterator it = fastaIndex.sequenceNames.begin(); it != fastaIndex.sequenceNames.end(); ++it)\n+ {\n+ sortedIndex.push_back(fastaIndex[*it]);\n+ }\n+ sort(sortedIndex.begin(), sortedIndex.end(), fastaIndexEntryCompare);\n+ for( vector<FastaIndexEntry>::iterator fit = sortedIndex.begin(); fit != sortedIndex.end(); ++fit) {\n+ output << *fit << endl;\n+ }\n+ return output;\n+}\n+\n+void FastaIndex::indexReference(string refname) {\n+ // overview:\n+ // for line in the reference fasta file\n+ // track byte offset from the start of the file\n+ // if line is a fasta header, take the name and dump the last sequnece to the index\n+ // if line is a sequen'..b' exit(1);\n+ } else {\n+ return e->second;\n+ }\n+}\n+\n+string FastaIndex::indexFileExtension() { return ".fai"; }\n+\n+void FastaReference::open(string reffilename, bool usemmap) {\n+ filename = reffilename;\n+ if (!(file = fopen(filename.c_str(), "r"))) {\n+ cerr << "could not open " << filename << endl;\n+ exit(1);\n+ }\n+ index = new FastaIndex();\n+ struct stat stFileInfo; \n+ string indexFileName = filename + 
index->indexFileExtension(); \n+ // if we can find an index file, use it\n+ if(stat(indexFileName.c_str(), &stFileInfo) == 0) { \n+ index->readIndexFile(indexFileName);\n+ } else { // otherwise, read the reference and generate the index file in the cwd\n+ cerr << "index file " << indexFileName << " not found, generating..." << endl;\n+ index->indexReference(filename);\n+ index->writeIndexFile(indexFileName);\n+ }\n+ if (usemmap) {\n+ usingmmap = true;\n+ int fd = fileno(file);\n+ struct stat sb;\n+ if (fstat(fd, &sb) == -1)\n+ cerr << "could not stat file" << filename << endl;\n+ filesize = sb.st_size;\n+ // map the whole file\n+ filemm = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);\n+ }\n+}\n+\n+FastaReference::~FastaReference(void) {\n+ fclose(file);\n+ if (usingmmap) {\n+ munmap(filemm, filesize);\n+ }\n+ delete index;\n+}\n+\n+string FastaReference::getSequence(string seqname) {\n+ FastaIndexEntry entry = index->entry(seqname);\n+ int newlines_in_sequence = entry.length / entry.line_blen;\n+ int seqlen = newlines_in_sequence + entry.length;\n+ char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n+ if (usingmmap) {\n+ memcpy(seq, (char*) filemm + entry.offset, seqlen);\n+ } else {\n+ fseek64(file, entry.offset, SEEK_SET);\n+ fread(seq, sizeof(char), seqlen, file);\n+ }\n+ seq[seqlen] = \'\\0\';\n+ char* pbegin = seq;\n+ char* pend = seq + (seqlen/sizeof(char));\n+ pend = remove(pbegin, pend, \'\\n\');\n+ pend = remove(pbegin, pend, \'\\0\');\n+ string s = seq;\n+ free(seq);\n+ s.resize((pend - pbegin)/sizeof(char));\n+ return s;\n+}\n+\n+// TODO cleanup; odd function. 
use a map\n+string FastaReference::sequenceNameStartingWith(string seqnameStart) {\n+ try {\n+ return (*index)[seqnameStart].name;\n+ } catch (exception& e) {\n+ cerr << e.what() << ": unable to find index entry for " << seqnameStart << endl;\n+ exit(1);\n+ }\n+}\n+\n+string FastaReference::getSubSequence(string seqname, int start, int length) {\n+ FastaIndexEntry entry = index->entry(seqname);\n+ if (start < 0 || length < 1) {\n+ cerr << "Error: cannot construct subsequence with negative offset or length < 1" << endl;\n+ exit(1);\n+ }\n+ // we have to handle newlines\n+ // approach: count newlines before start\n+ // count newlines by end of read\n+ // subtracting newlines before start find count of embedded newlines\n+ int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;\n+ int newlines_by_end = (start + length - 1) / entry.line_blen;\n+ int newlines_inside = newlines_by_end - newlines_before;\n+ int seqlen = length + newlines_inside;\n+ char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n+ if (usingmmap) {\n+ memcpy(seq, (char*) filemm + entry.offset + newlines_before + start, seqlen);\n+ } else {\n+ fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET);\n+ fread(seq, sizeof(char), (off_t) seqlen, file);\n+ }\n+ seq[seqlen] = \'\\0\';\n+ char* pbegin = seq;\n+ char* pend = seq + (seqlen/sizeof(char));\n+ pend = remove(pbegin, pend, \'\\n\');\n+ pend = remove(pbegin, pend, \'\\0\');\n+ string s = seq;\n+ free(seq);\n+ s.resize((pend - pbegin)/sizeof(char));\n+ return s;\n+}\n+\n+long unsigned int FastaReference::sequenceLength(string seqname) {\n+ FastaIndexEntry entry = index->entry(seqname);\n+ return entry.length;\n+}\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,78 @@ +// *************************************************************************** +// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu> +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 February 2010 (EG) +// --------------------------------------------------------------------------- + +#ifndef _FASTA_H +#define _FASTA_H + +#include <map> +#include <iostream> +#include <fstream> +#include <vector> +#include <stdint.h> +#include <stdio.h> +#include <algorithm> +#include "LargeFileSupport.h" +#include <sys/stat.h> +#include <sys/mman.h> +#include "split.h" +#include <stdlib.h> +#include <ctype.h> +#include <unistd.h> + +using namespace std; + +class FastaIndexEntry { + friend ostream& operator<<(ostream& output, const FastaIndexEntry& e); + public: + FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len); + FastaIndexEntry(void); + ~FastaIndexEntry(void); + string name; // sequence name + int length; // length of sequence + long long offset; // bytes offset of sequence from start of file + int line_blen; // line length in bytes, sequence characters + int line_len; // line length including newline + void clear(void); +}; + +class FastaIndex : public map<string, FastaIndexEntry> { + friend ostream& operator<<(ostream& output, FastaIndex& i); + public: + FastaIndex(void); + ~FastaIndex(void); + vector<string> sequenceNames; + void indexReference(string refName); + void readIndexFile(string fname); + void writeIndexFile(string fname); + ifstream indexFile; + FastaIndexEntry entry(string key); + void flushEntryToIndex(FastaIndexEntry& entry); + string indexFileExtension(void); +}; + +class FastaReference { + public: + void open(string reffilename, bool usemmap = false); + bool usingmmap; + string filename; + FastaReference(void) : usingmmap(false) { } + ~FastaReference(void); + FILE* file; 
+ void* filemm; + size_t filesize; + FastaIndex* index; + vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart); + string getSequence(string seqname); + // potentially useful for performance, investigate + // void getSequence(string seqname, string& sequence); + string getSubSequence(string seqname, int start, int length); + string sequenceNameStartingWith(string seqnameStart); + long unsigned int sequenceLength(string seqname); +}; + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400 |
b |
#pragma once

// Large-file support: 64-bit tell/seek macros and a matching offset type.
//
// _FILE_OFFSET_BITS only influences system headers that are included AFTER
// it is defined, so this header has to come before any of them to have an
// effect on 32-bit POSIX platforms.
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif

#ifdef WIN32
#define ftell64(a) _ftelli64(a)
#define fseek64(a,b,c) _fseeki64(a,b,c)
// __int64 is the compiler's 64-bit integer and the type _ftelli64 returns
typedef __int64 off_type;
#else
// off_t is declared here; do not rely on an earlier include providing it
#include <sys/types.h>
#define ftell64(a) ftello(a)
#define fseek64(a,b,c) fseeko(a,b,c)
typedef off_t off_type;
#endif
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES =

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= Fasta.cpp split.cpp
OBJECTS= $(SOURCES:.cpp=.o)
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

all: $(BUILT_OBJECTS)

.PHONY: all clean

# Static pattern rule: each object depends only on its own source file, so
# touching one .cpp no longer rebuilds every object in this directory.
$(BUILT_OBJECTS): $(OBJ_DIR)/%.o: %.cpp
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS)

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,33 @@ +#include "split.h" + +std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) { + std::stringstream ss(s); + std::string item; + while(std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; +} + +std::vector<std::string> split(const std::string &s, char delim) { + std::vector<std::string> elems; + return split(s, delim, elems); +} + +std::vector<std::string> &split(const std::string &s, const std::string& delims, std::vector<std::string> &elems) { + char* tok; + char cchars [s.size()+1]; + char* cstr = &cchars[0]; + strcpy(cstr, s.c_str()); + tok = strtok(cstr, delims.c_str()); + while (tok != NULL) { + elems.push_back(tok); + tok = strtok(NULL, delims.c_str()); + } + return elems; +} + +std::vector<std::string> split(const std::string &s, const std::string& delims) { + std::vector<std::string> elems; + return split(s, delims, elems); +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/Fasta/split.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/split.h Thu Nov 03 10:25:04 2011 -0400 |
b |
#ifndef __SPLIT_H
#define __SPLIT_H

// functions to split a string by a specific delimiter
#include <string>
#include <vector>
#include <sstream>
#include <string.h>

// thanks to Evan Teran, http://stackoverflow.com/questions/236129/how-to-split-a-string/236803#236803

// split a string on a single delimiter character (delim)
// - pieces are appended to elems; the first form returns a reference to
//   elems, the second returns the pieces in a new vector
// - adjacent delimiters yield empty pieces
std::vector<std::string>& split(const std::string &s, char delim, std::vector<std::string> &elems);
std::vector<std::string> split(const std::string &s, char delim);

// split a string on any character found in the string of delimiters (delims)
// - tokens are appended to elems; the first form returns a reference to
//   elems, the second returns the tokens in a new vector
// - runs of delimiters are treated as one separator, so no empty tokens
//   are produced
std::vector<std::string>& split(const std::string &s, const std::string& delims, std::vector<std::string> &elems);
std::vector<std::string> split(const std::string &s, const std::string& delims);

#endif
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFile/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds bedFile.o into the shared object directory.
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ -I$(UTILITIES_DIR)/stringUtilities/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= bedFile.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

# Default goal: compile this directory's source into $(OBJ_DIR).
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the objects this library depends on by running make in each utility
# directory. "-C" takes exactly one directory argument, so every directory
# gets its own invocation (passing the -I flags to -C cannot work).
# NOTE(review): assumes each of these directories has its own Makefile that
# writes its object into $(OBJ_DIR) -- confirm against the source tree.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# Removes everything in the shared object and binary directories.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,720 @@\n+/*****************************************************************************\n+ bedFile.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "bedFile.h"\n+\n+\n+/************************************************\n+Helper functions\n+*************************************************/\n+void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks) {\n+\n+ if (bed.otherFields.size() < 6) {\n+ cerr << "Input error: Cannot split into blocks. Found interval with fewer than 12 columns on line " << lineNum << "." << endl;\n+ exit(1);\n+ }\n+\n+ int blockCount = atoi(bed.otherFields[3].c_str());\n+ if ( blockCount <= 0 ) {\n+ cerr << "Input error: found interval having <= 0 blocks on line " << lineNum << "." << endl;\n+ exit(1);\n+ }\n+ else if ( blockCount == 1 ) {\n+ //take a short-cut for single blocks\n+ bedBlocks.push_back(bed);\n+ }\n+ else {\n+ // get the comma-delimited strings for the BED12 block starts and block ends.\n+ string blockSizes(bed.otherFields[4]);\n+ string blockStarts(bed.otherFields[5]);\n+\n+ vector<int> sizes;\n+ vector<int> starts;\n+ Tokenize(blockSizes, sizes, ",");\n+ Tokenize(blockStarts, starts, ",");\n+\n+ if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {\n+ cerr << "Input error: found interval with block-counts not matching starts/sizes on line " << lineNum << "." 
<< endl;\n+ exit(1);\n+ }\n+\n+ // add each BED block to the bedBlocks vector\n+ for (UINT i = 0; i < (UINT) blockCount; ++i) {\n+ CHRPOS blockStart = bed.start + starts[i];\n+ CHRPOS blockEnd = bed.start + starts[i] + sizes[i];\n+ BED currBedBlock(bed.chrom, blockStart, blockEnd, bed.name, bed.score, bed.strand, bed.otherFields);\n+ bedBlocks.push_back(currBedBlock);\n+ }\n+ }\n+}\n+\n+\n+/***********************************************\n+Sorting comparison functions\n+************************************************/\n+bool sortByChrom(BED const &a, BED const &b) {\n+ if (a.chrom < b.chrom) return true;\n+ else return false;\n+};\n+\n+bool sortByStart(const BED &a, const BED &b) {\n+ if (a.start < b.start) return true;\n+ else return false;\n+};\n+\n+bool sortBySizeAsc(const BED &a, const BED &b) {\n+\n+ CHRPOS aLen = a.end - a.start;\n+ CHRPOS bLen = b.end - b.start;\n+\n+ if (aLen < bLen) return true;\n+ else return false;\n+};\n+\n+bool sortBySizeDesc(const BED &a, const BED &b) {\n+\n+ CHRPOS aLen = a.end - a.start;\n+ CHRPOS bLen = b.end - b.start;\n+\n+ if (aLen > bLen) return true;\n+ else return false;\n+};\n+\n+bool sortByScoreAsc(const BED &a, const BED &b) {\n+ if (a.score < b.score) return true;\n+ else return false;\n+};\n+\n+bool sortByScoreDesc(const BED &a, const BED &b) {\n+ if (a.score > b.score) return true;\n+ else return false;\n+};\n+\n+bool byChromThenStart(BED const &a, BED const &b) {\n+\n+ if (a.chrom < b.chrom) return true;\n+ else if (a.chrom > b.chrom) return false;\n+\n+ if (a.start < b.start) return true;\n+ else if (a.start >= b.start) return false;\n+\n+ return false;\n+};\n+\n+\n+/*******************************************\n+Class methods\n+*******************************************/\n+\n+// Constructor\n+BedFile::BedFile(string &bedFile)\n+: bedFile(bedFile),\n+ _isGff(false),\n+ _isVcf(false),\n+ _typeIsKnown(false),\n+ _merged_start(-1),\n+ _merged_end(-1),\n+ _merged_chrom(""),\n+ _prev_start(-1),\n+ 
_prev_chrom("")\n+{}\n+\n+// Destructor\n+BedFile::~BedFile(void) {\n+}\n+\n+\n+void BedFile::Open(void) {\n+ \n+ _bedFields.reserve(12);\n+ \n+ if (bedFile == "stdin" || bedFile == "-") {\n+ _bedStream'..b' else {\n+ // correct for the fact that we artificially expanded the zeroLength feature\n+ bedItr->depthMapList[index][a.start+2].starts++;\n+ bedItr->depthMapList[index][a.end-1].ends++; \n+ }\n+\n+ if (a.start < bedItr->minOverlapStarts[index]) {\n+ bedItr->minOverlapStarts[index] = a.start;\n+ }\n+ }\n+ }\n+ }\n+ startBin >>= _binNextShift;\n+ endBin >>= _binNextShift;\n+ }\n+}\n+\n+void BedFile::setZeroBased(bool zeroBased) { this->isZeroBased = zeroBased; }\n+\n+void BedFile::setGff (bool gff) { this->_isGff = gff; }\n+\n+\n+void BedFile::setVcf (bool vcf) { this->_isVcf = vcf; }\n+\n+\n+void BedFile::setFileType (FileType type) {\n+ _fileType = type;\n+ _typeIsKnown = true;\n+}\n+\n+\n+void BedFile::setBedType (int colNums) {\n+ bedType = colNums;\n+}\n+\n+\n+void BedFile::loadBedFileIntoMap() {\n+\n+ BED bedEntry, nullBed;\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+\n+ Open();\n+ while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ BIN bin = getBin(bedEntry.start, bedEntry.end);\n+ bedMap[bedEntry.chrom][bin].push_back(bedEntry);\n+ bedEntry = nullBed;\n+ }\n+ }\n+ Close();\n+}\n+\n+\n+void BedFile::loadBedCovFileIntoMap() {\n+\n+ BED bedEntry, nullBed;\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+\n+ Open();\n+ while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ BIN bin = getBin(bedEntry.start, bedEntry.end);\n+\n+ BEDCOV bedCov;\n+ bedCov.chrom = bedEntry.chrom;\n+ bedCov.start = bedEntry.start;\n+ bedCov.end = bedEntry.end;\n+ bedCov.name = bedEntry.name;\n+ bedCov.score = bedEntry.score;\n+ bedCov.strand = bedEntry.strand;\n+ bedCov.otherFields = bedEntry.otherFields;\n+ bedCov.zeroLength = bedEntry.zeroLength;\n+ bedCov.count = 0;\n+ 
bedCov.minOverlapStart = INT_MAX;\n+\n+ bedCovMap[bedEntry.chrom][bin].push_back(bedCov);\n+ bedEntry = nullBed;\n+ }\n+ }\n+ Close();\n+}\n+\n+void BedFile::loadBedCovListFileIntoMap() {\n+\n+ BED bedEntry, nullBed;\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+\n+ Open();\n+ while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ BIN bin = getBin(bedEntry.start, bedEntry.end);\n+\n+ BEDCOVLIST bedCovList;\n+ bedCovList.chrom = bedEntry.chrom;\n+ bedCovList.start = bedEntry.start;\n+ bedCovList.end = bedEntry.end;\n+ bedCovList.name = bedEntry.name;\n+ bedCovList.score = bedEntry.score;\n+ bedCovList.strand = bedEntry.strand;\n+ bedCovList.otherFields = bedEntry.otherFields;\n+ bedCovList.zeroLength = bedEntry.zeroLength;\n+\n+ bedCovListMap[bedEntry.chrom][bin].push_back(bedCovList);\n+ bedEntry = nullBed;\n+ }\n+ }\n+ Close();\n+}\n+\n+\n+void BedFile::loadBedFileIntoMapNoBin() {\n+\n+ BED bedEntry, nullBed;\n+ int lineNum = 0;\n+ BedLineStatus bedStatus;\n+\n+ Open();\n+ while ((bedStatus = this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n+ if (bedStatus == BED_VALID) {\n+ bedMapNoBin[bedEntry.chrom].push_back(bedEntry);\n+ bedEntry = nullBed;\n+ }\n+ }\n+ Close();\n+\n+ // sort the BED entries for each chromosome\n+ // in ascending order of start position\n+ for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) {\n+ sort(m->second.begin(), m->second.end(), sortByStart);\n+ }\n+}\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,1167 @@\n+/*****************************************************************************\n+ bedFile.h\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#ifndef BEDFILE_H\n+#define BEDFILE_H\n+\n+// "local" includes\n+#include "gzstream.h"\n+#include "lineFileUtilities.h"\n+#include "fileType.h"\n+\n+// standard includes\n+#include <vector>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <iostream>\n+#include <fstream>\n+#include <sstream>\n+#include <cstring>\n+#include <algorithm>\n+#include <limits.h>\n+#include <stdint.h>\n+#include <cstdio>\n+//#include <tr1/unordered_map> // Experimental.\n+using namespace std;\n+\n+\n+//*************************************************\n+// Data type tydedef\n+//*************************************************\n+typedef uint32_t CHRPOS;\n+typedef uint16_t BINLEVEL;\n+typedef uint32_t BIN;\n+typedef uint16_t USHORT;\n+typedef uint32_t UINT;\n+\n+//*************************************************\n+// Genome binning constants\n+//*************************************************\n+\n+const BIN _numBins = 37450;\n+const BINLEVEL _binLevels = 7;\n+\n+// bins range in size from 16kb to 512Mb\n+// Bin 0 spans 512Mbp, # Level 1\n+// Bins 1-8 span 64Mbp, # Level 2\n+// Bins 9-72 span 8Mbp, # Level 3\n+// Bins 73-584 span 1Mbp # Level 4\n+// Bins 585-4680 span 128Kbp # Level 5\n+// Bins 4681-37449 span 16Kbp # Level 6\n+const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+\n+const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. 
*/\n+const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */\n+\n+\n+//*************************************************\n+// Common data structures\n+//*************************************************\n+\n+struct DEPTH {\n+ UINT starts;\n+ UINT ends;\n+};\n+\n+\n+/*\n+ Structure for regular BED records\n+*/\n+struct BED {\n+\n+ // Regular BED fields\n+ string chrom;\n+ CHRPOS start;\n+ CHRPOS end;\n+ string name;\n+ string score;\n+ string strand;\n+\n+ // Add\'l fields for BED12 and/or custom BED annotations\n+ vector<string> otherFields;\n+\n+ // experimental fields for the FJOIN approach.\n+ bool zeroLength;\n+ bool added;\n+ bool finished;\n+ // list of hits from another file.\n+ vector<BED> overlaps;\n+\n+public:\n+ // constructors\n+\n+ // Null\n+ BED()\n+ : chrom(""),\n+ start(0),\n+ end(0),\n+ name(""),\n+ score(""),\n+ strand(""),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED3\n+ BED(string chrom, CHRPOS start, CHRPOS end)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(""),\n+ score(""),\n+ strand(""),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED4\n+ BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(""),\n+ score(""),\n+ strand(strand),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED6\n+ BED(string chrom, CHRPOS start, CHRPOS end, string name,\n+ string score, string strand)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(name),\n+ score(score),\n+ strand(strand),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // B'..b' vector<string>::const_iterator othEnd = bed.otherFields.end();\n+ for ( ; othIt != othEnd; ++othIt) {\n+ printf("\\t%s", othIt->c_str());\n+ }\n+ printf("\\n");\n+ }\n+ }\n+ // VCF\n+ 
else if (_isGff == false && _isVcf == true) {\n+ printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n+\n+ vector<string>::const_iterator othIt = bed.otherFields.begin();\n+ vector<string>::const_iterator othEnd = bed.otherFields.end();\n+ for ( ; othIt != othEnd; ++othIt) {\n+ printf("%s\\t", othIt->c_str());\n+ }\n+ printf("\\n");\n+ }\n+ // GFF\n+ else if (_isGff == true) {\n+ // "GFF-9"\n+ if (this->bedType == 8) {\n+ printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+ bed.name.c_str(), start+1, end,\n+ bed.score.c_str(), bed.strand.c_str(),\n+ bed.otherFields[1].c_str());\n+ }\n+ // "GFF-8"\n+ else if (this->bedType == 9) {\n+ printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+ bed.name.c_str(), start+1, end,\n+ bed.score.c_str(), bed.strand.c_str(),\n+ bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n+ }\n+ }\n+ }\n+\n+\n+ /*\n+ reportNullBedTab\n+ */\n+ void reportNullBedTab() {\n+\n+ if (_isGff == false && _isVcf == false) {\n+ if (this->bedType == 3) {\n+ printf (".\\t-1\\t-1\\t");\n+ }\n+ else if (this->bedType == 4) {\n+ printf (".\\t-1\\t-1\\t.\\t");\n+ }\n+ else if (this->bedType == 5) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t");\n+ }\n+ else if (this->bedType == 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+ }\n+ else if (this->bedType > 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+ for (unsigned int i = 6; i < this->bedType; ++i) {\n+ printf(".\\t");\n+ }\n+ }\n+ }\n+ else if (_isGff == true && _isVcf == false) {\n+ if (this->bedType == 8) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n+ }\n+ else if (this->bedType == 9) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n+ }\n+ }\n+ }\n+\n+\n+ /*\n+ reportNullBedTab\n+ */\n+ void reportNullBedNewLine() {\n+\n+ if (_isGff == false && _isVcf == false) {\n+ if (this->bedType == 3) {\n+ printf (".\\t-1\\t-1\\n");\n+ }\n+ else if (this->bedType == 4) {\n+ printf 
(".\\t-1\\t-1\\t.\\n");\n+ }\n+ else if (this->bedType == 5) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\n");\n+ }\n+ else if (this->bedType == 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n+ }\n+ else if (this->bedType > 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n+ for (unsigned int i = 6; i < this->bedType; ++i) {\n+ printf("\\t.");\n+ }\n+ printf("\\n");\n+ }\n+ }\n+ else if (_isGff == true && _isVcf == false) {\n+ if (this->bedType == 8) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n+ }\n+ else if (this->bedType == 9) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n+ }\n+ }\n+ }\n+\n+\n+};\n+\n+#endif /* BEDFILE_H */\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,1144 @@\n+/*****************************************************************************\n+ bedFile.h\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licensed under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#ifndef BEDFILE_H\n+#define BEDFILE_H\n+\n+// "local" includes\n+#include "gzstream.h"\n+#include "lineFileUtilities.h"\n+#include "fileType.h"\n+\n+// standard includes\n+#include <vector>\n+#include <map>\n+#include <set>\n+#include <string>\n+#include <iostream>\n+#include <fstream>\n+#include <sstream>\n+#include <cstring>\n+#include <algorithm>\n+#include <limits.h>\n+#include <stdint.h>\n+#include <cstdio>\n+//#include <tr1/unordered_map> // Experimental.\n+using namespace std;\n+\n+\n+//*************************************************\n+// Data type tydedef\n+//*************************************************\n+typedef uint32_t CHRPOS;\n+typedef uint16_t BINLEVEL;\n+typedef uint32_t BIN;\n+typedef uint16_t USHORT;\n+typedef uint32_t UINT;\n+\n+//*************************************************\n+// Genome binning constants\n+//*************************************************\n+\n+const BIN _numBins = 37450;\n+const BINLEVEL _binLevels = 7;\n+\n+// bins range in size from 16kb to 512Mb\n+// Bin 0 spans 512Mbp, # Level 1\n+// Bins 1-8 span 64Mbp, # Level 2\n+// Bins 9-72 span 8Mbp, # Level 3\n+// Bins 73-584 span 1Mbp # Level 4\n+// Bins 585-4680 span 128Kbp # Level 5\n+// Bins 4681-37449 span 16Kbp # Level 6\n+const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n+\n+const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. 
*/\n+const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */\n+\n+\n+//*************************************************\n+// Common data structures\n+//*************************************************\n+\n+struct DEPTH {\n+ UINT starts;\n+ UINT ends;\n+};\n+\n+\n+/*\n+ Structure for regular BED records\n+*/\n+struct BED {\n+\n+ // Regular BED fields\n+ string chrom;\n+ CHRPOS start;\n+ CHRPOS end;\n+ string name;\n+ string score;\n+ string strand;\n+\n+ // Add\'l fields for BED12 and/or custom BED annotations\n+ vector<string> otherFields;\n+\n+ // experimental fields for the FJOIN approach.\n+ bool zeroLength;\n+ bool added;\n+ bool finished;\n+ // list of hits from another file.\n+ vector<BED> overlaps;\n+\n+public:\n+ // constructors\n+\n+ // Null\n+ BED()\n+ : chrom(""),\n+ start(0),\n+ end(0),\n+ name(""),\n+ score(""),\n+ strand(""),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED3\n+ BED(string chrom, CHRPOS start, CHRPOS end)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(""),\n+ score(""),\n+ strand(""),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED4\n+ BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(""),\n+ score(""),\n+ strand(strand),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // BED6\n+ BED(string chrom, CHRPOS start, CHRPOS end, string name,\n+ string score, string strand)\n+ : chrom(chrom),\n+ start(start),\n+ end(end),\n+ name(name),\n+ score(score),\n+ strand(strand),\n+ otherFields(),\n+ zeroLength(false),\n+ added(false),\n+ finished(false),\n+ overlaps()\n+ {}\n+\n+ // B'..b' vector<string>::const_iterator othEnd = bed.otherFields.end();\n+ for ( ; othIt != othEnd; ++othIt) {\n+ printf("\\t%s", othIt->c_str());\n+ }\n+ printf("\\n");\n+ }\n+ }\n+ // VCF\n+ 
else if (_isGff == false && _isVcf == true) {\n+ printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n+\n+ vector<string>::const_iterator othIt = bed.otherFields.begin();\n+ vector<string>::const_iterator othEnd = bed.otherFields.end();\n+ for ( ; othIt != othEnd; ++othIt) {\n+ printf("%s\\t", othIt->c_str());\n+ }\n+ printf("\\n");\n+ }\n+ // GFF\n+ else if (_isGff == true) {\n+ // "GFF-9"\n+ if (this->bedType == 8) {\n+ printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+ bed.name.c_str(), start+1, end,\n+ bed.score.c_str(), bed.strand.c_str(),\n+ bed.otherFields[1].c_str());\n+ }\n+ // "GFF-8"\n+ else if (this->bedType == 9) {\n+ printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n+ bed.name.c_str(), start+1, end,\n+ bed.score.c_str(), bed.strand.c_str(),\n+ bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n+ }\n+ }\n+ }\n+\n+\n+ /*\n+ reportNullBedTab\n+ */\n+ void reportNullBedTab() {\n+\n+ if (_isGff == false && _isVcf == false) {\n+ if (this->bedType == 3) {\n+ printf (".\\t-1\\t-1\\t");\n+ }\n+ else if (this->bedType == 4) {\n+ printf (".\\t-1\\t-1\\t.\\t");\n+ }\n+ else if (this->bedType == 5) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t");\n+ }\n+ else if (this->bedType == 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+ }\n+ else if (this->bedType > 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n+ for (unsigned int i = 6; i < this->bedType; ++i) {\n+ printf(".\\t");\n+ }\n+ }\n+ }\n+ else if (_isGff == true && _isVcf == false) {\n+ if (this->bedType == 8) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n+ }\n+ else if (this->bedType == 9) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n+ }\n+ }\n+ }\n+\n+\n+ /*\n+ reportNullBedTab\n+ */\n+ void reportNullBedNewLine() {\n+\n+ if (_isGff == false && _isVcf == false) {\n+ if (this->bedType == 3) {\n+ printf (".\\t-1\\t-1\\n");\n+ }\n+ else if (this->bedType == 4) {\n+ printf 
(".\\t-1\\t-1\\t.\\n");\n+ }\n+ else if (this->bedType == 5) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\n");\n+ }\n+ else if (this->bedType == 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n+ }\n+ else if (this->bedType > 6) {\n+ printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n+ for (unsigned int i = 6; i < this->bedType; ++i) {\n+ printf("\\t.");\n+ }\n+ printf("\\n");\n+ }\n+ }\n+ else if (_isGff == true && _isVcf == false) {\n+ if (this->bedType == 8) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n+ }\n+ else if (this->bedType == 9) {\n+ printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n+ }\n+ }\n+ }\n+\n+\n+};\n+\n+#endif /* BEDFILE_H */\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Builds bedFilePE.o into the shared object directory.
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= bedFilePE.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=lineFileUtilities.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

# Default goal: compile this directory's source into $(OBJ_DIR).
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the objects this library depends on by running make in each utility
# directory. "-C" takes exactly one directory argument, so every directory
# gets its own invocation (passing the -I flags to -C cannot work).
# NOTE(review): assumes each of these directories has its own Makefile that
# writes its object into $(OBJ_DIR) -- confirm against the source tree.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# Removes everything in the shared object and binary directories.
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,530 @@\n+//\n+// bedFilePE.cpp\n+// BEDTools\n+//\n+// Created by Aaron Quinlan Spring 2009.\n+// Copyright 2009 Aaron Quinlan. All rights reserved.\n+//\n+// Summary: Contains common functions for finding BED overlaps.\n+//\n+// Acknowledgments: Much of the code herein is taken from Jim Kent\'s\n+// BED processing code. I am grateful for his elegant\n+// genome binning algorithm and therefore use it extensively.\n+\n+\n+#include "bedFilePE.h"\n+\n+\n+// Constructor\n+BedFilePE::BedFilePE(string &bedFile) {\n+ this->bedFile = bedFile;\n+}\n+\n+// Destructor\n+BedFilePE::~BedFilePE(void) {\n+}\n+\n+void BedFilePE::Open(void) {\n+ if (bedFile == "stdin" || bedFile == "-") {\n+ _bedStream = &cin;\n+ }\n+ else {\n+ _bedStream = new ifstream(bedFile.c_str(), ios::in);\n+\n+ if (isGzipFile(_bedStream) == true) {\n+ delete _bedStream;\n+ _bedStream = new igzstream(bedFile.c_str(), ios::in);\n+ }\n+ // can we open the file?\n+ if ( !(_bedStream->good()) ) {\n+ cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" 
<< endl;\n+ exit (1);\n+ }\n+ }\n+}\n+\n+\n+\n+// Close the BEDPE file\n+void BedFilePE::Close(void) {\n+ if (bedFile != "stdin" && bedFile != "-") delete _bedStream;\n+}\n+\n+\n+BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) {\n+\n+ // make sure there are still lines to process.\n+ // if so, tokenize, validate and return the BEDPE entry.\n+ if (_bedStream->good()) {\n+ string bedPELine;\n+ vector<string> bedPEFields;\n+ bedPEFields.reserve(10);\n+\n+ // parse the bedStream pointer\n+ getline(*_bedStream, bedPELine);\n+ lineNum++;\n+\n+ // split into a string vector.\n+ Tokenize(bedPELine,bedPEFields);\n+\n+ // load the BEDPE struct as long as it\'s a valid BEDPE entry.\n+ return parseLine(bedpe, bedPEFields, lineNum);\n+ }\n+ // default if file is closed or EOF\n+ return BED_INVALID;\n+}\n+\n+\n+/*\n+ reportBedPETab\n+\n+ Writes the _original_ BED entry for A.\n+ Works for BEDPE only.\n+*/\n+void BedFilePE::reportBedPETab(const BEDPE &a) {\n+\n+ if (this->bedType == 6) {\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t", a.chrom1.c_str(), a.start1, a.end1,\n+ a.chrom2.c_str(), a.start2, a.end2);\n+ }\n+ else if (this->bedType == 7) {\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+ a.chrom2.c_str(), a.start2, a.end2,\n+ a.name.c_str());\n+ }\n+ else if (this->bedType == 8) {\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+ a.chrom2.c_str(), a.start2, a.end2,\n+ a.name.c_str(), a.score.c_str());\n+ }\n+ else if (this->bedType == 10) {\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n+ a.chrom2.c_str(), a.start2, a.end2,\n+ a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());\n+ }\n+ else if (this->bedType > 10) {\n+ printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", a.chrom1.c_str(), a.start1, a.end1,\n+ a.chrom2.c_str(), a.start2, a.end2,\n+ a.name.c_str(), a.score.c_str(), a.strand1.c_str(), 
a.strand2.c_str());\n+\n+ vector<string>::const_iterator othIt = a.otherFields.begin();\n+ vector<string>::const_iterator othEnd = a.otherFields.end();\n+ for ( ; othIt != othEnd; ++othIt) {\n+ printf("\\t%s", othIt->c_str());\n+ }\n+ printf("\\t");\n+ }\n+}\n+\n+\n+\n+/*\n+ reportBedPENewLine\n+\n+ Writes the _original_ BED entry for A.\n+ '..b' float size = end - start;\n+\n+ if ( (overlap / size) >= overlapFraction ) {\n+\n+ // skip the hit if not on the same strand (and we care)\n+ if ((forceStrand == false) && (enforceDiffNames == false)) {\n+ hits.push_back(*bedItr); // it\'s a hit, add it.\n+ }\n+ else if ((forceStrand == true) && (enforceDiffNames == false)) {\n+ if (strand == bedItr->bed.strand)\n+ hits.push_back(*bedItr); // it\'s a hit, add it.\n+ }\n+ else if ((forceStrand == true) && (enforceDiffNames == true)) {\n+ if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name))\n+ hits.push_back(*bedItr); // it\'s a hit, add it.\n+ }\n+ else if ((forceStrand == false) && (enforceDiffNames == true)) {\n+ if (name != bedItr->bed.name)\n+ hits.push_back(*bedItr); // it\'s a hit, add it.\n+ }\n+ }\n+\n+ }\n+ }\n+ startBin >>= _binNextShift;\n+ endBin >>= _binNextShift;\n+ }\n+}\n+\n+\n+void BedFilePE::loadBedPEFileIntoMap() {\n+\n+ int lineNum = 0;\n+ int bin1, bin2;\n+ BedLineStatus bedStatus;\n+ BEDPE bedpeEntry, nullBedPE;\n+\n+ Open();\n+ bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n+ while (bedStatus != BED_INVALID) {\n+\n+ if (bedStatus == BED_VALID) {\n+ MATE *bedEntry1 = new MATE();\n+ MATE *bedEntry2 = new MATE();\n+ // separate the BEDPE entry into separate\n+ // BED entries\n+ splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2);\n+\n+ // load end1 into a UCSC bin map\n+ bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end);\n+ this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1);\n+\n+ // load end2 into a UCSC bin map\n+ bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end);\n+ 
this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2);\n+\n+ bedpeEntry = nullBedPE;\n+ }\n+ bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n+ }\n+ Close();\n+}\n+\n+\n+void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) {\n+\n+ /*\n+ Split the BEDPE entry into separate BED entries\n+\n+ NOTE: I am using a trick here where I store\n+ the lineNum of the BEDPE from the original file\n+ in the "count" column. This allows me to later\n+ resolve whether the hits found on both ends of BEDPE A\n+ came from the same entry in BEDPE B. Tracking by "name"\n+ alone with fail when there are multiple mappings for a given\n+ read-pair.\n+ */\n+\n+ bedEntry1->bed.chrom = bedpeEntry.chrom1;\n+ bedEntry1->bed.start = bedpeEntry.start1;\n+ bedEntry1->bed.end = bedpeEntry.end1;\n+ bedEntry1->bed.name = bedpeEntry.name;\n+ bedEntry1->bed.score = bedpeEntry.score; // only store the score in end1 to save memory\n+ bedEntry1->bed.strand = bedpeEntry.strand1;\n+ bedEntry1->bed.otherFields = bedpeEntry.otherFields; // only store the otherFields in end1 to save memory\n+ bedEntry1->lineNum = lineNum;\n+ bedEntry1->mate = bedEntry2; // keep a pointer to end2\n+\n+ bedEntry2->bed.chrom = bedpeEntry.chrom2;\n+ bedEntry2->bed.start = bedpeEntry.start2;\n+ bedEntry2->bed.end = bedpeEntry.end2;\n+ bedEntry2->bed.name = bedpeEntry.name;\n+ bedEntry2->bed.strand = bedpeEntry.strand2;\n+ bedEntry2->lineNum = lineNum;\n+ bedEntry2->mate = bedEntry1; // keep a pointer to end1\n+}\n+\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h Thu Nov 03 10:25:04 2011 -0400 |
b |
#ifndef BEDFILEPE_H
#define BEDFILEPE_H

#include <vector>
#include <map>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>
#include <algorithm>
#include "bedFile.h"
#include "lineFileUtilities.h"

using namespace std;


/*
    One paired-end (BEDPE) record: two genomic intervals ("end 1" and
    "end 2") that share a single name and score.
*/
struct BEDPE {

    // first end of the pair
    string chrom1;
    CHRPOS start1;
    CHRPOS end1;

    // second end of the pair
    string chrom2;
    CHRPOS start2;
    CHRPOS end2;

    // fields shared by both ends
    string name;
    string score;

    // strand of each end
    string strand1;
    string strand2;

    // any additional columns, kept as raw strings
    vector<string> otherFields;
};




//************************************************
// BedFilePE Class methods and elements
//************************************************
class BedFilePE {

public:

    // Constructor; the argument is presumably the BEDPE file name
    // stored in bedFile -- TODO confirm against bedFilePE.cpp.
    BedFilePE(string &);

    // Destructor
    ~BedFilePE(void);

    // Open a BEDPE file for reading (creates an istream pointer)
    void Open(void);

    // Close an opened BEDPE file.
    void Close(void);

    // Get the next BEDPE entry in an opened BEDPE file.
    // loadBedPEFileIntoMap() loops on this until it returns BED_INVALID.
    BedLineStatus GetNextBedPE (BEDPE &bedpe, int &lineNum);


    // Methods

    // Print the original BEDPE entry to stdout, followed by a tab.
    void reportBedPETab(const BEDPE &a);
    // Print the original BEDPE entry to stdout
    // (presumably newline-terminated -- TODO confirm).
    void reportBedPENewLine(const BEDPE &a);
    // Read the whole file and store each end of every valid record in
    // bedMapEnd1 / bedMapEnd2, keyed by chromosome and UCSC bin.
    void loadBedPEFileIntoMap();
    // Split one BEDPE record into two MATE entries that point at each other.
    // lineNum is stored in both entries so that hits on the two ends can
    // later be matched back to the same input line.
    void splitBedPEIntoBeds(const BEDPE &a, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2);


    // Append to hits the stored mates overlapping chrom:start-end.
    //   overlapFraction  : minimum (overlap / query size) for a hit
    //   forceStrand      : if true, the hit must be on the same strand
    //   enforceDiffNames : if true, the hit must have a different name
    // NOTE(review): bEnd presumably selects which end's map (1 or 2) is
    // searched -- confirm in bedFilePE.cpp.
    void FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand,
                            vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames);


    // the BEDPE file with which this instance is associated
    string bedFile;
    // NOTE(review): presumably the detected column count -- confirm.
    unsigned int bedType;

    // per-chromosome, per-bin storage of end-1 and end-2 mates
    masterMateMap bedMapEnd1;
    masterMateMap bedMapEnd2;

private:
    // input stream for the opened file
    istream *_bedStream;

    // methods
    BedLineStatus parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum);
    bool parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum);
};

#endif /* BEDFILEPE_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= bedGraphFile.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

$(BUILT_OBJECTS): $(SOURCES)
	@echo "  * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the utility objects this module depends on by recursing into each
# utility directory.  "-C" takes a directory argument; the previous recipe
# passed "-W" and the -I compiler flags there, which cannot work.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,58 @@ +/***************************************************************************** + bedGraphFile.cpp + + (c) 2010 - Assaf Gordon + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "bedGraphFile.h" +#include <sstream> + +// Constructor +BedGraphFile::BedGraphFile(string &_file) : + bedGraphFile(_file), + _bedGraphStream(NULL) +{} + + +// Destructor +BedGraphFile::~BedGraphFile() { + Close(); +} + + +// Open the BEDGRAPH file +void BedGraphFile::Open() { + if (bedGraphFile == "stdin" || bedGraphFile == "-") { + _bedGraphStream = &cin; + } + else { + _bedGraphStream = new ifstream(bedGraphFile.c_str(), ios::in); + + if (isGzipFile(_bedGraphStream) == true) { + delete _bedGraphStream; + _bedGraphStream = new igzstream(bedGraphFile.c_str(), ios::in); + } + // can we open the file? + if ( !(_bedGraphStream->good()) ) { + cerr << "Error: The requested bed file (" << bedGraphFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + } +} + + +// Close the BEDGRAPH file +void BedGraphFile::Close() { + if (bedGraphFile != "stdin" && bedGraphFile != "-") { + if (_bedGraphStream) { + delete _bedGraphStream; + _bedGraphStream = NULL ; + } + } +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,199 @@ +/***************************************************************************** + bedGraphFile.cpp + + (c) 2010 - Assaf Gordon + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef BEDGRAPHFILE_H +#define BEDGRAPHFILE_H + +#include "gzstream.h" +#include "lineFileUtilities.h" +#include "fileType.h" +#include <vector> +#include <map> +#include <set> +#include <string> +#include <iostream> +#include <fstream> +#include <sstream> +#include <cstring> +#include <algorithm> +#include <limits.h> +#include <stdint.h> +#include <cstdio> + +using namespace std; + +//************************************************* +// Data type tydedef +//************************************************* +#ifndef CHRPOS +typedef uint32_t CHRPOS; +#endif + +#ifndef DEPTH +typedef uint32_t DEPTH; +#endif + +/* + Structure for regular BedGraph records + */ +template <typename T> +class BEDGRAPH +{ +public: + std::string chrom; + CHRPOS start; + CHRPOS end; + T depth; + +public: + typedef T DEPTH_TYPE; + // constructors + + // Null + BEDGRAPH() : + start(0), + end(0), + depth(T()) + {} + + // BEDGraph + BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) : + chrom(_chrom), + start(_start), + end(_end), + depth(_depth) + {} +}; // BEDGraph + +typedef BEDGRAPH<int32_t> BEDGRAPH_INT; +typedef BEDGRAPH<std::string> BEDGRAPH_STR; +typedef BEDGRAPH<double> BEDGRAPH_FLOAT; + +template <typename T> +std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg) +{ + strm << bg.chrom << "\t" + << bg.start << "\t" + << bg.end << "\t" + << bg.depth; + return strm; +} + +// enum to flag the state of a given line in a BEDGraph file. 
+enum BedGraphLineStatus +{ + BEDGRAPH_INVALID = -1, + BEDGRAPH_HEADER = 0, + BEDGRAPH_BLANK = 1, + BEDGRAPH_VALID = 2 +}; + + +//************************************************ +// BedGraphFile Class methods and elements +//************************************************ +class BedGraphFile { + +public: + + // Constructor + BedGraphFile(string &); + + // Destructor + ~BedGraphFile(void); + + // Open a BEDGraph file for reading (creates an istream pointer) + void Open(void); + + // Close an opened BED file. + void Close(void); + + // Get the next BED entry in an opened BED file. + template <typename T> + BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum) + { + // make sure there are still lines to process. + // if so, tokenize, validate and return the BED entry. + if (_bedGraphStream->good()) { + string bedGraphLine; + vector<string> bedGraphFields; + + // parse the bedStream pointer + getline(*_bedGraphStream, bedGraphLine); + if (_bedGraphStream->eof()) + return BEDGRAPH_INVALID; + if (_bedGraphStream->bad()) { + cerr << "Error while reading file '" << bedGraphFile << "' : " + << strerror(errno) << endl; + exit(1); + } + lineNum++; + + // split into a string vector. + Tokenize(bedGraphLine,bedGraphFields); + + // load the BED struct as long as it's a valid BED entry. 
+ return parseLine(bedgraph, bedGraphFields, lineNum); + } + + // default if file is closed or EOF + return BEDGRAPH_INVALID; + } + + // the bedfile with which this instance is associated + string bedGraphFile; + +private: + // data + istream *_bedGraphStream; + + template <typename T> + BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum) + { + if (lineVector.size() == 0) + return BEDGRAPH_BLANK; + + if (lineVector[0].find("track") != string::npos || + lineVector[0].find("browser") != string::npos || + lineVector[0].find("#") != string::npos) + return BEDGRAPH_HEADER; + + if (lineVector.size() != 4) + return BEDGRAPH_INVALID; + + bg.chrom = lineVector[0]; + + stringstream str_start(lineVector[1]); + if (! (str_start >> bg.start) ) { + cerr << "Input error, failed to extract start value from '" << lineVector[1] + << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl; + exit(1); + } + + stringstream str_end(lineVector[2]); + if (! (str_end >> bg.end) ) { + cerr << "Input error, failed to extract end value from '" << lineVector[2] + << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl; + exit(1); + } + + stringstream str_depth(lineVector[3]); + if (! (str_depth >> bg.depth) ) { + cerr << "Input error, failed to extract depth value from '" << lineVector[3] + << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl; + exit(1); + } + + return BEDGRAPH_VALID; + } +}; + +#endif /* BEDFILE_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= chromsweep.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=lineFileUtilities.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

$(BUILT_OBJECTS): $(SOURCES)
	@echo "  * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# Build the utility objects listed in _EXT_OBJECTS by recursing into their
# directories.  "-C" takes a directory; the previous recipe handed it the
# -I compiler flags from $(INCLUDES), which cannot work.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,180 @@ +/***************************************************************************** + chromsweep.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "chromsweep.h" +#include <queue> + +bool after(const BED &a, const BED &b); +void report_hits(const BED &curr_qy, const vector<BED> &hits); +vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); + + +/* + // constructor using existing BedFile pointers +*/ +ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) +: _bedA(bedA) +, _bedB(bedB) +, _sameStrand(sameStrand) +, _diffStrand(diffStrand) +{ + // prime the results pump. + _qy_lineNum = 0; + _db_lineNum = 0; + + _hits.reserve(1000); + _cache.reserve(1000); + + _bedA->Open(); + _bedB->Open(); + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); +} + +/* + Constructor with filenames +*/ +ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) +{ + // prime the results pump. 
+ _qy_lineNum = 0; + _db_lineNum = 0; + + _hits.reserve(100000); + _cache.reserve(100000); + + _bedA = new BedFile(bedAFile); + _bedB = new BedFile(bedBFile); + + _bedA->Open(); + _bedB->Open(); + + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); +} + + +/* + Destructor +*/ +ChromSweep::~ChromSweep(void) { +} + + +void ChromSweep::ScanCache() { + if (_qy_status != BED_INVALID) { + vector<BED>::iterator c = _cache.begin(); + while (c != _cache.end()) + { + if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { + if (IsValidHit(_curr_qy, *c)) { + _hits.push_back(*c); + } + ++c; + } + else { + c = _cache.erase(c); + } + } + } +} + + +bool ChromSweep::ChromChange() +{ + // the files are on the same chrom + if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { + return false; + } + // the query is ahead of the database. fast-forward the database to catch-up. + else if (_curr_qy.chrom > _curr_db.chrom) { + while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) + { + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + } + _cache.clear(); + return false; + } + // the database is ahead of the query. + else { + // 1. scan the cache for remaining hits on the query's current chrom. + if (_curr_qy.chrom == _curr_chrom) + { + ScanCache(); + _results.push(make_pair(_curr_qy, _hits)); + _hits.clear(); + } + // 2. fast-forward until we catch up and report 0 hits until we do. + else if (_curr_qy.chrom < _curr_db.chrom) + { + _results.push(make_pair(_curr_qy, _no_hits)); + _cache.clear(); + } + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _curr_chrom = _curr_qy.chrom; + return true; + } +} + +bool ChromSweep::IsValidHit(const BED &query, const BED &db) { + // do we have an overlap in the DB? + if (overlaps(query.start, query.end, db.start, db.end) > 0) { + // Now test for necessary strandedness. 
+ bool strands_are_same = (query.strand == db.strand); + if ( (_sameStrand == false && _diffStrand == false) + || + (_sameStrand == true && strands_are_same == true) + || + (_diffStrand == true && strands_are_same == false) + ) + { + return true; + } + } + return false; +} + + +bool ChromSweep::Next(pair<BED, vector<BED> > &next) { + if (!_bedA->Empty()) { + // have we changed chromosomes? + if (ChromChange() == false) { + // scan the database cache for hits + ScanCache(); + // advance the db until we are ahead of the query. update hits and cache as necessary + while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) + { + if (IsValidHit(_curr_qy, _curr_db)) { + _hits.push_back(_curr_db); + } + _cache.push_back(_curr_db); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + } + // add the hits for this query to the pump + _results.push(make_pair(_curr_qy, _hits)); + // reset for the next query + _hits.clear(); + _curr_qy = _nullBed; + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _curr_chrom = _curr_qy.chrom; + } + } + // report the next set if hits if there are still overlaps in the pump + if (!_results.empty()) { + next = _results.front(); + _results.pop(); + return true; + } + // otherwise, the party is over. + else {return false;} +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  chromsweep.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licenced under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef CHROMSWEEP_H
#define CHROMSWEEP_H

#include "bedFile.h"
#include <vector>
#include <queue>
#include <iostream>
#include <fstream>
#include <stdlib.h>
using namespace std;



// Sweeps a query BED file (A) against a database BED file (B) and yields,
// one query at a time, the database features that overlap it.
class ChromSweep {

// public interface.
public:

    // A is the query and B is the database

    // constructor using existing BedFile pointers.
    // sameStrand / diffStrand restrict hits to features on the same /
    // opposite strand as the query; with both false, strand is ignored.
    ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand = false, bool diffStrand = false);

    // constructor using filenames; allocates its own BedFile objects.
    ChromSweep(string &bedAFile, string &bedBFile);

    // destructor
    ~ChromSweep(void);

    // loads next (a pair) with the current query and its overlaps
    //      next.first is the current query interval
    //      next.second is a vector of the current query's hits
    //                  (empty when the query overlaps nothing).
    // returns true while a result was available, false once the sweep is done.
    bool Next(pair<BED, vector<BED> > &next);

    // Usage:
    //     ChromSweep sweep = ChromSweep(_bedA, _bedB);
    //     pair<BED, vector<BED> > hit_set;
    //     while (sweep.Next(hit_set))
    //     {
    //        // magic happens here!
    //        processHits(hit_set.first, hit_set.second);
    //     }

// private variables.
private:

    // instances of a bed file class.
    BedFile *_bedA, *_bedB;
    // do we care about strandedness.
    bool _sameStrand, _diffStrand;
    // a cache of still active features from the database file
    vector<BED> _cache;
    // the set of hits in the database for the current query
    vector<BED> _hits;
    // a queue from which we retrieve overlap results.  used by Next()
    queue< pair<BED, vector<BED> > > _results;
    // a default-constructed BED used to reset the current query
    BED _nullBed;
    // an empty BED vector for returning no hits for a given query
    vector<BED> _no_hits;
    // the current query and db features.
    BED _curr_qy, _curr_db;
    // a cache of the current chrom from the query. used to handle chrom changes.
    string _curr_chrom;
    // the current line status in the database and query files
    BedLineStatus _qy_status, _db_status;
    // the current line numbers in the database and query files
    int _qy_lineNum, _db_lineNum;

// private methods.
private:

    // re-check cached database features against the current query
    void ScanCache();
    // handle the query and database being on different chromosomes
    bool ChromChange();
    // overlap test plus the requested strand test
    bool IsValidHit(const BED &query, const BED &db);
};

#endif /* CHROMSWEEP_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/fileType/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,29 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ +# ------------------- +# define our includes +# ------------------- +INCLUDES = + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= fileType.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS= +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,71 @@ +/***************************************************************************** +fileType.cpp + +(c) 2009 - Aaron Quinlan +Hall Laboratory +Department of Biochemistry and Molecular Genetics +University of Virginia +aaronquinlan@gmail.com + +Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ + +#include "fileType.h" + + +/* +returns TRUE if the file is a regular file: +not a pipe/device. + +This implies that the file can be opened/closed/seek'd multiple times without losing information +*/ +bool isRegularFile(const string& filename) { + struct stat buf ; + int i; + + i = stat(filename.c_str(), &buf); + if (i!=0) { + cerr << "Error: can't determine file type of '" << filename << "': " << strerror(errno) << endl; + exit(1); + } + if (S_ISREG(buf.st_mode)) + return true; + + return false; +} + +/* +returns TRUE if the file has a GZIP header. +Should only be run on regular files. +*/ +bool isGzipFile(istream *file) { + //see http://www.gzip.org/zlib/rfc-gzip.html#file-format + + /* + 11-Sep-2011: + We now only peek at the first byte and test for GZIPiness. + This is because I can only putback() one byte into an istream + without triggering the "fail" bit. This was necessary to support + FIFOs, per version 2.13.0 + */ + struct { + unsigned char id1; +// unsigned char id2; +// unsigned char cm; + } gzip_header; + + if (!file->read((char*)&gzip_header, sizeof(gzip_header))) { + return false; + } + + if ( gzip_header.id1 == 0x1f ) +// && +// gzip_header.id2 == 0x8b +// && +// gzip_header.cm == 8 ) + { + return true; + } + file->putback(gzip_header.id1); + return false; +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/fileType/fileType.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/fileType/fileType.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  fileType.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licensed under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef FILETYPE_H
#define FILETYPE_H

#include <string>
#include <iostream>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sstream>

using namespace std;

/*****************************************************************************
  Convenience functions to detect whether a given file is
  "regular" and/or "gzipped".

  Kindly contributed by Assaf Gordon.
******************************************************************************/
// NOTE(review): no definition of string_error() appears in fileType.cpp;
// confirm it is defined elsewhere before calling it.
string string_error(int errnum);
// true if filename is a regular file (not a pipe/device);
// exits the program if the file cannot be stat'ed.
bool isRegularFile(const string& filename);
// true if the first byte of the stream is the gzip magic byte 0x1f.
// A non-matching byte is put back; a matching byte stays consumed.
bool isGzipFile(istream *file);

#endif /* FILETYPE_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,32 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../ +# ------------------- +# define our includes +# ------------------- +INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \ + -I$(UTILITIES_DIR)/fileType/ \ + -I$(UTILITIES_DIR)/BamTools/include/ + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= genomeFile.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS=lineFileUtilities.o fileType.o +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ + @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,103 @@ +/***************************************************************************** + genomeFile.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "genomeFile.h" + + +GenomeFile::GenomeFile(const string &genomeFile) { + _genomeFile = genomeFile; + loadGenomeFileIntoMap(); +} + +GenomeFile::GenomeFile(const RefVector &genome) { + for (size_t i = 0; i < genome.size(); ++i) { + string chrom = genome[i].RefName; + int length = genome[i].RefLength; + + _chromSizes[chrom] = length; + _chromList.push_back(chrom); + } +} + +// Destructor +GenomeFile::~GenomeFile(void) { +} + + +void GenomeFile::loadGenomeFileIntoMap() { + + string genomeLine; + int lineNum = 0; + vector<string> genomeFields; // vector for a GENOME entry + + // open the GENOME file for reading + ifstream genome(_genomeFile.c_str(), ios::in); + if ( !genome ) { + cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + + while (getline(genome, genomeLine)) { + + Tokenize(genomeLine,genomeFields); // load the fields into the vector + lineNum++; + + // ignore a blank line + if (genomeFields.size() > 0) { + if (genomeFields[0].find("#") == string::npos) { + + // we need at least 2 columns + if (genomeFields.size() >= 2) { + char *p2End; + long c2; + // make sure the second column is numeric. 
+ c2 = strtol(genomeFields[1].c_str(), &p2End, 10); + + // strtol will set p2End to the start of the string if non-integral, base 10 + if (p2End != genomeFields[1].c_str()) { + string chrom = genomeFields[0]; + int size = atoi(genomeFields[1].c_str()); + _chromSizes[chrom] = size; + _chromList.push_back(chrom); + } + } + else { + cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")"; + cerr << " at line " << lineNum << ". Exiting." << endl; + exit (1); + } + } + } + genomeFields.clear(); + } +} + + +int GenomeFile::getChromSize(const string &chrom) { + chromToSizes::const_iterator chromIt = _chromSizes.find(chrom); + if (chromIt != _chromSizes.end()) + return _chromSizes[chrom]; + else + return -1; // chrom not found. +} + +vector<string> GenomeFile::getChromList() { + return _chromList; +} + +int GenomeFile::getNumberOfChroms() { + return _chromList.size(); +} + +string GenomeFile::getGenomeFileName() { + return _genomeFile; +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,62 @@ +/***************************************************************************** + genomeFile.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef GENOMEFILE_H +#define GENOMEFILE_H + +#include <map> +#include <string> +#include <iostream> +#include <sstream> +#include <fstream> +#include <cstring> +#include <cstdio> +#include "api/BamReader.h" +#include "api/BamAux.h" +using namespace BamTools; + +using namespace std; + + +// typedef for mapping b/w chrom name and it's size in b.p. +typedef map<string, int, std::less<string> > chromToSizes; + + +class GenomeFile { + +public: + + // Constructor using a file + GenomeFile(const string &genomeFile); + + // Constructor using a vector of BamTools RefVector + GenomeFile(const RefVector &genome); + + // Destructor + ~GenomeFile(void); + + // load a GENOME file into a map keyed by chrom. value is size of chrom. + void loadGenomeFileIntoMap(); + + int getChromSize(const string &chrom); // return the size of a chromosome + vector<string> getChromList(); // return a list of chrom names + int getNumberOfChroms(); // return the number of chroms + string getGenomeFileName(); // return the name of the genome file + + + +private: + string _genomeFile; + chromToSizes _chromSizes; + vector<string> _chromList; +}; + +#endif /* GENOMEFILE_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB Thu Nov 03 10:25:04 2011 -0400 |
[ |
b"@@ -0,0 +1,504 @@\n+\t\t GNU LESSER GENERAL PUBLIC LICENSE\n+\t\t Version 2.1, February 1999\n+\n+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.\n+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+[This is the first released version of the Lesser GPL. It also counts\n+ as the successor of the GNU Library Public License, version 2, hence\n+ the version number 2.1.]\n+\n+\t\t\t Preamble\n+\n+ The licenses for most software are designed to take away your\n+freedom to share and change it. By contrast, the GNU General Public\n+Licenses are intended to guarantee your freedom to share and change\n+free software--to make sure the software is free for all its users.\n+\n+ This license, the Lesser General Public License, applies to some\n+specially designated software packages--typically libraries--of the\n+Free Software Foundation and other authors who decide to use it. You\n+can use it too, but we suggest you first think carefully about whether\n+this license or the ordinary General Public License is the better\n+strategy to use in any particular case, based on the explanations below.\n+\n+ When we speak of free software, we are referring to freedom of use,\n+not price. Our General Public Licenses are designed to make sure that\n+you have the freedom to distribute copies of free software (and charge\n+for this service if you wish); that you receive source code or can get\n+it if you want it; that you can change the software and use pieces of\n+it in new free programs; and that you are informed that you can do\n+these things.\n+\n+ To protect your rights, we need to make restrictions that forbid\n+distributors to deny you these rights or to ask you to surrender these\n+rights. 
These restrictions translate to certain responsibilities for\n+you if you distribute copies of the library or if you modify it.\n+\n+ For example, if you distribute copies of the library, whether gratis\n+or for a fee, you must give the recipients all the rights that we gave\n+you. You must make sure that they, too, receive or can get the source\n+code. If you link other code with the library, you must provide\n+complete object files to the recipients, so that they can relink them\n+with the library after making changes to the library and recompiling\n+it. And you must show them these terms so they know their rights.\n+\n+ We protect your rights with a two-step method: (1) we copyright the\n+library, and (2) we offer you this license, which gives you legal\n+permission to copy, distribute and/or modify the library.\n+\n+ To protect each distributor, we want to make it very clear that\n+there is no warranty for the free library. Also, if the library is\n+modified by someone else and passed on, the recipients should know\n+that what they have is not the original version, so that the original\n+author's reputation will not be affected by problems that might be\n+introduced by others.\n+\x0c\n+ Finally, software patents pose a constant threat to the existence of\n+any free program. We wish to make sure that a company cannot\n+effectively restrict the users of a free program by obtaining a\n+restrictive license from a patent holder. Therefore, we insist that\n+any patent license obtained for a version of the library must be\n+consistent with the full freedom of use specified in this license.\n+\n+ Most GNU software, including some libraries, is covered by the\n+ordinary GNU General Public License. This license, the GNU Lesser\n+General Public License, applies to certain designated libraries, and\n+is quite different from the ordinary General Public License. 
We use\n+this license for certain libraries in order to permit linking those\n+libraries into non-free programs.\n+\n+ When a program is linked with a library, whether statically or using\n+a shared library, the combination of the two is legally speaking a\n+combined work, a derivative of the original library. The o"..b'se version number, you may choose any version ever published by\n+the Free Software Foundation.\n+\x0c\n+ 14. If you wish to incorporate parts of the Library into other free\n+programs whose distribution conditions are incompatible with these,\n+write to the author to ask for permission. For software which is\n+copyrighted by the Free Software Foundation, write to the Free\n+Software Foundation; we sometimes make exceptions for this. Our\n+decision will be guided by the two goals of preserving the free status\n+of all derivatives of our free software and of promoting the sharing\n+and reuse of software generally.\n+\n+\t\t\t NO WARRANTY\n+\n+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO\n+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.\n+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR\n+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY\n+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\n+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\n+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME\n+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+ 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN\n+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY\n+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU\n+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR\n+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE\n+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING\n+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A\n+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF\n+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n+DAMAGES.\n+\n+\t\t END OF TERMS AND CONDITIONS\n+\x0c\n+ How to Apply These Terms to Your New Libraries\n+\n+ If you develop a new library, and you want it to be of the greatest\n+possible use to the public, we recommend making it free software that\n+everyone can redistribute and change. You can do so by permitting\n+redistribution under these terms (or, alternatively, under the terms of the\n+ordinary General Public License).\n+\n+ To apply these terms, attach the following notices to the library. It is\n+safest to attach them to the start of each source file to most effectively\n+convey the exclusion of warranty; and each file should have at least the\n+"copyright" line and a pointer to where the full notice is found.\n+\n+ <one line to give the library\'s name and a brief idea of what it does.>\n+ Copyright (C) <year> <name of author>\n+\n+ This library is free software; you can redistribute it and/or\n+ modify it under the terms of the GNU Lesser General Public\n+ License as published by the Free Software Foundation; either\n+ version 2.1 of the License, or (at your option) any later version.\n+\n+ This library is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU\n+ Lesser General Public License for more details.\n+\n+ You should have received a copy of the GNU Lesser General Public\n+ License along with this library; if not, write to the Free Software\n+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+You should also get your employer (if you work as a programmer) or your\n+school, if any, to sign a "copyright disclaimer" for the library, if\n+necessary. Here is a sample; alter the names:\n+\n+ Yoyodyne, Inc., hereby disclaims all copyright interest in the\n+ library `Frob\' (a library for tweaking knobs) written by James Random Hacker.\n+\n+ <signature of Ty Coon>, 1 April 1990\n+ Ty Coon, President of Vice\n+\n+That\'s all there is to it!\n+\n+\n' |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,49 @@ +# ============================================================================ +# gzstream, C++ iostream classes wrapping the zlib compression library. +# Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# ============================================================================ +# +# File : Makefile +# Revision : $Revision: 1.3 $ +# Revision_date : $Date: 2001/10/04 15:09:28 $ +# Author(s) : Deepak Bandyopadhyay, Lutz Kettner +# +# ============================================================================ + +# ---------------------------------------------------------------------------- +# adapt these settings to your need: +# add '-DGZSTREAM_NAMESPACE=name' to CPPFLAGS to place the classes +# in its own namespace. Note, this macro needs to be set while creating +# the library as well while compiling applications based on it. +# As an alternative, gzstream.C and gzstream.h can be edited. +# ---------------------------------------------------------------------------- + +INCLUDES = -I. +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ + +${OBJ_DIR}/gzstream.o : gzstream.C gzstream.h + ${CXX} ${CXXFLAGS} -c -o ${OBJ_DIR}/gzstream.o gzstream.C $(INCLUDES) + +clean: + @echo "Cleaning up." 
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/README Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,6 @@ + + gzstream + C++ iostream classes wrapping the zlib compression library. +=========================================================================== + + See index.html for documentation and installation instructions. |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,165 @@ +// ============================================================================ +// gzstream, C++ iostream classes wrapping the zlib compression library. +// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// ============================================================================ +// +// File : gzstream.C +// Revision : $Revision: 1.7 $ +// Revision_date : $Date: 2003/01/08 14:41:27 $ +// Author(s) : Deepak Bandyopadhyay, Lutz Kettner +// +// Standard streambuf implementation following Nicolai Josuttis, "The +// Standard C++ Library". +// ============================================================================ + +#include <gzstream.h> +#include <iostream> +#include <string.h> // for memcpy + +#ifdef GZSTREAM_NAMESPACE +namespace GZSTREAM_NAMESPACE { +#endif + +// ---------------------------------------------------------------------------- +// Internal classes to implement gzstream. See header file for user classes. 
+// ---------------------------------------------------------------------------- + +// -------------------------------------- +// class gzstreambuf: +// -------------------------------------- + +gzstreambuf* gzstreambuf::open( const char* name, int open_mode) { + if ( is_open()) + return (gzstreambuf*)0; + mode = open_mode; + // no append nor read/write mode + if ((mode & std::ios::ate) || (mode & std::ios::app) + || ((mode & std::ios::in) && (mode & std::ios::out))) + return (gzstreambuf*)0; + char fmode[10]; + char* fmodeptr = fmode; + if ( mode & std::ios::in) + *fmodeptr++ = 'r'; + else if ( mode & std::ios::out) + *fmodeptr++ = 'w'; + *fmodeptr++ = 'b'; + *fmodeptr = '\0'; + file = gzopen( name, fmode); + if (file == 0) + return (gzstreambuf*)0; + opened = 1; + return this; +} + +gzstreambuf * gzstreambuf::close() { + if ( is_open()) { + sync(); + opened = 0; + if ( gzclose( file) == Z_OK) + return this; + } + return (gzstreambuf*)0; +} + +int gzstreambuf::underflow() { // used for input buffer only + if ( gptr() && ( gptr() < egptr())) + return * reinterpret_cast<unsigned char *>( gptr()); + + if ( ! (mode & std::ios::in) || ! opened) + return EOF; + // Josuttis' implementation of inbuf + int n_putback = gptr() - eback(); + if ( n_putback > 4) + n_putback = 4; + memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback); + + int num = gzread( file, buffer+4, bufferSize-4); + if (num <= 0) // ERROR or EOF + return EOF; + + // reset buffer pointers + setg( buffer + (4 - n_putback), // beginning of putback area + buffer + 4, // read position + buffer + 4 + num); // end of buffer + + // return next character + return * reinterpret_cast<unsigned char *>( gptr()); +} + +int gzstreambuf::flush_buffer() { + // Separate the writing of the buffer from overflow() and + // sync() operation. 
+ int w = pptr() - pbase(); + if ( gzwrite( file, pbase(), w) != w) + return EOF; + pbump( -w); + return w; +} + +int gzstreambuf::overflow( int c) { // used for output buffer only + if ( ! ( mode & std::ios::out) || ! opened) + return EOF; + if (c != EOF) { + *pptr() = c; + pbump(1); + } + if ( flush_buffer() == EOF) + return EOF; + return c; +} + +int gzstreambuf::sync() { + // Changed to use flush_buffer() instead of overflow( EOF) + // which caused improper behavior with std::endl and flush(), + // bug reported by Vincent Ricard. + if ( pptr() && pptr() > pbase()) { + if ( flush_buffer() == EOF) + return -1; + } + return 0; +} + +// -------------------------------------- +// class gzstreambase: +// -------------------------------------- + +gzstreambase::gzstreambase( const char* name, int mode) { + init( &buf); + open( name, mode); +} + +gzstreambase::~gzstreambase() { + buf.close(); +} + +void gzstreambase::open( const char* name, int open_mode) { + if ( ! buf.open( name, open_mode)) + clear( rdstate() | std::ios::badbit); +} + +void gzstreambase::close() { + if ( buf.is_open()) + if ( ! buf.close()) + clear( rdstate() | std::ios::badbit); +} + +#ifdef GZSTREAM_NAMESPACE +} // namespace GZSTREAM_NAMESPACE +#endif + +// ============================================================================ +// EOF // |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,121 @@ +// ============================================================================ +// gzstream, C++ iostream classes wrapping the zlib compression library. +// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// ============================================================================ +// +// File : gzstream.h +// Revision : $Revision: 1.5 $ +// Revision_date : $Date: 2002/04/26 23:30:15 $ +// Author(s) : Deepak Bandyopadhyay, Lutz Kettner +// +// Standard streambuf implementation following Nicolai Josuttis, "The +// Standard C++ Library". +// ============================================================================ + +#ifndef GZSTREAM_H +#define GZSTREAM_H 1 + +// standard C++ with new header file names and std:: namespace +#include <iostream> +#include <fstream> +#include <zlib.h> + +#ifdef GZSTREAM_NAMESPACE +namespace GZSTREAM_NAMESPACE { +#endif + +// ---------------------------------------------------------------------------- +// Internal classes to implement gzstream. See below for user classes. 
+// ---------------------------------------------------------------------------- + +class gzstreambuf : public std::streambuf { +private: + static const int bufferSize = 47+256; // size of data buff + // totals 512 bytes under g++ for igzstream at the end. + + gzFile file; // file handle for compressed file + char buffer[bufferSize]; // data buffer + char opened; // open/close state of stream + int mode; // I/O mode + + int flush_buffer(); +public: + gzstreambuf() : opened(0) { + setp( buffer, buffer + (bufferSize-1)); + setg( buffer + 4, // beginning of putback area + buffer + 4, // read position + buffer + 4); // end position + // ASSERT: both input & output capabilities will not be used together + } + int is_open() { return opened; } + gzstreambuf* open( const char* name, int open_mode); + gzstreambuf* close(); + ~gzstreambuf() { close(); } + + virtual int overflow( int c = EOF); + virtual int underflow(); + virtual int sync(); +}; + +class gzstreambase : virtual public std::ios { +protected: + gzstreambuf buf; +public: + gzstreambase() { init(&buf); } + gzstreambase( const char* name, int open_mode); + ~gzstreambase(); + void open( const char* name, int open_mode); + void close(); + gzstreambuf* rdbuf() { return &buf; } +}; + +// ---------------------------------------------------------------------------- +// User classes. Use igzstream and ogzstream analogously to ifstream and +// ofstream respectively. They read and write files based on the gz* +// function interface of the zlib. Files are compatible with gzip compression. 
+// ---------------------------------------------------------------------------- + +class igzstream : public gzstreambase, public std::istream { +public: + igzstream() : std::istream( &buf) {} + igzstream( const char* name, int open_mode = std::ios::in) + : gzstreambase( name, open_mode), std::istream( &buf) {} + gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } + void open( const char* name, int open_mode = std::ios::in) { + gzstreambase::open( name, open_mode); + } +}; + +class ogzstream : public gzstreambase, public std::ostream { +public: + ogzstream() : std::ostream( &buf) {} + ogzstream( const char* name, int mode = std::ios::out) + : gzstreambase( name, mode), std::ostream( &buf) {} + gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } + void open( const char* name, int open_mode = std::ios::out) { + gzstreambase::open( name, open_mode); + } +}; + +#ifdef GZSTREAM_NAMESPACE +} // namespace GZSTREAM_NAMESPACE +#endif + +#endif // GZSTREAM_H +// ============================================================================ +// EOF // + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o has changed |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o has changed |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o has changed |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/gzstream/version --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/gzstream/version Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,1 @@ +1.5 (08 Jan 2003) |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,29 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ +# ------------------- +# define our includes +# ------------------- +INCLUDES = + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= lineFileUtilities.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS= +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,15 @@ +// +// lineFileUtilities.cpp +// BEDTools +// +// Created by Aaron Quinlan Spring 2009. +// Copyright 2009 Aaron Quinlan. All rights reserved. +// +// Summary: Contains common functions for processing text files. +// +#include <sstream> +#include <iostream> +#include "lineFileUtilities.h" + + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
#ifndef LINEFILEUTILITIES_H
#define LINEFILEUTILITIES_H

#include <vector>
#include <string>
#include <cstring>
#include <cstdlib>
#include <sstream>

using namespace std;

// templated function to convert objects to strings
// (formats `value` with operator<< into a stringstream).
template <typename T>
inline
std::string ToString(const T & value) {
    std::stringstream ss;
    ss << value;
    return ss.str();
}

// tokenize into a list of strings.
//
// Every character of `delimiter` is a separator. Runs of separators are
// collapsed, so no empty tokens are produced, and an empty `str` yields no
// tokens. Tokens are APPENDED to `elems`; the caller clears it if needed.
//
// Implemented with std::string searches rather than strtok() on a
// variable-length stack array: VLAs are not standard C++ and can overflow the
// stack on very long lines, and strtok() keeps hidden static state.
inline
void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t")
{
    string::size_type start = str.find_first_not_of(delimiter);
    while (start != string::npos) {
        const string::size_type stop = str.find_first_of(delimiter, start);
        if (stop == string::npos) {
            // last token runs to the end of the string
            elems.push_back(str.substr(start));
            break;
        }
        elems.push_back(str.substr(start, stop - start));
        start = str.find_first_not_of(delimiter, stop);
    }
}

// tokenize into a list of integers
//
// Same splitting rules as the string overload; each token is converted with
// atoi(), so a token that does not start with a number becomes 0.
inline
void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t")
{
    string::size_type start = str.find_first_not_of(delimiter);
    while (start != string::npos) {
        const string::size_type stop = str.find_first_of(delimiter, start);
        if (stop == string::npos) {
            // last token runs to the end of the string
            elems.push_back(atoi(str.substr(start).c_str()));
            break;
        }
        elems.push_back(atoi(str.substr(start, stop - start).c_str()));
        start = str.find_first_not_of(delimiter, stop);
    }
}

#endif /* LINEFILEUTILITIES_H */

b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,29 @@ +OBJ_DIR = ../../../obj/ +BIN_DIR = ../../../bin/ +UTILITIES_DIR = ../../utils/ +# ------------------- +# define our includes +# ------------------- +INCLUDES = + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES= sequenceUtils.cpp +OBJECTS= $(SOURCES:.cpp=.o) +_EXT_OBJECTS = +EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +$(EXT_OBJECTS): + @$(MAKE) --no-print-directory -C $(INCLUDES) + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean \ No newline at end of file |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,126 @@ +// +// sequenceUtils.cpp +// BEDTools +// +// Created by Aaron Quinlan Spring 2009. +// Copyright 2009 Aaron Quinlan. All rights reserved. +// +// Summary: Contains common functions for manipulating DNA sequences. +// +// Acknowledgment: I am grateful to Michael Stromberg for the code below to +// reverse complement a sequence. + +#include "sequenceUtils.h" + +// Performs an in-place sequence reversal +void reverseSequence(string &seq) { + std::reverse(seq.begin(), seq.end()); +} + +// Performs an in-place reverse complement conversion +void reverseComplement(string &seq) { + + // reverse the sequence + reverseSequence(seq); + + // swap the bases + for(unsigned int i = 0; i < seq.length(); i++) { + switch(seq[i]) { + case 'A': + seq[i] = 'T'; + break; + case 'C': + seq[i] = 'G'; + break; + case 'G': + seq[i] = 'C'; + break; + case 'T': + seq[i] = 'A'; + break; + case 'a': + seq[i] = 't'; + break; + case 'c': + seq[i] = 'g'; + break; + case 'g': + seq[i] = 'c'; + break; + case 't': + seq[i] = 'a'; + break; + default: + break; + } + } +} + + +void toLowerCase(std::string &seq) +{ + const int length = seq.length(); + for(int i=0; i < length; ++i) + { + seq[i] = std::tolower(seq[i]); + } +} + + +void toUpperCase(std::string &seq) +{ + const int length = seq.length(); + for(int i=0; i < length; ++i) + { + seq[i] = std::toupper(seq[i]); + } +} + + +void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other) +{ + // swap the bases + for(unsigned int i = 0; i < seq.length(); i++) { + switch(seq[i]) { + case 'A': + case 'a': + a++; + break; + case 'C': + case 'c': + c++; + break; + case 'G': + case 'g': + g++; + break; + case 'T': + case 't': + t++; + break; + case 'N': + case 'n': + n++; + break; + default: + other++; + break; + } + } +} + + +int countPattern(const string &seq, const string &pattern) +{ + // swap the bases + int patternLength = pattern.size(); + int patternCount = 0; + for(unsigned int i = 0; i < 
seq.length(); i++) { + if (seq.substr(i,patternLength) == pattern) { + patternCount++; + } + } + return patternCount; +} + + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,27 @@ +#ifndef SEQUENCEUTILS_H +#define SEQUENCEUTILS_H + +#include <string> +#include <algorithm> +#include <cctype> + +using namespace std; + +// Performs an in-place sequence reversal +void reverseSequence(string &seq); + +// Performs an in-place reverse complement conversion +void reverseComplement(string &seq); + +// Converts every character in a string to lowercase +void toLowerCase(string &seq); + +// Converts every character in a string to uppercase +void toUpperCase(string &seq); + +// Calculates the number of a, c, g, t, n, and other bases found in a sequence +void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other); + +int countPattern(const string &seq, const string &pattern); + +#endif |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
#ifndef STRINGUTILITIES_H
#define STRINGUTILITIES_H

#include <cctype>
#include <string>

/****************************************************
// isInteger(s): Tests if string s is a valid integer
//
// Returns true when every character of s is a decimal
// digit. Signs, whitespace and decimal points make it
// return false. An empty string has no offending
// character and therefore returns true.
*****************************************************/
inline bool isInteger(const std::string& s) {
    const std::string::size_type len = s.length();
    for (std::string::size_type i = 0; i < len; i++) {
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        if (!std::isdigit(static_cast<unsigned char>(s[i]))) return false;
    }
    return true;
}

#endif /* STRINGUTILITIES_H */

b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/tabFile/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
OBJ_DIR = ../../../obj/
BIN_DIR = ../../../bin/
UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/

# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= tabFile.cpp
OBJECTS= $(SOURCES:.cpp=.o)
_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))

# compile tabFile.cpp into $(OBJ_DIR)
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# build each external object by running make inside the directory that
# owns it.  "-C" needs a directory argument; the previous recipe passed
# "-W $(INCLUDES)" instead, so it could never succeed.
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,99 @@ +/***************************************************************************** + tabFile.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "tabFile.h" + +/******************************************* +Class methods +*******************************************/ + +// Constructor +TabFile::TabFile(const string &tabFile) +: _tabFile(tabFile) +{} + +// Destructor +TabFile::~TabFile(void) { +} + + +void TabFile::Open(void) { + if (_tabFile == "stdin") { + _tabStream = &cin; + } + else { + size_t foundPos; + foundPos = _tabFile.find_last_of(".gz"); + // is this a GZIPPED TAB file? + if (foundPos == _tabFile.size() - 1) { + igzstream tabs(_tabFile.c_str(), ios::in); + if ( !tabs ) { + cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + // if so, close it (this was just a test) + tabs.close(); + // now set a pointer to the stream so that we + // can read the file later on. + _tabStream = new igzstream(_tabFile.c_str(), ios::in); + } + } + // not GZIPPED. + else { + + ifstream tabs(_tabFile.c_str(), ios::in); + // can we open the file? + if ( !tabs ) { + cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + // if so, close it (this was just a test) + tabs.close(); + // now set a pointer to the stream so that we + // can read the file later on. 
+ _tabStream = new ifstream(_tabFile.c_str(), ios::in); + } + } + } +} + + +// Close the TAB file +void TabFile::Close(void) { + if (_tabFile != "stdin") delete _tabStream; +} + + +TabLineStatus TabFile::GetNextTabLine(TAB_FIELDS &tabFields, int &lineNum) { + + // make sure there are still lines to process. + // if so, tokenize, return the TAB_FIELDS. + if (_tabStream->good() == true) { + string tabLine; + tabFields.reserve(20); + + // parse the tabStream pointer + getline(*_tabStream, tabLine); + lineNum++; + + // split into a string vector. + Tokenize(tabLine, tabFields); + + // parse the line and validate it + return parseTabLine(tabFields, lineNum); + } + + // default if file is closed or EOF + return TAB_INVALID; +} |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h Thu Nov 03 10:25:04 2011 -0400 |
[ |
@@ -0,0 +1,80 @@ +/***************************************************************************** + tabFile.h + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licensed under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef TABFILE_H +#define TABFILE_H + +#include "gzstream.h" +#include <vector> +#include <string> +#include <iostream> + +using namespace std; + +// enum to flag the state of a given line in a TAB file. +enum TabLineStatus +{ + TAB_INVALID = -1, + TAB_HEADER = 0, + TAB_BLANK = 1, + TAB_VALID = 2 +}; + +typedef vector<string> TAB_FIELDS; + +//************************************************ +// TabFile Class methods and elements +//************************************************ +class TabFile { + +public: + + // Constructor + TabFile(const string &tabFile); + + // Destructor + ~TabFile(void); + + // Open a TAB file for reading (creates an istream pointer) + void Open(void); + + // Close an opened TAB file. + void Close(void); + + // Get the next TAB entry in an opened TAB file. + TabLineStatus GetNextTabLine (TAB_FIELDS &tab, int &lineNum); + +private: + + // data + istream *_tabStream; + string _tabFile; + + // methods + inline TabLineStatus parseTabLine (const vector<string> &lineVector, int &lineNum) { + // bail out if we have a blank line + if (lineVector.size() == 0) + return TAB_BLANK; + // real line with data + if (lineVector[0][0] != '#') { + return TAB_VALID; + } + // comment or header line + else { + lineNum--; + return TAB_HEADER; + } + // default + return TAB_INVALID; + } +}; + +#endif /* TABFILE_H */ |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/utils/version/version.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/version/version.h Thu Nov 03 10:25:04 2011 -0400 |
b |
#ifndef VERSION_H
#define VERSION_H

// define the version.  All tools in the
// suite carry the same version number.
// NOTE(review): this header ships inside a tree named
// BEDTools-Version-2.14.3 while the string below reads 2.14.2 -- confirm
// whether the constant was meant to be bumped for this release.
#define VERSION "2.14.2"

#endif /* VERSION_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/windowBed/Makefile Thu Nov 03 10:25:04 2011 -0400 |
b |
# Build rules for the windowBed program.
UTILITIES_DIR = ../utils/
OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/

# -------------------
# define our includes
# -------------------
INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
           -I$(UTILITIES_DIR)/version/ \
           -I$(UTILITIES_DIR)/gzstream/ \
           -I$(UTILITIES_DIR)/genomeFile/ \
           -I$(UTILITIES_DIR)/lineFileUtilities/ \
           -I$(UTILITIES_DIR)/fileType/ \
           -I$(UTILITIES_DIR)/BamTools/include \
           -I$(UTILITIES_DIR)/BamTools-Ancillary
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= windowMain.cpp windowBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
# objects produced by other directories that are linked into the program
_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= windowBed


all: $(PROGRAM)

.PHONY: all

# link our own objects, the external objects and the bamtools library
# into $(BIN_DIR)
$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
	@echo " * linking $(PROGRAM)"
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS)

# compile each source file; $(*F) is the file part of the target's stem
$(BUILT_OBJECTS): $(SOURCES)
	@echo " * compiling" $(*F).cpp
	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)

# run make in every utility directory this program depends on
# (this includes BamTools and BamTools-Ancillary, which are not listed in
# _EXT_OBJECTS)
$(EXT_OBJECTS):
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/
	@$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/

# removes everything in the shared object and binary directories,
# not only this program's files
clean:
	@echo "Cleaning up."
	@rm -f $(OBJ_DIR)/* $(BIN_DIR)/*

.PHONY: clean
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp Thu Nov 03 10:25:04 2011 -0400 |
b |
@@ -0,0 +1,253 @@ +/***************************************************************************** + windowBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "windowBed.h" + + +/* + Constructor +*/ +BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, + bool anyHit, bool noHit, bool writeCount, bool strandWindows, + bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) { + + _bedAFile = bedAFile; + _bedBFile = bedBFile; + + _leftSlop = leftSlop; + _rightSlop = rightSlop; + + _anyHit = anyHit; + _noHit = noHit; + _writeCount = writeCount; + _strandWindows = strandWindows; + _matchOnSameStrand = matchOnSameStrand; + _matchOnDiffStrand = matchOnDiffStrand; + _bamInput = bamInput; + _bamOutput = bamOutput; + _isUncompressedBam = isUncompressedBam; + + _bedA = new BedFile(bedAFile); + _bedB = new BedFile(bedBFile); + + if (_bamInput == false) + WindowIntersectBed(); + else + WindowIntersectBam(_bedAFile); +} + + + +/* + Destructor +*/ +BedWindow::~BedWindow(void) { +} + + + +void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) { + + /* + Adjust the start and end of a based on the requested window + */ + + // update the current feature's start and end + // according to the slop requested (slop = 0 by default) + CHRPOS aFudgeStart = 0; + CHRPOS aFudgeEnd; + AddWindow(a, aFudgeStart, aFudgeEnd); + + /* + Now report the hits (if any) based on the window around a. 
+ */ + // get the hits in B for the A feature + _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand); + + int numOverlaps = 0; + + // loop through the hits and report those that meet the user's criteria + vector<BED>::const_iterator h = hits.begin(); + vector<BED>::const_iterator hitsEnd = hits.end(); + for (; h != hitsEnd; ++h) { + + int s = max(aFudgeStart, h->start); + int e = min(aFudgeEnd, h->end); + int overlapBases = (e - s); // the number of overlapping bases b/w a and b + int aLength = (a.end - a.start); // the length of a in b.p. + + if (s < e) { + // is there enough overlap (default ~ 1bp) + if ( ((float) overlapBases / (float) aLength) > 0 ) { + numOverlaps++; + if (_anyHit == false && _noHit == false && _writeCount == false) { + _bedA->reportBedTab(a); + _bedB->reportBedNewLine(*h); + } + } + } + } + if (_anyHit == true && (numOverlaps >= 1)) { + _bedA->reportBedNewLine(a); } + else if (_writeCount == true) { + _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps); + } + else if (_noHit == true && (numOverlaps == 0)) { + _bedA->reportBedNewLine(a); + } +} + + +bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) { + + // update the current feature's start and end + // according to the slop requested (slop = 0 by default) + CHRPOS aFudgeStart = 0; + CHRPOS aFudgeEnd; + AddWindow(a, aFudgeStart, aFudgeEnd); + + bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand); + return overlapsFound; +} + + +void BedWindow::WindowIntersectBed() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bedB->loadBedFileIntoMap(); + + BED a, nullBed; + int lineNum = 0; // current input line number + BedLineStatus bedStatus; + vector<BED> hits; // vector of potential hits + hits.reserve(100); + + _bedA->Open(); + while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { + 
if (bedStatus == BED_VALID) { + FindWindowOverlaps(a, hits); + hits.clear(); + a = nullBed; + } + } + _bedA->Close(); +} + + +void BedWindow::WindowIntersectBam(string bamFile) { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bedB->loadBedFileIntoMap(); + + // open the BAM file + BamReader reader; + BamWriter writer; + reader.Open(bamFile); + + // get header & reference information + string bamHeader = reader.GetHeaderText(); + RefVector refs = reader.GetReferenceData(); + + // open a BAM output to stdout if we are writing BAM + if (_bamOutput == true) { + // set compression mode + BamWriter::CompressionMode compressionMode = BamWriter::Compressed; + if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; + writer.SetCompressionMode(compressionMode); + // open our BAM writer + writer.Open("stdout", bamHeader, refs); + } + + vector<BED> hits; // vector of potential hits + // reserve some space + hits.reserve(100); + + _bedA->bedType = 6; + BamAlignment bam; + bool overlapsFound; + // get each set of alignments for each pair. + while (reader.GetNextAlignment(bam)) { + + if (bam.IsMapped()) { + BED a; + a.chrom = refs.at(bam.RefID).RefName; + a.start = bam.Position; + a.end = bam.GetEndPosition(false, false); + + // build the name field from the BAM alignment. + a.name = bam.Name; + if (bam.IsFirstMate()) a.name += "/1"; + if (bam.IsSecondMate()) a.name += "/2"; + + a.score = ToString(bam.MapQuality); + a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; + + if (_bamOutput == true) { + overlapsFound = FindOneOrMoreWindowOverlaps(a); + if (overlapsFound == true) { + if (_noHit == false) + writer.SaveAlignment(bam); + } + else { + if (_noHit == true) + writer.SaveAlignment(bam); + } + } + else { + FindWindowOverlaps(a, hits); + hits.clear(); + } + } + // BAM IsMapped() is false + else if (_noHit == true) { + writer.SaveAlignment(bam); + } + } + + // close the relevant BAM files. 
+ reader.Close(); + if (_bamOutput == true) { + writer.Close(); + } +} + + +void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) { + // Does the user want to treat the windows based on strand? + // If so, + // if "+", then left is left and right is right + // if "-", the left is right and right is left. + if (_strandWindows) { + if (a.strand == "+") { + if ((int) (a.start - _leftSlop) > 0) + fudgeStart = a.start - _leftSlop; + else fudgeStart = 0; + fudgeEnd = a.end + _rightSlop; + } + else { + if ((int) (a.start - _rightSlop) > 0) + fudgeStart = a.start - _rightSlop; + else fudgeStart = 0; + fudgeEnd = a.end + _leftSlop; + } + } + // If not, add the windows irrespective of strand + else { + if ((int) (a.start - _leftSlop) > 0) + fudgeStart = a.start - _leftSlop; + else fudgeStart = 0; + fudgeEnd = a.end + _rightSlop; + } +} + |
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowBed.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/windowBed/windowBed.h Thu Nov 03 10:25:04 2011 -0400 |
b |
/*****************************************************************************
  windowBed.h

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licensed under the GNU General Public License 2.0 license.
******************************************************************************/
#ifndef WINDOWBED_H
#define WINDOWBED_H

#include "api/BamReader.h"
#include "api/BamWriter.h"
#include "api/BamAux.h"
using namespace BamTools;

#include "bedFile.h"
#include <vector>
#include <iostream>
#include <fstream>

using namespace std;

//************************************************
// BedWindow: reports the features of B that fall
// inside a window placed around each feature of A.
//************************************************
class BedWindow {

public:

    // constructor
    // Stores the options and runs the whole analysis before returning
    // (BAM processing when bamInput is true, BED processing otherwise).
    BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop,
              bool anyHit, bool noHit, bool writeCount, bool strandWindows,
              bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam);

    // destructor
    ~BedWindow(void);

private:

    string _bedAFile;           // name of the A input (BED, or BAM when _bamInput)
    string _bedBFile;           // name of the B input
    bool _anyHit;               // report A once if at least one overlap exists
    bool _writeCount;           // report A followed by its number of overlaps
    int _leftSlop;              // bases subtracted from the start of A
    int _rightSlop;             // bases added to the end of A
    bool _noHit;                // report A only when it has no overlap
    bool _strandWindows;        // swap the two slops for features not on "+"
    bool _matchOnSameStrand;    // forwarded to the overlap search in B
    bool _matchOnDiffStrand;    // forwarded to the overlap search in B
    bool _bamInput;             // A is read as BAM
    bool _bamOutput;            // write BAM to stdout instead of BED
    bool _isUncompressedBam;    // selects BamWriter::Uncompressed

    // instance of a bed file class.
    BedFile *_bedA, *_bedB;

    // methods

    // Process A as a BED file.
    void WindowIntersectBed();
    // Process A as the BAM file bamFile.
    void WindowIntersectBam(string bamFile);
    // Print the overlaps for a according to the reporting flags; hits is
    // scratch storage for the features found in B.
    void FindWindowOverlaps(const BED &a, vector<BED> &hits);
    // Return true when B has at least one overlap for a.
    bool FindOneOrMoreWindowOverlaps(const BED &a);
    // Compute the window around a; fudgeStart is never below 0.
    void AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd);

};
#endif /* WINDOWBED_H */
b |
diff -r 000000000000 -r dfcd8b6c1bda BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp Thu Nov 03 10:25:04 2011 -0400 |
[ |
b'@@ -0,0 +1,263 @@\n+/*****************************************************************************\n+ windowMain.cpp\n+\n+ (c) 2009 - Aaron Quinlan\n+ Hall Laboratory\n+ Department of Biochemistry and Molecular Genetics\n+ University of Virginia\n+ aaronquinlan@gmail.com\n+\n+ Licenced under the GNU General Public License 2.0 license.\n+******************************************************************************/\n+#include "windowBed.h"\n+#include "version.h"\n+\n+using namespace std;\n+\n+// define the version\n+#define PROGRAM_NAME "windowBed"\n+\n+// define our parameter checking macro\n+#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n+\n+// function declarations\n+void ShowHelp(void);\n+\n+\n+int main(int argc, char* argv[]) {\n+\n+ // our configuration variables\n+ bool showHelp = false;\n+\n+ // input files\n+ string bedAFile;\n+ string bedBFile;\n+\n+ // input arguments\n+ int leftSlop = 1000;\n+ int rightSlop = 1000;\n+\n+ bool haveBedA = false;\n+ bool haveBedB = false;\n+ bool noHit = false;\n+ bool anyHit = false;\n+ bool writeCount = false;\n+ bool haveSlop = false;\n+ bool haveLeft = false;\n+ bool haveRight = false;\n+ bool strandWindows = false;\n+ bool matchOnSameStrand = false;\n+ bool matchOnDiffStrand = false;\n+ bool inputIsBam = false;\n+ bool outputIsBam = true;\n+ bool uncompressedBam = false;\n+\n+ // check to see if we should print out some help\n+ if(argc <= 1) showHelp = true;\n+\n+ for(int i = 1; i < argc; i++) {\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n+ (PARAMETER_CHECK("--help", 5, parameterLength))) {\n+ showHelp = true;\n+ }\n+ }\n+\n+ if(showHelp) ShowHelp();\n+\n+ // do some parsing (all of these parameters require 2 strings)\n+ for(int i = 1; i < argc; i++) {\n+\n+ int parameterLength = (int)strlen(argv[i]);\n+\n+ if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n+ if ((i+1) < 
argc) {\n+ haveBedA = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedA = true;\n+ inputIsBam = true;\n+ bedAFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveBedB = true;\n+ bedBFile = argv[i + 1];\n+ i++;\n+ }\n+ }\n+ else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n+ outputIsBam = false;\n+ }\n+ else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n+ anyHit = true;\n+ }\n+ else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n+ writeCount = true;\n+ }\n+ else if (PARAMETER_CHECK("-v", 2, parameterLength)) {\n+ noHit = true;\n+ }\n+ else if (PARAMETER_CHECK("-sw", 3, parameterLength)) {\n+ strandWindows = true;\n+ }\n+ else if (PARAMETER_CHECK("-sm", 3, parameterLength)) {\n+ matchOnSameStrand = true;\n+ }\n+ else if (PARAMETER_CHECK("-Sm", 3, parameterLength)) {\n+ matchOnDiffStrand = true;\n+ }\n+ else if (PARAMETER_CHECK("-w", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveSlop = true;\n+ leftSlop = atoi(argv[i + 1]);\n+ rightSlop = leftSlop;\n+ i++;\n+ }\n+ }\n+ else if (PARAMETER_CHECK("-l", 2, parameterLength)) {\n+ if ((i+1) < argc) {\n+ haveLeft = true;\n+ leftSlop = atoi(argv[i + 1]);\n+ i++;\n+ }\n+ '..b'ressedBam);\n+ delete bi;\n+ return 0;\n+ }\n+ else {\n+ ShowHelp();\n+ }\n+}\n+\n+\n+void ShowHelp(void) {\n+\n+ cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n+\n+ cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n+\n+ cerr << "Summary: Examines a \\"window\\" around each feature in A and" << endl;\n+ cerr << "\\t reports all features in B that overlap the window. For each" << endl;\n+ cerr << "\\t overlap the entire entry in A and B are reported." 
<< endl << endl;\n+\n+ cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n+\n+ cerr << "Options: " << endl;\n+\n+ cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;\n+\n+ cerr << "\\t-ubam\\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n+\n+ cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. The default" << endl;\n+ cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n+\n+ cerr << "\\t-w\\t" << "Base pairs added upstream and downstream of each entry" << endl;\n+ cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n+ cerr << "\\t\\t- Creates symterical \\"windows\\" around A." << endl;\n+ cerr << "\\t\\t- Default is 1000 bp." << endl;\n+ cerr << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+ cerr << "\\t-l\\t" << "Base pairs added upstream (left of) of each entry" << endl;\n+ cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n+ cerr << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n+ cerr << "\\t\\t- Default is 1000 bp." << endl;\n+ cerr << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+ cerr << "\\t-r\\t" << "Base pairs added downstream (right of) of each entry" << endl;\n+ cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n+ cerr << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n+ cerr << "\\t\\t- Default is 1000 bp." << endl;\n+ cerr << "\\t\\t- (INTEGER)" << endl << endl;\n+\n+ cerr << "\\t-sw\\t" << "Define -l and -r based on strand. For example if used, -l 500" << endl;\n+ cerr << "\\t\\tfor a negative-stranded feature will add 500 bp downstream." << endl;\n+ cerr << "\\t\\t- Default = disabled." << endl << endl;\n+\n+ cerr << "\\t-sm\\t" << "Only report hits in B that overlap A on the _same_ strand." 
<< endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-Sm\\t" << "Only report hits in B that overlap A on the _opposite_ strand." << endl;\n+ cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n+\n+ cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n+ cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl << endl;\n+\n+ cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n+ cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n+ cerr << "\\t\\t- Overlaps restricted by -f." << endl << endl;\n+\n+ cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n+ cerr << "\\t\\t- Similar to \\"grep -v.\\"" << endl << endl;\n+\n+ // end the program here\n+ exit(1);\n+}\n' |