Previous changeset 0:dfcd8b6c1bda (2011-11-03) |
Commit message:
Deleted selected files |
removed:
BEDTools-Version-2.14.3/LICENSE BEDTools-Version-2.14.3/Makefile BEDTools-Version-2.14.3/README.rst BEDTools-Version-2.14.3/RELEASE_HISTORY BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed BEDTools-Version-2.14.3/genomes/human.hg18.genome BEDTools-Version-2.14.3/genomes/human.hg19.genome BEDTools-Version-2.14.3/genomes/mouse.mm8.genome BEDTools-Version-2.14.3/genomes/mouse.mm9.genome BEDTools-Version-2.14.3/src/Makefile BEDTools-Version-2.14.3/src/annotateBed/Makefile BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp BEDTools-Version-2.14.3/src/bamToBed/Makefile BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp BEDTools-Version-2.14.3/src/bedToBam/Makefile BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp BEDTools-Version-2.14.3/src/bedToIgv/Makefile BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp BEDTools-Version-2.14.3/src/closestBed/Makefile BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp BEDTools-Version-2.14.3/src/closestBed/closestBed.h BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp BEDTools-Version-2.14.3/src/complementBed/Makefile BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp BEDTools-Version-2.14.3/src/complementBed/complementBed.h BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp BEDTools-Version-2.14.3/src/coverageBed/Makefile BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp BEDTools-Version-2.14.3/src/cuffToTrans/Makefile BEDTools-Version-2.14.3/src/fastaFromBed/Makefile BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp BEDTools-Version-2.14.3/src/fjoin/Makefile BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp BEDTools-Version-2.14.3/src/fjoin/fjoin.h BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp BEDTools-Version-2.14.3/src/flankBed/Makefile BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp BEDTools-Version-2.14.3/src/flankBed/flankBed.h BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp BEDTools-Version-2.14.3/src/intersectBed/Makefile BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp BEDTools-Version-2.14.3/src/linksBed/Makefile BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp BEDTools-Version-2.14.3/src/linksBed/linksBed.h BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp BEDTools-Version-2.14.3/src/mergeBed/Makefile BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp BEDTools-Version-2.14.3/src/multiBamCov/Makefile BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h BEDTools-Version-2.14.3/src/nucBed/Makefile BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp BEDTools-Version-2.14.3/src/nucBed/nucBed.h BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp BEDTools-Version-2.14.3/src/overlap/Makefile BEDTools-Version-2.14.3/src/overlap/overlap.cpp BEDTools-Version-2.14.3/src/pairToBed/Makefile BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp BEDTools-Version-2.14.3/src/pairToPair/Makefile BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp BEDTools-Version-2.14.3/src/shuffleBed/Makefile BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp BEDTools-Version-2.14.3/src/slopBed/Makefile BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp BEDTools-Version-2.14.3/src/slopBed/slopBed.h BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp BEDTools-Version-2.14.3/src/sortBed/Makefile BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp BEDTools-Version-2.14.3/src/sortBed/sortBed.h BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp BEDTools-Version-2.14.3/src/subtractBed/Makefile BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp BEDTools-Version-2.14.3/src/tagBam/Makefile BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp BEDTools-Version-2.14.3/src/tagBam/tagBam.h BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE BEDTools-Version-2.14.3/src/utils/BamTools/Makefile BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h BEDTools-Version-2.14.3/src/utils/Fasta/Makefile BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp BEDTools-Version-2.14.3/src/utils/Fasta/split.h BEDTools-Version-2.14.3/src/utils/bedFile/Makefile BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h BEDTools-Version-2.14.3/src/utils/fileType/Makefile BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp BEDTools-Version-2.14.3/src/utils/fileType/fileType.h BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB BEDTools-Version-2.14.3/src/utils/gzstream/Makefile BEDTools-Version-2.14.3/src/utils/gzstream/README BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o BEDTools-Version-2.14.3/src/utils/gzstream/version BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h BEDTools-Version-2.14.3/src/utils/tabFile/Makefile BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h BEDTools-Version-2.14.3/src/utils/version/version.h BEDTools-Version-2.14.3/src/windowBed/Makefile BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp BEDTools-Version-2.14.3/src/windowBed/windowBed.h BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/LICENSE --- a/BEDTools-Version-2.14.3/LICENSE Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,340 +0,0 @@\n-\t\t GNU GENERAL PUBLIC LICENSE\n-\t\t Version 2, June 1991\n-\n- Copyright (C) 1989, 1991 Free Software Foundation, Inc.,\n- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n- Everyone is permitted to copy and distribute verbatim copies\n- of this license document, but changing it is not allowed.\n-\n-\t\t\t Preamble\n-\n- The licenses for most software are designed to take away your\n-freedom to share and change it. By contrast, the GNU General Public\n-License is intended to guarantee your freedom to share and change free\n-software--to make sure the software is free for all its users. This\n-General Public License applies to most of the Free Software\n-Foundation\'s software and to any other program whose authors commit to\n-using it. (Some other Free Software Foundation software is covered by\n-the GNU Lesser General Public License instead.) You can apply it to\n-your programs, too.\n-\n- When we speak of free software, we are referring to freedom, not\n-price. Our General Public Licenses are designed to make sure that you\n-have the freedom to distribute copies of free software (and charge for\n-this service if you wish), that you receive source code or can get it\n-if you want it, that you can change the software or use pieces of it\n-in new free programs; and that you know you can do these things.\n-\n- To protect your rights, we need to make restrictions that forbid\n-anyone to deny you these rights or to ask you to surrender the rights.\n-These restrictions translate to certain responsibilities for you if you\n-distribute copies of the software, or if you modify it.\n-\n- For example, if you distribute copies of such a program, whether\n-gratis or for a fee, you must give the recipients all the rights that\n-you have. You must make sure that they, too, receive or can get the\n-source code. And you must show them these terms so they know their\n-rights.\n-\n- We protect your rights with two steps: (1) copyright the software, and\n-(2) offer you this license which gives you legal permission to copy,\n-distribute and/or modify the software.\n-\n- Also, for each author\'s protection and ours, we want to make certain\n-that everyone understands that there is no warranty for this free\n-software. If the software is modified by someone else and passed on, we\n-want its recipients to know that what they have is not the original, so\n-that any problems introduced by others will not reflect on the original\n-authors\' reputations.\n-\n- Finally, any free program is threatened constantly by software\n-patents. We wish to avoid the danger that redistributors of a free\n-program will individually obtain patent licenses, in effect making the\n-program proprietary. To prevent this, we have made it clear that any\n-patent must be licensed for everyone\'s free use or not licensed at all.\n-\n- The precise terms and conditions for copying, distribution and\n-modification follow.\n-\n-\t\t GNU GENERAL PUBLIC LICENSE\n- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n-\n- 0. This License applies to any program or other work which contains\n-a notice placed by the copyright holder saying it may be distributed\n-under the terms of this General Public License. The "Program", below,\n-refers to any such program or work, and a "work based on the Program"\n-means either the Program or any derivative work under copyright law:\n-that is to say, a work containing the Program or a portion of it,\n-either verbatim or with modifications and/or translated into another\n-language. (Hereinafter, translation is included without limitation in\n-the term "modification".) Each licensee is addressed as "you".\n-\n-Activities other than copying, distribution and modification are not\n-covered by this License; they are outside its scope. The act of\n-running the Program is not restricted, and the output from the Program\n-is covered only if its contents constitute a work based on the\n-Program (independent of having been made by running the Program).\n-Whethe'..b'/OR OTHER PARTIES\n-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\n-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\n-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS\n-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE\n-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\n-REPAIR OR CORRECTION.\n-\n- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\n-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\n-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\n-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\n-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\n-POSSIBILITY OF SUCH DAMAGES.\n-\n-\t\t END OF TERMS AND CONDITIONS\n-\n-\t How to Apply These Terms to Your New Programs\n-\n- If you develop a new program, and you want it to be of the greatest\n-possible use to the public, the best way to achieve this is to make it\n-free software which everyone can redistribute and change under these terms.\n-\n- To do so, attach the following notices to the program. It is safest\n-to attach them to the start of each source file to most effectively\n-convey the exclusion of warranty; and each file should have at least\n-the "copyright" line and a pointer to where the full notice is found.\n-\n- <one line to give the program\'s name and a brief idea of what it does.>\n- Copyright (C) <year> <name of author>\n-\n- This program is free software; you can redistribute it and/or modify\n- it under the terms of the GNU General Public License as published by\n- the Free Software Foundation; either version 2 of the License, or\n- (at your option) any later version.\n-\n- This program is distributed in the hope that it will be useful,\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n- GNU General Public License for more details.\n-\n- You should have received a copy of the GNU General Public License along\n- with this program; if not, write to the Free Software Foundation, Inc.,\n- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n-\n-Also add information on how to contact you by electronic and paper mail.\n-\n-If the program is interactive, make it output a short notice like this\n-when it starts in an interactive mode:\n-\n- Gnomovision version 69, Copyright (C) year name of author\n- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n- This is free software, and you are welcome to redistribute it\n- under certain conditions; type `show c\' for details.\n-\n-The hypothetical commands `show w\' and `show c\' should show the appropriate\n-parts of the General Public License. Of course, the commands you use may\n-be called something other than `show w\' and `show c\'; they could even be\n-mouse-clicks or menu items--whatever suits your program.\n-\n-You should also get your employer (if you work as a programmer) or your\n-school, if any, to sign a "copyright disclaimer" for the program, if\n-necessary. Here is a sample; alter the names:\n-\n- Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n- `Gnomovision\' (which makes passes at compilers) written by James Hacker.\n-\n- <signature of Ty Coon>, 1 April 1989\n- Ty Coon, President of Vice\n-\n-This General Public License does not permit incorporating your program into\n-proprietary programs. If your program is a subroutine library, you may\n-consider it more useful to permit linking proprietary applications with the\n-library. If this is what you want to do, use the GNU Lesser General\n-Public License instead of this License.\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/Makefile --- a/BEDTools-Version-2.14.3/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,89 +0,0 @@ -# ========================== -# BEDTools Makefile -# (c) 2009 Aaron Quinlan -# ========================== - -# define our object and binary directories -export OBJ_DIR = obj -export BIN_DIR = bin -export SRC_DIR = src -export CXX = g++ -export CXXFLAGS = -Wall -O2 -D_FILE_OFFSET_BITS=64 -fPIC -export LIBS = -lz -export BT_ROOT = src/utils/BamTools/ - - -SUBDIRS = $(SRC_DIR)/annotateBed \ - $(SRC_DIR)/bamToBed \ - $(SRC_DIR)/bedToBam \ - $(SRC_DIR)/bedToIgv \ - $(SRC_DIR)/bed12ToBed6 \ - $(SRC_DIR)/closestBed \ - $(SRC_DIR)/complementBed \ - $(SRC_DIR)/coverageBed \ - $(SRC_DIR)/fastaFromBed \ - $(SRC_DIR)/flankBed \ - $(SRC_DIR)/genomeCoverageBed \ - $(SRC_DIR)/intersectBed \ - $(SRC_DIR)/linksBed \ - $(SRC_DIR)/maskFastaFromBed \ - $(SRC_DIR)/mergeBed \ - $(SRC_DIR)/multiBamCov \ - $(SRC_DIR)/multiIntersectBed \ - $(SRC_DIR)/nucBed \ - $(SRC_DIR)/overlap \ - $(SRC_DIR)/pairToBed \ - $(SRC_DIR)/pairToPair \ - $(SRC_DIR)/shuffleBed \ - $(SRC_DIR)/slopBed \ - $(SRC_DIR)/sortBed \ - $(SRC_DIR)/subtractBed \ - $(SRC_DIR)/tagBam \ - $(SRC_DIR)/unionBedGraphs \ - $(SRC_DIR)/windowBed - -UTIL_SUBDIRS = $(SRC_DIR)/utils/lineFileUtilities \ - $(SRC_DIR)/utils/bedFile \ - $(SRC_DIR)/utils/bedGraphFile \ - $(SRC_DIR)/utils/chromsweep \ - $(SRC_DIR)/utils/gzstream \ - $(SRC_DIR)/utils/fileType \ - $(SRC_DIR)/utils/bedFilePE \ - $(SRC_DIR)/utils/sequenceUtilities \ - $(SRC_DIR)/utils/tabFile \ - $(SRC_DIR)/utils/BamTools \ - $(SRC_DIR)/utils/BamTools-Ancillary \ - $(SRC_DIR)/utils/Fasta \ - $(SRC_DIR)/utils/genomeFile - -all: - [ -d $(OBJ_DIR) ] || mkdir -p $(OBJ_DIR) - [ -d $(BIN_DIR) ] || mkdir -p $(BIN_DIR) - - @echo "Building BEDTools:" - @echo "=========================================================" - - @for dir in $(UTIL_SUBDIRS); do \ - echo "- Building in $$dir"; \ - $(MAKE) --no-print-directory -C $$dir; \ - echo ""; \ - done - - @for dir in $(SUBDIRS); do \ - echo "- Building in $$dir"; \ - $(MAKE) --no-print-directory -C $$dir; \ - echo ""; \ - done - - -.PHONY: all - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - @rm -Rf $(BT_ROOT)/lib - @rm -f $(BT_ROOT)/src/api/*.o - @rm -f $(BT_ROOT)/src/api/internal/*.o - @rm -Rf $(BT_ROOT)/include - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/README.rst --- a/BEDTools-Version-2.14.3/README.rst Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -============================== - BEDTools -============================== - -Created by Aaron Quinlan Spring 2009. - -Copyright 2009,2010,2011 Aaron Quinlan. All rights reserved. - -Stable releases: http://code.google.com/p/bedtools - -Repository: https://github.com/arq5x/bedtools - -Released under GNU public license version 2 (GPL v2). - - -Summary -------- -BEDTools is a collection of utilities for comparing, summarizing, and -intersecting genomic features in BED, GTF/GFF, VCF and BAM formats. - - -Manual ------- -See the extensive PDF manual included at: http://code.google.com/p/bedtools/downloads/detail?name=BEDTools-User-Manual.v4.pdf. - -This manual covers many common usage examples. There are also examples available at: -http://code.google.com/p/bedtools/wiki/Usage -http://code.google.com/p/bedtools/wiki/UsageAdvanced - -Installation ------------- -Git -... -git clone git://github.com/arq5x/bedtools.git - -Download tarball - that big gray button on the upper right. -........................................................... -#. Unpack the source downloaded tarball. -#. cd into the expanded folder. -#. Type "make clean" and hit enter. -#. Type "make all" and hit enter. -#. If you encountered no errors, then all of the BED Tools should now be in bin/ - If not, try to troubleshoot then email me: aaronquinlan at gmail dot com -#. Copy the files in bin/ to ~/bin or if you have the privileges, to /usr/local/bin. -#. Use the tools. - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/RELEASE_HISTORY --- a/BEDTools-Version-2.14.3/RELEASE_HISTORY Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,646 +0,0 @@\n-Version 2.14.2 (2-Nov-2011)\n-\n-Bug Fixes\n-=========\n-1. Corrected the help for closestBed. It now correctly reads -io instead of -no.\n-2. Fixed regression in closestBed injected in version 2.13.4 whereby B features to the right of an A feature were missed.\n-\n-New tool.\n-============\n-1. Added the multiIntersectBed tool for reporting common intervals among multiple **sorted** BED/GFF/VCF files.\n-\n-\n-\n-Version 2.13.4 (26-Oct-2011)\n-Bug Fixes\n-=========\n-1. The -sorted option (chromsweep) in intersectBed now obeys -s and -S. I had neglected to implement that. Thanks to Paul Ryvkin for pointing this out.\n-2. The -split option was mistakenly splitting of D CIGAR ops.\n-3. The Makefile was not including zlib properly for newer versions of GCC. Thanks to Istvan Albert for pointing this out and providing the solution.\n-\n-Improvements\n-============\n-1. Thanks to Jacob Biesinger for a new option (-D) in closestBed that will report _signed_ distances. Moreover, the new option allows fine control over whether the distances are reported based on the reference genome or based on the strand of the A or B feature. Many thanks to Jacob.\n-2. Thanks to some nice analysis from Paul Ryvkin, I realized that the -sorted option was using way too much memory in certain cases where there is a chromosome change in a sorted BED file. This has been corrected.\n-\n-Version 2.13.3 (30-Sept-2011)\n-Bug Fixes\n-============\n-1. intersectBed detected, but did not report overlaps when using BAM input and -bed.\n-\n-Other\n-=====\n-1. Warning that -sorted trusts, but does not enforce that data is actually sorted.\n-\n-\n-Version 2.13.2 (23-Sept-2011)\n-\n-New algorithm\n-=============\n-1. Preliminary release of the chrom_sweep algorithm.\n-\n-New options\n-===========\n-1. genomeCoverageBed no longer requires a genome file when working with BAM input. It instead uses the BAM header.\n-2. tagBam now has a -score option for annotating alignments with the BED "scores" field in annotation files. This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line.\n-\n-Bug fixes\n-=========\n-1. Correct a bug that prevented proper BAM support in intersectBed.\n-2. Improved detection of GFF features with negative coordinates.\n-\n-\n-\n-Version 2.13.1 (6-Sept-2011)\n-\n-New options\n-===========\n-1. tagBam now has -s and -S options for only annotating alignments with features on the same and opposite strand, respectively.\n-2. tagBam now has a -names option for annotating alignments with the "name" field in annotation files. This overrides the default behavior, which is to use the -labels associated with the annotation files passed in on the command line. Currently, this works well with BED files, but given the limited metadata support for GFF files, annotating with -names and GFF files may not work as well as wished, depending on the type of GFF file used.\n-\n-\n-\n-Version 2.13.0 (1-Sept-2011)\n-\n-New tools\n-=========\n-1. tagBam. This tool annotates a BAM file with custom tag fields based on overlaps with BED/GFF/VCF files.\n-For example:\n-$ tagBam -i aln.bam -files exons.bed introns.bed cpg.bed utrs.bed \\\n- -tags exonic intonic cpg utr \\\n- > aln.tagged.bam\n-For alignments that have overlaps, you should see new BAM tags like "YB:Z:exonic", "YB:Z:cpg;utr"\n-\n-2. multiBamCov. The new tool counts sequence coverage for multiple bams at specific loci defined in a BED/GFF/VCF file.\n-For example:\n-\n-$ multiBamCov -bams aln.1.bam aln.2.bam aln3.bam -bed exons.bed\n-chr1\t861306\t861409\tSAMD11\t1\t+\t181\t280\t236\n-chr1\t865533\t865718\tSAMD11\t2\t+\t249\t365\t374\n-chr1\t866393\t866496\tSAMD11\t3\t+\t162\t298\t322\n-\n-where the last 3 columns represent the number of alignments overlapping each interval from the three BAM file.\n-\n-The following options are available to control which types of alignments are are counted.\n--q\tMinimum mapping quality allowed. Default is 0.\n-\n--D\tInclude duplicate-marked reads. Defa'..b'.\n-\n-\n-Version 2.2.1\n-1. Fixed a very obvious bug in subtractBed that caused improper behavior when a feature in A was overlapped by more than one feature in B.\n-Many thanks to folks in the Hannon lab at CSHL for pointing this out.\n-\n-\n-Version 2.2.0\n-=== Notable changes in this release ===\n-1. coverageBed will optionally only count features in BED file A (e.g. sequencing reads) that overlap with \n-\tthe intervals/windows in BED file B on the same strand. This has been requested several times recently \n-\tand facilitates CHiP-Seq and RNA-Seq experiments.\n-\n-2. intersectBed can now require a minimum __reciprocal__ overlap between intervals in BED A and BED B. For example,\n-\tpreviously, if one used -f 0.90, it required that a feature in B overlap 90% of the feature in A for the "hit"\n-\tto be reported. If one adds the -r (reciprocal) option, the hit must also cover 90% of the feature in B. This helps\n-\tto exclude overlaps between say small features in A and large features in B:\n-\n-\tA ==========\n-\tB **********************************************************\n-\t\t\n-\t-f 0.50 (Reported), whereas -f 0.50 -r (Not reported)\n-\n-3. The score field has been changed to be a string. While this deviates from the UCSC definition, it allows one to track\n-\tmuch more meaningful information about a feature/interval. For example, score could now be:\n-\t\n-\t7.31E-05 (a p-value)\n-\t0.334577 (mean enrichment)\n-\t2:2.2:40:2 (several values encoded in a string)\n-\t\n-4. closestBed now, by default, reports __all__ intervals in B that overlap equally with an interval in A. Previously, it\n-\tmerely reported the first such feature that appeared in B. Here\'s a cartoon explaining the difference.\n-\t\n-\t**Prior behavior**\n-\t\n-\tA\t ==============\n-\tB.1 \t\t\t\t++++++++++++++\n-\tB.2 \t\t\t\t++++++++++++++\n-\tB.3 \t\t\t\t+++++++++\n-\n-\t-----------------------------------------\n-\tResult = B.1 \t\t\t++++++++++++++\n-\t\n-\t\n-\t**Current behavior**\n-\t\n-\tA\t ==============\n-\tB.1 \t\t\t\t++++++++++++++\n-\tB.2 \t\t\t\t++++++++++++++\n-\tB.3 \t\t\t\t+++++++++\n-\n-\t-----------------------------------------\n-\tResult = B.1 \t\t\t++++++++++++++\n-\t\t\t B.2 \t\t\t++++++++++++++\n-\n-\tUsing the -t option, one can also choose to report either the first or the last entry in B in the event of a tie.\n-\n-5. Several other minor changes to the algorithms have been made to increase speed a bit.\n-\n-\n-VERSION 2.1.2\n-1. Fixed yet another bug in the parsing of "track" or "browser" lines. Sigh...\n-2. Change the "score" column (i.e. column 5) to b stored as a string. While this deviates\n- from the UCSC convention, it allows significantly more information to be packed into the column.\n-\n-\n-VERSION 2.1.1\n-1. Added limits.h to bedFile.h to fix compilation issues on some systems.\n-2. Fixed bug in testing for "track" or "browser" lines.\n-\n-\n-VERSION 2.1.0\n-1. Fixed a bug in peIntersectBed that prevented -a from being correctly handled when passed via stdin.\n-2. Added new functionality to coverageBed that calculates the density of coverage.\n-3. Fixed bug in geneomCoverageBed.\n-\n-\n-VERSION 2.0.1\n-1. Added the ability to retain UCSC browser track/browser headers in BED files.\n-\n-\n-VERSION 2.0\n-1. Sped up the file parsing. ~10-20% increase in speed.\n-2. Created reportBed() as a common method in the bedFile class. Cleans up the code quite nicely.\n-3. Added the ability to compare BED files accounting for strandedness.\n-4. Paired-end intersect.\n-5. Fixed bug that prevented overlaps from being reported when the overlap fraction requested is 1.0\n-\n-\n-\n-VERSION 1.2, 04/27/2009. (1eb06115bdf3c49e75793f764a70c3501bb53f33)\n-1. Added subtractBed.\n-\tA. Fixed bug that prevented "split" overlaps from being reported.\n-\tB. Prevented A from being reported if >=1 feature in B completely spans it.\n-2. Added linksBed.\n-3. Added the ability to define separate windows for upstream and downstream to windowBed.\n-\n-\n-VERSION 1.1, 04/23/2009. (b74eb1afddca9b70bfa90ba763d4f2981a56f432)\n-Initial release.\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed --- a/BEDTools-Version-2.14.3/data/knownGene.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,828 +0,0 @@\n-chr21\t9928613\t10012791\tuc002yip.1\t0\t-\t9928775\t9995604\t0\t24\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,62332,63266,64549,66980,78302,81026,84020,\n-chr21\t9928613\t10012791\tuc002yiq.1\t0\t-\t9928775\t9995604\t0\t23\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,84020,\n-chr21\t9928613\t10012791\tuc002yir.1\t0\t-\t9928775\t9995604\t0\t22\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,54,54,54,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,63266,64549,66980,78302,81026,84020,\n-chr21\t9928613\t10012791\tuc010gkv.1\t0\t-\t9928775\t9973168\t0\t19\t298,71,93,80,106,81,62,89,82,61,65,64,100,120,162,51,58,109,158,\t0,2082,3564,7620,9627,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,78302,81026,84020,\n-chr21\t9928613\t10061300\tuc002yis.1\t0\t-\t9928613\t9928613\t0\t33\t298,71,93,80,81,62,89,82,61,65,64,100,120,162,51,60,54,54,54,58,109,120,129,213,66,130,165,197,105,102,117,120,702,\t0,2082,3564,7620,13341,15191,27109,27296,28194,35165,35968,36178,37925,44523,46170,52998,63266,64549,66980,78302,81026,89277,91464,104695,106174,106728,108195,108605,114070,114367,119980,122855,131985,\n-chr21\t10042683\t10120796\tuc002yit.1\t0\t-\t10071441\t10120588\t0\t10\t105,102,117,120,702,115,172,163,101,223,\t0,297,5910,8785,17915,26668,28637,37348,76733,77890,\n-chr21\t10042683\t10120808\tuc002yiu.1\t0\t-\t10080193\t10120608\t0\t9\t105,102,117,702,115,172,163,101,215,\t0,297,5910,17915,26668,28637,37348,76733,77910,\n-chr21\t10079666\t10120808\tuc002yiv.1\t0\t-\t10081686\t10120608\t0\t4\t528,91,101,215,\t0,1930,39750,40927,\n-chr21\t10080031\t10081687\tuc002yiw.1\t0\t-\t10080031\t10080031\t0\t2\t200,91,\t0,1565,\n-chr21\t10081660\t10120796\tuc002yix.2\t0\t-\t10081660\t10081660\t0\t3\t27,101,223,\t0,37756,38913,\n-chr21\t13332351\t13346202\tuc002yiy.2\t0\t+\t13332351\t13332351\t0\t5\t265,115,2492,65,215,\t0,4342,4619,10805,13636,\n-chr21\t13336975\t13346202\tuc002yiz.2\t0\t+\t13336975\t13336975\t0\t4\t169,108,65,215,\t0,2379,6181,9012,\n-chr21\t13361138\t13412440\tuc002yja.2\t0\t+\t13361189\t13412250\t0\t3\t102,118,411,\t0,2228,50891,\n-chr21\t13904368\t13935777\tuc002yjb.1\t0\t+\t13904420\t13935758\t0\t11\t573,115,174,107,138,71,71,45,167,124,241,\t0,4946,5220,8172,9981,12628,18205,19679,20764,29338,31168,\n-chr21\t13944438\t13944477\tuc002yjc.1\t0\t+\t13944438\t13944438\t0\t1\t39,\t0,\n-chr21\t13945076\t13945106\tuc002yjd.1\t0\t+\t13945076\t13945076\t0\t1\t30,\t0,\n-chr21\t13973491\t13975330\tuc002yje.1\t0\t-\t13973781\t13974201\t0\t1\t1839,\t0,\n-chr21\t14137333\t14142556\tuc002yjf.1\t0\t-\t14137333\t14137333\t0\t6\t291,114,270,129,191,275,\t0,880,1863,2871,3617,4948,\n-chr21\t14200023\t14200052\tuc002yjg.1\t0\t+\t14200023\t14200023\t0\t1\t29,\t0,\n-chr21\t14202070\t14202096\tuc002yjh.1\t0\t-\t14202070\t14202070\t0\t1\t26,\t0,\n-chr21\t14237966\t14274631\tuc002yji.1\t0\t-\t14237966\t14237966\t0\t6\t88,71,73,29,85,738,\t0,1264,7292,7457,10291,35927,\n-chr21\t14270940\t14274631\tuc002yjj.2\t0\t-\t14270940\t14270940\t0\t2\t1809,738,\t0,2953,\n-chr21\t14321612\t14438647\tuc002yjk.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,699,\t0,56529,116336,\n-chr21\t14321612\t14438730\tuc002yjl.2\t0\t+\t14321612\t14321612\t0\t3\t177,195,1030,\t0,56529,116088,\n-chr21\t14403005\t14501125\tuc002yjm.1\t0\t-\t14403184\t14501115\t0\t10\t267,177,112,105,168,90,102,109,386,119,\t0,35809,43759,54605,56409,57548,72944,77147,80220,98001,\n-chr21\t14459414\t14483611\tuc010gkw.1\t0\t-\t14459415\t14483519\t0\t4\t168,102,109,386,\t0,16535,20738,23811,\n-chr21\t14510336\t14522564\tuc002yjo.2\t0\t+\t14510378\t14521485\t0\t5\t138,163,73,100,1493,\t0,3418,4952,8293,10735,\n-chr21\t14510336\t14522564\tuc002yjn.2\t0\t+\t14518639\t14521485\t0\t5\t138,173,73,100,1493,\t0,3408,4952,8293,10735,\n-chr21\t14510336\t14522564\tuc002yjp.2\t0\t+\t14518639\t14521485\t0\t4\t138,73,100,1493,\t0,4952,8293,10735,\n-chr21\t14567990\t14585577\tuc002yjq.1\t0\t+\t14568283\t14585568\t0\t5\t359,62,148,143,100,\t0,5797,13751,14605,17487,\n-chr21\t14567990\t14595563\t'..b',39359,42463,57573,61711,64621,65078,66549,67043,73143,73892,74113,75468,77429,78209,87067,88715,91947,94082,97848,101709,103500,104273,105888,106385,107407,111781,112856,114041,115960,116732,118374,119687,120571,121161,\n-chr21\t46569229\t46690110\tuc002zjj.1\t0\t+\t46578825\t46689668\t0\t47\t323,213,372,81,256,56,175,137,112,223,82,175,218,455,556,147,152,143,233,163,213,230,117,21,207,171,153,879,156,771,103,155,141,174,196,223,151,450,245,103,174,120,230,77,139,128,486,\t0,1489,9509,21240,21855,22569,24124,24796,26541,28216,29099,30565,32087,38593,41697,56807,60945,63855,64312,65783,66277,72377,73126,73347,74702,76663,77443,86301,87949,91181,93316,97082,100943,102734,103507,105122,105619,106641,111015,112090,113275,115194,115966,117608,118921,119805,120395,\n-chr21\t46578738\t46594162\tuc010gqk.1\t0\t+\t46578738\t46578738\t0\t6\t372,81,259,56,175,137,\t0,11731,12326,13060,14615,15287,\n-chr21\t46699327\t46703021\tuc002zjk.1\t0\t-\t46699327\t46699327\t0\t2\t932,597,\t0,3097,\n-chr21\t46703317\t46790647\tuc002zjl.1\t0\t+\t46703472\t46790347\t0\t20\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,426,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,\n-chr21\t46703317\t46791548\tuc002zjm.1\t0\t+\t46703472\t46791387\t0\t21\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n-chr21\t46703317\t46791548\tuc010gql.1\t0\t+\t46703472\t46791387\t0\t20\t246,72,120,120,252,120,198,92,111,124,110,103,65,94,120,115,140,137,81,293,\t0,25780,31623,38011,39605,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,\n-chr21\t46703317\t46794451\tuc002zjn.1\t0\t+\t46703472\t46794259\t0\t22\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,340,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,\n-chr21\t46703317\t46813028\tuc002zjo.1\t0\t+\t46703472\t46811963\t0\t38\t246,72,120,120,252,129,120,198,92,111,124,110,103,65,94,120,115,140,137,81,128,115,202,110,81,124,122,112,110,131,169,171,62,58,75,175,124,1318,\t0,25780,31623,38011,39605,45384,50280,52440,70115,73150,74547,75027,75608,78247,78469,79506,80900,82779,86160,86904,87938,90794,91566,92657,92847,95192,95598,95845,97005,97962,98608,99246,101745,102771,104881,106736,107583,108393,\n-chr21\t46748653\t46789614\tuc002zjp.1\t0\t+\t46748811\t46789614\t0\t14\t177,120,198,92,111,124,110,103,65,94,120,115,140,137,\t0,4944,7104,24779,27814,29211,29691,30272,32911,33133,34170,35564,37443,40824,\n-chr21\t46783075\t46791548\tuc002zjq.1\t0\t+\t46784120\t46791387\t0\t5\t1257,140,137,81,293,\t0,3021,6402,7146,8180,\n-chr21\t46797251\t46803437\tuc002zjr.2\t0\t+\t46798578\t46802830\t0\t7\t1382,122,112,110,131,169,874,\t0,1664,1911,3071,4028,4674,5312,\n-chr21\t46805625\t46813028\tuc002zjs.1\t0\t+\t46806017\t46811963\t0\t5\t521,75,175,124,1318,\t0,2573,4428,5275,6085,\n-chr21\t46808123\t46813028\tuc002zjt.1\t0\t+\t46808123\t46808123\t0\t4\t150,175,124,1318,\t0,1930,2777,3587,\n-chr21\t46842958\t46849463\tuc002zju.1\t0\t-\t46843703\t46846756\t0\t3\t886,139,110,\t0,3660,6395,\n-chr21\t46842958\t46849463\tuc002zjv.1\t0\t-\t46843791\t46846756\t0\t4\t886,94,139,110,\t0,1664,3660,6395,\n-chr21\t46879954\t46904483\tuc002zjw.2\t0\t+\t46881291\t46903264\t0\t7\t149,95,105,183,162,165,1399,\t0,1281,7920,8691,12843,13960,23130,\n-chr21\t46879954\t46909291\tuc002zjx.1\t0\t+\t46881291\t46908667\t0\t12\t149,109,95,105,183,162,165,176,130,137,172,657,\t0,824,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n-chr21\t46879954\t46909291\tuc002zjy.1\t0\t+\t46881291\t46908667\t0\t11\t149,95,105,183,162,165,176,130,137,172,657,\t0,1281,7920,8691,12843,13960,23130,25218,26185,27768,28680,\n-chr21\t46879954\t46909291\tuc010gqm.1\t0\t+\t46881291\t46908667\t0\t9\t149,95,105,183,162,165,137,172,657,\t0,1281,7920,8691,12843,13960,26185,27768,28680,\n-chr21\t46887625\t46906276\tuc002zjz.1\t0\t+\t46892812\t46905317\t0\t6\t354,183,162,165,176,1104,\t0,1020,5172,6289,15459,17547,\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed --- a/BEDTools-Version-2.14.3/data/rmsk.hg18.chr21.bed Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,57261 +0,0 @@\n-chr21\t9719768\t9721892\tALR/Alpha\t1004\t+\n-chr21\t9721905\t9725582\tALR/Alpha\t1010\t+\n-chr21\t9725582\t9725977\tL1PA3\t3288\t+\n-chr21\t9726021\t9729309\tALR/Alpha\t1051\t+\n-chr21\t9729320\t9729809\tL1PA3\t3897\t-\n-chr21\t9729809\t9730866\tL1P1\t8367\t+\n-chr21\t9730866\t9734026\tALR/Alpha\t1036\t-\n-chr21\t9734037\t9757471\tALR/Alpha\t1182\t-\n-chr21\t9757520\t9758476\tALR/Alpha\t1092\t-\n-chr21\t9758521\t9764575\tL1PA3\t26286\t-\n-chr21\t9764577\t9778787\tALR/Alpha\t1141\t-\n-chr21\t9778798\t9788657\tALR/Alpha\t1188\t-\n-chr21\t9788657\t9794680\tL1PA3\t27485\t-\n-chr21\t9794680\t9795266\tALR/Alpha\t1011\t-\n-chr21\t9795278\t9795587\tALR/Alpha\t979\t-\n-chr21\t9795589\t9795713\t(GAATG)n\t308\t+\n-chr21\t9795736\t9795894\t(GAATG)n\t683\t+\n-chr21\t9795911\t9796007\t(GAATG)n\t345\t+\n-chr21\t9796028\t9796187\t(GAATG)n\t756\t+\n-chr21\t9796202\t9796615\t(GAATG)n\t891\t+\n-chr21\t9796637\t9796824\t(GAATG)n\t621\t+\n-chr21\t9796824\t9796866\tHSATII\t242\t-\n-chr21\t9796866\t9797049\t(GAATG)n\t621\t+\n-chr21\t9797067\t9797436\t(GAATG)n\t900\t+\n-chr21\t9797482\t9797839\t(GAATG)n\t1008\t+\n-chr21\t9797866\t9798044\t(GAATG)n\t858\t+\n-chr21\t9798051\t9798118\t(GAGTG)n\t259\t+\n-chr21\t9798118\t9798658\t(GAATG)n\t969\t+\n-chr21\t9798626\t9798765\t(AAATG)n\t201\t+\n-chr21\t9798770\t9798950\t(GAGTG)n\t571\t+\n-chr21\t9798908\t9799265\t(GAATG)n\t942\t+\n-chr21\t9799280\t9799460\t(GAATG)n\t813\t+\n-chr21\t9799500\t9800262\t(GAATG)n\t933\t+\n-chr21\t9800289\t9800469\t(GAATG)n\t666\t+\n-chr21\t9800481\t9800797\t(GAATG)n\t977\t+\n-chr21\t9800840\t9800878\t(GAGTG)n\t225\t+\n-chr21\t9800913\t9801092\t(GAGTG)n\t930\t+\n-chr21\t9801092\t9801169\t(GAATG)n\t298\t+\n-chr21\t9801182\t9801639\t(GAATG)n\t747\t+\n-chr21\t9801651\t9801769\t(GAATG)n\t330\t+\n-chr21\t9801781\t9802265\t(GAATG)n\t747\t+\n-chr21\t9802265\t9802310\t(GAGTG)n\t245\t+\n-chr21\t9802310\t9802490\t(GAATG)n\t1203\t+\n-chr21\t9802490\t9802503\t(GAGTG)n\t245\t+\n-chr21\t9802508\t9802679\t(GAATG)n\t660\t+\n-chr21\t9802699\t9803425\t(GAATG)n\t1008\t+\n-chr21\t9803427\t9803488\t(GAGTG)n\t332\t+\n-chr21\t9803490\t9803789\t(GAATG)n\t708\t+\n-chr21\t9803803\t9804202\t(GAATG)n\t897\t+\n-chr21\t9804215\t9804262\t(GAATG)n\t261\t+\n-chr21\t9804276\t9804450\t(GAATG)n\t771\t+\n-chr21\t9804469\t9804637\t(GAATG)n\t756\t+\n-chr21\t9804660\t9804840\t(GAATG)n\t729\t+\n-chr21\t9804905\t9805085\t(GAATG)n\t726\t+\n-chr21\t9805118\t9805404\t(GAATG)n\t930\t+\n-chr21\t9805416\t9805716\t(GAATG)n\t708\t+\n-chr21\t9805730\t9806084\t(GAATG)n\t1050\t+\n-chr21\t9806147\t9806522\t(GAATG)n\t765\t+\n-chr21\t9806555\t9806812\t(GAATG)n\t661\t+\n-chr21\t9806824\t9807184\t(GAATG)n\t1080\t+\n-chr21\t9807228\t9807661\t(GAATG)n\t888\t+\n-chr21\t9807669\t9807698\t(GAGTG)n\t231\t+\n-chr21\t9807698\t9808290\t(GAATG)n\t807\t+\n-chr21\t9808301\t9808897\t(GAATG)n\t984\t+\n-chr21\t9808920\t9809796\t(GAATG)n\t960\t+\n-chr21\t9809843\t9810023\t(GAATG)n\t972\t+\n-chr21\t9810043\t9810492\t(GAATG)n\t690\t+\n-chr21\t9810503\t9810553\t(GAATG)n\t208\t+\n-chr21\t9810554\t9810733\t(GAGTG)n\t828\t+\n-chr21\t9810696\t9811576\t(GAATG)n\t1005\t+\n-chr21\t9811606\t9811772\t(GAATG)n\t604\t+\n-chr21\t9811778\t9812022\t(GAGTG)n\t858\t+\n-chr21\t9812022\t9812464\t(GAATG)n\t1017\t+\n-chr21\t9812479\t9812900\t(GAATG)n\t729\t+\n-chr21\t9812901\t9812954\t(GAGTG)n\t235\t+\n-chr21\t9812958\t9813124\t(GAATG)n\t740\t+\n-chr21\t9813179\t9813356\t(GAGTG)n\t819\t+\n-chr21\t9813335\t9813790\t(GAATG)n\t837\t+\n-chr21\t9813801\t9813973\t(GAATG)n\t582\t+\n-chr21\t9814004\t9814407\t(GAATG)n\t780\t+\n-chr21\t9814408\t9814467\t(GAGTG)n\t300\t+\n-chr21\t9814467\t9814824\t(GAATG)n\t921\t+\n-chr21\t9814871\t9815045\t(GAATG)n\t654\t+\n-chr21\t9815045\t9815117\t(GAGTG)n\t423\t+\n-chr21\t9815118\t9815297\t(GAATG)n\t926\t+\n-chr21\t9815356\t9815455\t(GAATG)n\t325\t+\n-chr21\t9815463\t9815640\t(GAGTG)n\t981\t+\n-chr21\t9815642\t9815982\t(GAATG)n\t805\t+\n-chr21\t9816000\t9816174\t(GAATG)n\t660\t+\n-chr21\t9816197\t9816535\t(GAATG)n\t919\t+\n-chr21\t9816595\t9816936\t(GAATG)n\t867\t+\n-chr21\t9816995\t9817175\t(GAATG)n\t510\t+\n-chr21\t9817189\t9817257\t(GAGTG)n\t269\t+\n-chr21\t9817258\t9817854\t(GAATG)n\t1092\t+\n-chr21\t9817883\t9818578\t(GAATG)n\t966\t+\n-chr21\t9818589\t9818768\t(GAATG)n\t552\t+\n-chr21\t9818798\t9818860\t(GAATG)n\t213\t+\n-chr21\t9818872\t9819215\t(GAATG)n\t894\t+\n-chr21\t9819230\t9819370\t(GAATG)n\t312\t+\n-chr21\t9819426\t9819773\t(GAATG)n\t897\t+\n-chr21\t9819798\t9819976\t(GAATG)n\t878\t+\n-chr21\t9819990\t9820169\t(GAATG)n\t680\t+\n-chr21\t9820188\t9820366\t(GAATG)n\t738\t+\n-chr21\t9820322\t9820507\t(GAGTG)n\t641\t+\n-chr21\t9820'..b'6885745\t46886054\tAluSx\t1935\t+\n-chr21\t46886059\t46886096\t(TG)n\t270\t+\n-chr21\t46886242\t46886708\tL1ME4a\t405\t+\n-chr21\t46886812\t46886962\tL1ME4a\t361\t+\n-chr21\t46887080\t46887388\tAluSx\t1967\t+\n-chr21\t46888920\t46889201\tMER58B\t1178\t-\n-chr21\t46889293\t46889629\tAluJo\t1519\t+\n-chr21\t46889638\t46889821\tAluJo\t850\t+\n-chr21\t46889914\t46890044\tL1ME3B\t405\t+\n-chr21\t46890044\t46890353\tAluSx\t1946\t-\n-chr21\t46890353\t46890614\tL1ME3B\t405\t+\n-chr21\t46890829\t46891136\tL1MC4a\t413\t-\n-chr21\t46891141\t46891421\tMLT1A1\t693\t-\n-chr21\t46891477\t46891642\tL1MC4_3endX\t343\t-\n-chr21\t46891836\t46892003\tFRAM\t650\t-\n-chr21\t46892029\t46892086\tAT_rich\t22\t+\n-chr21\t46892396\t46892604\tL1MC4a\t314\t-\n-chr21\t46892583\t46892681\tL1MD\t293\t-\n-chr21\t46896438\t46896576\tL1PREC2\t277\t+\n-chr21\t46896932\t46897040\tL1ME4a\t288\t+\n-chr21\t46898261\t46898291\t(T)n\t195\t+\n-chr21\t46898475\t46898726\tL1ME4a\t442\t+\n-chr21\t46898714\t46898903\tL1MD2\t776\t+\n-chr21\t46898903\t46899208\tMER2\t1135\t+\n-chr21\t46899304\t46899922\tL1MD2\t2612\t+\n-chr21\t46899916\t46900310\tL1MD2\t1306\t+\n-chr21\t46900317\t46900724\tMSTB1\t2258\t+\n-chr21\t46900724\t46902105\tL1MD2\t2718\t+\n-chr21\t46902161\t46902336\tL1ME4a\t384\t+\n-chr21\t46902470\t46902579\tMER45B\t546\t-\n-chr21\t46903800\t46903973\tL1M5\t344\t+\n-chr21\t46904289\t46904311\tAT_rich\t22\t+\n-chr21\t46906284\t46906449\tG-rich\t373\t+\n-chr21\t46909243\t46909287\tL2\t195\t+\n-chr21\t46909413\t46909464\tAT_rich\t23\t+\n-chr21\t46909464\t46909768\tAluJo\t2015\t+\n-chr21\t46909769\t46910593\tL1ME3A\t1283\t-\n-chr21\t46910643\t46910947\tAluSx\t2427\t-\n-chr21\t46911036\t46911163\tAluJo\t773\t+\n-chr21\t46911163\t46911432\tAluSx\t2091\t+\n-chr21\t46913108\t46913727\tL1PA3\t5539\t-\n-chr21\t46915754\t46916276\tMLT1E2\t1609\t-\n-chr21\t46916330\t46916418\tL1M5\t226\t-\n-chr21\t46916433\t46916857\tL1M5\t1626\t-\n-chr21\t46916857\t46917170\tAluJo\t1400\t+\n-chr21\t46917170\t46917283\tL1M5\t1626\t-\n-chr21\t46917286\t46917758\tL1MA4A\t2227\t+\n-chr21\t46917764\t46917862\tL1MA4A\t435\t+\n-chr21\t46917957\t46918166\tMIR\t320\t-\n-chr21\t46918500\t46918536\tAT_rich\t22\t+\n-chr21\t46918925\t46919030\tGA-rich\t252\t+\n-chr21\t46919402\t46919654\tL2\t390\t+\n-chr21\t46919654\t46919928\tAluSx\t1723\t-\n-chr21\t46919928\t46920171\tL2\t390\t+\n-chr21\t46920752\t46922048\tL1PA7\t7374\t-\n-chr21\t46922123\t46922411\tL1ME1\t883\t+\n-chr21\t46922411\t46922704\tAluJb\t1385\t-\n-chr21\t46922718\t46922935\tL1ME1\t699\t+\n-chr21\t46923377\t46923802\tMLT1K\t323\t-\n-chr21\t46924116\t46924575\tHAL1\t655\t+\n-chr21\t46924661\t46924875\tMER74A\t443\t+\n-chr21\t46925595\t46925624\tAT_rich\t22\t+\n-chr21\t46926764\t46927048\tMLT1I\t641\t-\n-chr21\t46927048\t46927090\t(A)n\t378\t+\n-chr21\t46927155\t46927194\tMLT1I\t248\t-\n-chr21\t46927433\t46927614\t(TTAGGG)n\t234\t+\n-chr21\t46928301\t46928745\tMER4A1\t2938\t-\n-chr21\t46929613\t46930969\tL1MC3\t5007\t-\n-chr21\t46930969\t46931413\tMSTB1\t1749\t-\n-chr21\t46931413\t46931818\tL1MC3\t5007\t-\n-chr21\t46931945\t46932055\tMER34B-int\t303\t-\n-chr21\t46932058\t46932281\tAluJb\t1504\t-\n-chr21\t46932282\t46932323\tAluJb\t228\t-\n-chr21\t46932323\t46932751\tMER34B-int\t2060\t-\n-chr21\t46932771\t46933151\tLTR10C\t1336\t-\n-chr21\t46933171\t46933204\t(CA)n\t297\t+\n-chr21\t46933260\t46934052\tMER34B-int\t3116\t-\n-chr21\t46934052\t46934352\tAluY\t2333\t-\n-chr21\t46934352\t46934522\tMER34B-int\t3116\t-\n-chr21\t46934536\t46935047\tMER34B-int\t1297\t-\n-chr21\t46935651\t46936098\tMER34B-int\t1588\t-\n-chr21\t46936101\t46936169\tMLT2B3\t330\t+\n-chr21\t46936194\t46936262\tMLT2B3\t406\t+\n-chr21\t46936287\t46936355\tMLT2B3\t375\t+\n-chr21\t46936380\t46936448\tMLT2B3\t330\t+\n-chr21\t46936473\t46936541\tMLT2B3\t346\t+\n-chr21\t46936566\t46936632\tMLT2B3\t335\t+\n-chr21\t46936657\t46936725\tMLT2B5\t417\t+\n-chr21\t46936750\t46936977\tMLT2B3\t2194\t+\n-chr21\t46936977\t46937339\tTHE1C\t2217\t+\n-chr21\t46937339\t46937689\tMLT2B5\t1691\t+\n-chr21\t46937696\t46938061\tMER34B-int\t678\t-\n-chr21\t46938036\t46938374\tMER34B-int\t910\t-\n-chr21\t46938415\t46938527\tMER34B-int\t268\t-\n-chr21\t46938776\t46938841\tMER72\t3923\t-\n-chr21\t46938841\t46939137\tAluSq\t2287\t-\n-chr21\t46939137\t46939777\tMER72\t3923\t-\n-chr21\t46939986\t46940174\tL1MC\t565\t-\n-chr21\t46940179\t46940735\tMER34C_\t2769\t-\n-chr21\t46940746\t46941357\tL1MC\t1506\t-\n-chr21\t46941373\t46941479\tLTR60\t529\t+\n-chr21\t46941590\t46941894\tAluYb8\t2829\t+\n-chr21\t46941894\t46942298\tLTR60\t958\t+\n-chr21\t46942142\t46944181\tTAR1\t16459\t+\n-chr21\t46944181\t46944323\t(TTAGGG)n\t1057\t+\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/genomes/human.hg18.genome --- a/BEDTools-Version-2.14.3/genomes/human.hg18.genome Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -chr1 247249719 -chr1_random 1663265 -chr10 135374737 -chr10_random 113275 -chr11 134452384 -chr11_random 215294 -chr12 132349534 -chr13 114142980 -chr13_random 186858 -chr14 106368585 -chr15 100338915 -chr15_random 784346 -chr16 88827254 -chr16_random 105485 -chr17 78774742 -chr17_random 2617613 -chr18 76117153 -chr18_random 4262 -chr19 63811651 -chr19_random 301858 -chr2 242951149 -chr2_random 185571 -chr20 62435964 -chr21 46944323 -chr21_random 1679693 -chr22 49691432 -chr22_random 257318 -chr22_h2_hap1 63661 -chr3 199501827 -chr3_random 749256 -chr4 191273063 -chr4_random 842648 -chr5 180857866 -chr5_random 143687 -chr5_h2_hap1 1794870 -chr6 170899992 -chr6_random 1875562 -chr6_cox_hap1 4731698 -chr6_qbl_hap2 4565931 -chr7 158821424 -chr7_random 549659 -chr8 146274826 -chr8_random 943810 -chr9 140273252 -chr9_random 1146434 -chrM 16571 -chrX 154913754 -chrX_random 1719168 -chrY 57772954 - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/genomes/human.hg19.genome --- a/BEDTools-Version-2.14.3/genomes/human.hg19.genome Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,94 +0,0 @@ -chr1 249250621 -chr2 243199373 -chr3 198022430 -chr4 191154276 -chr5 180915260 -chr6 171115067 -chr7 159138663 -chrX 155270560 -chr8 146364022 -chr9 141213431 -chr10 135534747 -chr11 135006516 -chr12 133851895 -chr13 115169878 -chr14 107349540 -chr15 102531392 -chr16 90354753 -chr17 81195210 -chr18 78077248 -chr20 63025520 -chrY 59373566 -chr19 59128983 -chr22 51304566 -chr21 48129895 -chr6_ssto_hap7 4928567 -chr6_mcf_hap5 4833398 -chr6_cox_hap2 4795371 -chr6_mann_hap4 4683263 -chr6_apd_hap1 4622290 -chr6_qbl_hap6 4611984 -chr6_dbb_hap3 4610396 -chr17_ctg5_hap1 1680828 -chr4_ctg9_hap1 590426 -chr1_gl000192_random 547496 -chrUn_gl000225 211173 -chr4_gl000194_random 191469 -chr4_gl000193_random 189789 -chr9_gl000200_random 187035 -chrUn_gl000222 186861 -chrUn_gl000212 186858 -chr7_gl000195_random 182896 -chrUn_gl000223 180455 -chrUn_gl000224 179693 -chrUn_gl000219 179198 -chr17_gl000205_random 174588 -chrUn_gl000215 172545 -chrUn_gl000216 172294 -chrUn_gl000217 172149 -chr9_gl000199_random 169874 -chrUn_gl000211 166566 -chrUn_gl000213 164239 -chrUn_gl000220 161802 -chrUn_gl000218 161147 -chr19_gl000209_random 159169 -chrUn_gl000221 155397 -chrUn_gl000214 137718 -chrUn_gl000228 129120 -chrUn_gl000227 128374 -chr1_gl000191_random 106433 -chr19_gl000208_random 92689 -chr9_gl000198_random 90085 -chr17_gl000204_random 81310 -chrUn_gl000233 45941 -chrUn_gl000237 45867 -chrUn_gl000230 43691 -chrUn_gl000242 43523 -chrUn_gl000243 43341 -chrUn_gl000241 42152 -chrUn_gl000236 41934 -chrUn_gl000240 41933 -chr17_gl000206_random 41001 -chrUn_gl000232 40652 -chrUn_gl000234 40531 -chr11_gl000202_random 40103 -chrUn_gl000238 39939 -chrUn_gl000244 39929 -chrUn_gl000248 39786 -chr8_gl000196_random 38914 -chrUn_gl000249 38502 -chrUn_gl000246 38154 -chr17_gl000203_random 37498 -chr8_gl000197_random 37175 -chrUn_gl000245 36651 -chrUn_gl000247 36422 -chr9_gl000201_random 36148 -chrUn_gl000235 34474 -chrUn_gl000239 33824 -chr21_gl000210_random 27682 -chrUn_gl000231 27386 -chrUn_gl000229 19913 -chrM 16571 -chrUn_gl000226 15008 -chr18_gl000207_random 4262 - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/genomes/mouse.mm8.genome --- a/BEDTools-Version-2.14.3/genomes/mouse.mm8.genome Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,35 +0,0 @@ -chr1 197069962 -chr2 181976762 -chr3 159872112 -chr4 155029701 -chr5 152003063 -chr6 149525685 -chr7 145134094 -chr8 132085098 -chr9 124000669 -chrM 16299 -chrX 165556469 -chrY 16029404 -chr10 129959148 -chr11 121798632 -chr12 120463159 -chr13 120614378 -chr14 123978870 -chr15 103492577 -chr16 98252459 -chr17 95177420 -chr18 90736837 -chr19 61321190 -chr1_random 172274 -chr5_random 2921247 -chr7_random 243910 -chr8_random 206961 -chr9_random 17232 -chrX_random 39696 -chrY_random 14577732 -chr10_random 10781 -chr13_random 436191 -chr15_random 105932 -chr17_random 89091 -chrUn_random 1540053 - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/genomes/mouse.mm9.genome --- a/BEDTools-Version-2.14.3/genomes/mouse.mm9.genome Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,36 +0,0 @@ -chr1 197195432 -chr2 181748087 -chr3 159599783 -chr4 155630120 -chr5 152537259 -chr6 149517037 -chr7 152524553 -chr8 131738871 -chr9 124076172 -chr10 129993255 -chr11 121843856 -chr12 121257530 -chr13 120284312 -chr14 125194864 -chr15 103494974 -chr16 98319150 -chr17 95272651 -chr18 90772031 -chr19 61342430 -chrX 166650296 -chrY 15902555 -chrM 16299 -chr13_random 400311 -chr16_random 3994 -chr17_random 628739 -chr1_random 1231697 -chr3_random 41899 -chr4_random 160594 -chr5_random 357350 -chr7_random 362490 -chr8_random 849593 -chr9_random 449403 -chrUn_random 5900358 -chrX_random 1785075 -chrY_random 58682461 - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/Makefile --- a/BEDTools-Version-2.14.3/src/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,48 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= multiCovMain.cpp multiCovBam.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= multiCovBam - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/annotateBed/Makefile --- a/BEDTools-Version-2.14.3/src/annotateBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= annotateMain.cpp annotateBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= annotateBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp --- a/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,209 +0,0 @@\n-/*****************************************************************************\n- annotateBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "annotateBed.h"\n-\n-// build\n-BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,\n- const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) :\n-\n- _mainFile(mainFile),\n- _annoFileNames(annoFileNames),\n- _annoTitles(annoTitles),\n- _sameStrand(sameStrand),\n- _diffStrand(diffStrand),\n- _reportCounts(reportCounts),\n- _reportBoth(reportBoth)\n-{\n- _bed = new BedFile(_mainFile);\n-}\n-\n-\n-// destroy and delete the open file pointers\n-BedAnnotate::~BedAnnotate(void) {\n- delete _bed;\n- CloseAnnoFiles();\n-}\n-\n-\n-void BedAnnotate::OpenAnnoFiles() {\n- for (size_t i=0; i < _annoFileNames.size(); ++i) {\n- BedFile *file = new BedFile(_annoFileNames[i]);\n- file->Open();\n- _annoFiles.push_back(file);\n- }\n-}\n-\n-\n-void BedAnnotate::CloseAnnoFiles() {\n- for (size_t i=0; i < _annoFiles.size(); ++i) {\n- BedFile *file = _annoFiles[i];\n- delete file;\n- _annoFiles[i] = NULL;\n- }\n-}\n-\n-\n-void BedAnnotate::PrintHeader() {\n- // print a hash to indicate header and then write a tab\n- // for each field in the main file.\n- printf("#");\n- for (size_t i = 0; i < _bed->bedType; ++i)\n- printf("\\t");\n-\n- // now print the label for each file.\n- if (_reportBoth == false) {\n- for (size_t i = 0; i < _annoTitles.size(); ++i)\n- printf("%s\\t", _annoTitles[i].c_str());\n- printf("\\n");\n- }\n- else {\n- for (size_t i = 0; i < _annoTitles.size(); ++i)\n- printf("%s_cnt\\t%s_pct", _annoTitles[i].c_str(), _annoTitles[i].c_str());\n- printf("\\n");\n- }\n-}\n-\n-\n-void BedAnnotate::InitializeMainFile() {\n- // process each chromosome\n- masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();\n- masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();\n- for (; chromItr != chromEnd; ++chromItr) {\n- // for each chrom, process each bin\n- binsToBedCovLists::iterator binItr = chromItr->second.begin();\n- binsToBedCovLists::iterator binEnd = chromItr->second.end();\n- for (; binItr != binEnd; ++binItr) {\n- // initialize BEDCOVLIST in this chrom/bin\n- vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();\n- vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();\n- for (; bedItr != bedEnd; ++bedItr) {\n- // initialize the depthMaps, counts, etc. for each anno file.\n- for (size_t i = 0; i < _annoFiles.size(); ++i) {\n- map<unsigned int, DEPTH> dummy;\n- bedItr->depthMapList.push_back(dummy);\n- bedItr->counts.push_back(0);\n- bedItr->minOverlapStarts.push_back(INT_MAX);\n- }\n- }\n- }\n- }\n-}\n-\n-\n-void BedAnnotate::AnnotateBed() {\n-\n- // load the "main" bed file into a map so\n- // that we can easily compare each annoFile to it for overlaps\n- _bed->loadBedCovListFileIntoMap();\n- // open the annotations files for processing;\n- OpenAnnoFiles();\n- // initialize counters, depths, etc. for the main file\n- InitializeMainFile();\n-\n- // annotate the main file with the coverage from the annotation files.\n- for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {\n- // grab the current annotation file.\n- BedFile *anno = _annoFiles[annoIndex];\n- int lineNum = 0;\n- BED a, nullBed;\n- BedLi'..b' a = nullBed;\n- }\n- }\n- }\n-\n- // report the annotations of the main file from the anno file.\n- ReportAnnotations();\n- // close the annotations files;\n- CloseAnnoFiles();\n-}\n-\n-\n-void BedAnnotate::ReportAnnotations() {\n-\n- if (_annoTitles.size() > 0) {\n- PrintHeader();\n- }\n-\n- // process each chromosome\n- masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();\n- masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();\n- for (; chromItr != chromEnd; ++chromItr) {\n- // for each chrom, process each bin\n- binsToBedCovLists::const_iterator binItr = chromItr->second.begin();\n- binsToBedCovLists::const_iterator binEnd = chromItr->second.end();\n- for (; binItr != binEnd; ++binItr) {\n- // for each chrom & bin, compute and report\n- // the observed coverage for each feature\n- vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();\n- vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();\n- for (; bedItr != bedEnd; ++bedItr) {\n- // print the main BED entry.\n- _bed->reportBedTab(*bedItr);\n-\n- // now report the coverage from each annotation file.\n- for (size_t i = 0; i < _annoFiles.size(); ++i) {\n- unsigned int totalLength = 0;\n- int zeroDepthCount = 0; // number of bases with zero depth\n- int depth = 0; // tracks the depth at the current base\n-\n- // the start is either the first base in the feature OR\n- // the leftmost position of an overlapping feature. e.g. (s = start):\n- // A ----------\n- // B s ------------\n- int start = min(bedItr->minOverlapStarts[i], bedItr->start);\n-\n- map<unsigned int, DEPTH>::const_iterator depthItr;\n- map<unsigned int, DEPTH>::const_iterator depthEnd;\n-\n- // compute the coverage observed at each base in the feature marching from start to end.\n- for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {\n- // map pointer grabbing the starts and ends observed at this position\n- depthItr = bedItr->depthMapList[i].find(pos);\n- depthEnd = bedItr->depthMapList[i].end();\n-\n- // increment coverage if starts observed at this position.\n- if (depthItr != depthEnd)\n- depth += depthItr->second.starts;\n- // update zero depth\n- if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))\n- zeroDepthCount++;\n- // decrement coverage if ends observed at this position.\n- if (depthItr != depthEnd)\n- depth = depth - depthItr->second.ends;\n- }\n- // Summarize the coverage for the current interval,\n- CHRPOS length = bedItr->end - bedItr->start;\n- totalLength += length;\n- int nonZeroBases = (length - zeroDepthCount);\n- float fractCovered = (float) nonZeroBases / length;\n- if (_reportCounts == false && _reportBoth == false)\n- printf("%f\\t", fractCovered);\n- else if (_reportCounts == true && _reportBoth == false)\n- printf("%d\\t", bedItr->counts[i]);\n- else if (_reportCounts == false && _reportBoth == true)\n- printf("%d\\t%f\\t", bedItr->counts[i], fractCovered);\n- }\n- // print newline for next feature.\n- printf("\\n");\n- }\n- }\n- }\n-}\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h --- a/BEDTools-Version-2.14.3/src/annotateBed/annotateBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,71 +0,0 @@ -/***************************************************************************** - annotateBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef ANNOTATEBED_H -#define ANNOTATEBED_H - -#include "bedFile.h" -#include <vector> -#include <algorithm> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <stdlib.h> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedAnnotate { - -public: - - // constructor - BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, - const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth); - - // destructor - ~BedAnnotate(void); - - // annotate the master file with all of the annotation files. - void AnnotateBed(); - -private: - - // input files. - string _mainFile; - vector<string> _annoFileNames; - vector<string> _annoTitles; - - // instance of a bed file class. - BedFile *_bed; - vector<BedFile*> _annoFiles; - - // do we care about strandedness when counting coverage? - bool _sameStrand; - bool _diffStrand; - - bool _reportCounts; - bool _reportBoth; - - // private function for reporting coverage information - void ReportAnnotations(); - - void OpenAnnoFiles(); - - void CloseAnnoFiles(); - - void PrintHeader(); - - void InitializeMainFile(); -}; -#endif /* ANNOTATEBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp --- a/BEDTools-Version-2.14.3/src/annotateBed/annotateMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,170 +0,0 @@ -/***************************************************************************** - annotateMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "annotateBed.h" -#include "version.h" - -using namespace std; - -// define the version -#define PROGRAM_NAME "annotateBed" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input file - string mainFile; - - // parm flags - bool sameStrand = false; - bool diffStrand = false; - bool haveBed = false; - bool haveFiles = false; - bool haveTitles = false; - bool reportCounts = false; - bool reportBoth = false; - - // list of annotation files / names - vector<string> inputFiles; - vector<string> inputTitles; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - haveBed = true; - mainFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-files", 6, parameterLength)) { - if ((i+1) < argc) { - haveFiles = true; - i = i+1; - string file = argv[i]; - while (file[0] != '-' && i < argc) { - inputFiles.push_back(file); - i++; - if (i < argc) - file = argv[i]; - } - i--; - } - } - else if(PARAMETER_CHECK("-names", 6, parameterLength)) { - if ((i+1) < argc) { - haveTitles = true; - i = i+1; - string title = argv[i]; - while (title[0] != '-' && i < argc) { - inputTitles.push_back(title); - i++; - if (i < argc) - title = argv[i]; - } - i--; - } - } - else if(PARAMETER_CHECK("-counts", 7, parameterLength)) { - reportCounts = true; - } - else if(PARAMETER_CHECK("-both", 5, parameterLength)) { - reportBoth = true; - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed || !haveFiles) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i and -files files. " << endl << "*****" << endl; - showHelp = true; - } - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedAnnotate *ba = new BedAnnotate(mainFile, inputFiles, inputTitles, sameStrand, diffStrand, reportCounts, reportBoth); - ba->AnnotateBed(); - delete ba; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Annotates the depth & breadth of coverage of features from multiple files" << endl; - cerr << "\t on the intervals in -i." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -files FILE1 FILE2 .. FILEn" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-names\t" << "A list of names (one / file) to describe each file in -i." << endl; - cerr << "\t\tThese names will be printed as a header line." << endl << endl; - - cerr << "\t-counts\t" << "Report the count of features in each file that overlap -i." << endl; - cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl; - - cerr << "\t-both\t" << "Report the counts followed by the % coverage." << endl; - cerr << "\t\t- Default is to report the fraction of -i covered by each file." << endl << endl; - - cerr << "\t-s\t" << "Require same strandedness. That is, only counts overlaps" << endl; - cerr << "\t\ton the _same_ strand." << endl; - cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; - - cerr << "\t-S\t" << "Require different strandedness. That is, only count overlaps" << endl; - cerr << "\t\ton the _opposite_ strand." << endl; - cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bamToBed/Makefile --- a/BEDTools-Version-2.14.3/src/bamToBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary - - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bamToBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=BamAncillary.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= bamToBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamAncillary/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp --- a/BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,564 +0,0 @@\n-/*****************************************************************************\n- bamToBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "version.h"\n-#include "api/BamReader.h"\n-#include "api/BamAux.h"\n-#include "BamAncillary.h"\n-#include "bedFile.h"\n-using namespace BamTools;\n-\n-#include <vector>\n-#include <algorithm> // for swap()\n-#include <iostream>\n-#include <fstream>\n-#include <stdlib.h>\n-\n-using namespace std;\n-\n-\n-// define our program name\n-#define PROGRAM_NAME "bamToBed"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag,\n- const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar);\n-void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);\n-\n-void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar);\n-void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0");\n-void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,\n- const RefVector &refs, bool useEditDistance);\n-\n-void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,\n- vector<int> &blockEnds, int &alignmentEnd);\n-string BuildCigarString(const vector<CigarOp> &cigar);\n-\n-bool IsCorrectMappingForBEDPE (const BamAlignment &bam);\n-\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bamFile = "stdin";\n- string color = "255,0,0";\n- string tag = "";\n-\n- bool haveBam = true;\n- bool haveColor = false;\n- bool haveOtherTag = false;\n- bool writeBedPE = false;\n- bool writeBed12 = false;\n- bool useEditDistance = false;\n- bool useAlignmentScore = false;\n- bool useCigar = false;\n- bool obeySplits = false;\n-\n- // check to see if we should print out some help\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- bamFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n- writeBedPE = true;\n- }\n- else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n- writeBed12 = true;\n- }\n- else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n- obeySplits = true;\n- }\n- else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n- useEditDistance = true;\n- }\n- else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) {\n- useCigar = true;\n- }\n- else if(PARAMETER_CHECK("-as", 3, parameterLength)) {\n- useAlignmentScore = true;\n- }\n- else if(PARAMETER_CHECK("-color", 6, parameterLength)) {\n- if ((i+1) < argc) {\n- '..b'etc.\n- printf("%d\\t%d\\t%s\\t%d\\t", bam.Position, alignmentEnd, color.c_str(), (int) blockStarts.size());\n-\n- // now write the lengths portion\n- unsigned int b;\n- for (b = 0; b < blockLengths.size() - 1; ++b) {\n- printf("%d,", blockLengths[b]);\n- }\n- printf("%d\\t", blockLengths[b]);\n-\n- // now write the starts portion\n- for (b = 0; b < blockStarts.size() - 1; ++b) {\n- printf("%d,", blockStarts[b]);\n- }\n- printf("%d\\n", blockStarts[b]);\n-}\n-\n-\n-void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) {\n-\n- // initialize BEDPE variables\n- string chrom1, chrom2, strand1, strand2;\n- int start1, start2, end1, end2;\n- uint32_t editDistance1, editDistance2;\n- start1 = start2 = end1 = end2 = -1;\n- chrom1 = chrom2 = strand1 = strand2 = ".";\n- editDistance1 = editDistance2 = 0;\n- uint16_t minMapQuality = 0;\n-\n- // extract relevant info for end 1\n- if (bam1.IsMapped()) {\n- chrom1 = refs.at(bam1.RefID).RefName;\n- start1 = bam1.Position;\n- end1 = bam1.GetEndPosition(false);\n- strand1 = "+";\n- if (bam1.IsReverseStrand()) strand1 = "-";\n-\n- // extract the edit distance from the NM tag\n- // if possible. otherwise, complain.\n- if (useEditDistance == true && bam1.GetTag("NM", editDistance1) == false) {\n- cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\\n";\n- exit(1);\n- }\n- }\n-\n- // extract relevant info for end 2\n- if (bam2.IsMapped()) {\n- chrom2 = refs.at(bam2.RefID).RefName;\n- start2 = bam2.Position;\n- end2 = bam2.GetEndPosition(false);\n- strand2 = "+";\n- if (bam2.IsReverseStrand()) strand2 = "-";\n-\n- // extract the edit distance from the NM tag\n- // if possible. otherwise, complain.\n- if (useEditDistance == true && bam2.GetTag("NM", editDistance2) == false) {\n- cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\\n";\n- exit(1);\n- }\n- }\n-\n- // swap the ends if necessary\n- if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {\n- swap(chrom1, chrom2);\n- swap(start1, start2);\n- swap(end1, end2);\n- swap(strand1, strand2);\n- }\n-\n- // report BEDPE using min mapQuality\n- if (useEditDistance == false) {\n- // compute the minimum mapping quality b/w the two ends of the pair.\n- if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n- minMapQuality = min(bam1.MapQuality, bam2.MapQuality);\n-\n- printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n- chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n- bam1.Name.c_str(), minMapQuality, strand1.c_str(), strand2.c_str());\n- }\n- // report BEDPE using total edit distance\n- else {\n- uint16_t totalEditDistance = 0;\n- if (bam1.IsMapped() == true && bam2.IsMapped() == true)\n- totalEditDistance = editDistance1 + editDistance2;\n- else if (bam1.IsMapped() == true)\n- totalEditDistance = editDistance1;\n- else if (bam2.IsMapped() == true)\n- totalEditDistance = editDistance2;\n-\n- printf("%s\\t%d\\t%d\\t\\%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\n",\n- chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,\n- bam1.Name.c_str(), totalEditDistance, strand1.c_str(), strand2.c_str());\n- }\n-}\n-\n-\n-// deprecated.\n-bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n-\n- if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n- return true;\n- }\n- else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n- return true;\n- }\n- else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n- return true;\n- }\n- else return false;\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile --- a/BEDTools-Version-2.14.3/src/bed12ToBed6/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bed12ToBed6.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= bed12ToBed6 - - -all: $(PROGRAM) - -.PHONY: all - - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp --- a/BEDTools-Version-2.14.3/src/bed12ToBed6/bed12ToBed6.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,168 +0,0 @@ -/***************************************************************************** - bed12ToBed6.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "bedFile.h" -#include "version.h" - -#include <vector> -#include <iostream> -#include <fstream> -#include <stdlib.h> - -using namespace std; - - -// define our program name -#define PROGRAM_NAME "bed12ToBed6" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - - -// function declarations -void ShowHelp(void); -void DetermineBedInput(BedFile *bed); -void ProcessBed(istream &bedInput, BedFile *bed); - - -bool addBlockNums = false; - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - bool haveBed = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-n", 2, parameterLength)) { - addBlockNums = true; - i++; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have an input files - if (!haveBed ) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedFile *bed = new BedFile(bedFile); - DetermineBedInput(bed); - } - else { - ShowHelp(); - } -} - - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Splits BED12 features into discrete BED6 features." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed12>" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-n\t" << "Force the score to be the (1-based) block number from the BED12." << endl << endl; - - - // end the program here - exit(1); -} - - -void DetermineBedInput(BedFile *bed) { - - // dealing with a proper file - if (bed->bedFile != "stdin") { - - ifstream bedStream(bed->bedFile.c_str(), ios::in); - if ( !bedStream ) { - cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - ProcessBed(bedStream, bed); - } - // reading from stdin - else { - ProcessBed(cin, bed); - } -} - - -void ProcessBed(istream &bedInput, BedFile *bed) { - - // process each BED entry and convert to BAM - BED bedEntry, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - // open the BED file for reading. - bed->Open(); - while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - - bedVector bedBlocks; // vec to store the discrete BED "blocks" from a - splitBedIntoBlocks(bedEntry, lineNum, bedBlocks); - - for (int i = 0; i < (int) bedBlocks.size(); ++i) { - if (addBlockNums == false) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), - bedBlocks[i].score.c_str(), bedBlocks[i].strand.c_str()); - } - else { - if (bedBlocks[i].strand == "+") - printf ("%s\t%d\t%d\t%s\t%d\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), - i+1, bedBlocks[i].strand.c_str()); - else - printf ("%s\t%d\t%d\t%s\t%d\t%s\n", bedBlocks[i].chrom.c_str(), bedBlocks[i].start, bedBlocks[i].end, bedBlocks[i].name.c_str(), - (int) ((bedBlocks.size()+1)-i), bedBlocks[i].strand.c_str()); - } - } - bedEntry = nullBed; - } - } - // close up - bed->Close(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bedToBam/Makefile --- a/BEDTools-Version-2.14.3/src/bedToBam/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,53 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bedToBam.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= bedToBam - - -all: $(PROGRAM) - -.PHONY: all - - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp --- a/BEDTools-Version-2.14.3/src/bedToBam/bedToBam.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,357 +0,0 @@\n-/*****************************************************************************\n- bedToBam.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "bedFile.h"\n-#include "genomeFile.h"\n-#include "version.h"\n-\n-\n-#include "api/BamReader.h"\n-#include "api/BamAux.h"\n-#include "api/BamWriter.h"\n-using namespace BamTools;\n-\n-#include <vector>\n-#include <iostream>\n-#include <fstream>\n-#include <stdlib.h>\n-\n-using namespace std;\n-\n-\n-// define our program name\n-#define PROGRAM_NAME "bedToBam"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-\n-// function declarations\n-void ShowHelp(void);\n-void ProcessBed(istream &bedInput, BedFile *bed, GenomeFile *genome, bool isBED12, int mapQual, bool uncompressedBam);\n-void ConvertBedToBam(const BED &bed, BamAlignment &bam, map<string, int> &chromToId, bool isBED12, int mapQual, int lineNum);\n-void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header, map<string, int> &chromToInt);\n-int reg2bin(int beg, int end);\n-\n-\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedFile = "stdin";\n- string genomeFile;\n-\n- unsigned int mapQual = 255;\n-\n- bool haveBed = true;\n- bool haveGenome = false;\n- bool haveMapQual = false;\n- bool isBED12 = false;\n- bool uncompressedBam = false;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- bedFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveGenome = true;\n- genomeFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-mapq", 5, parameterLength)) {\n- haveMapQual = true;\n- if ((i+1) < argc) {\n- mapQual = atoi(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {\n- isBED12 = true;\n- }\n- else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n- uncompressedBam = true;\n- }\n- else {\n- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n- showHelp = true;\n- }\n- }\n-\n- // make sure we have an input files\n- if (!haveBed ) {\n- cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;\n- showHelp = true;\n- }\n- if (!haveGenome ) {\n- cerr << endl << "*****" << endl << "*****ERROR: Need -g (genome) file. " << endl << "*****" << endl;\n- showHelp = true;\n- }\n- if (mapQual < 0 || mapQual > 255) {\n- cerr << endl << "*****" << endl << "*****ERROR: MAPQ must be in range [0,255]. " << endl << "*****" << endl;\n- showHelp = true;\n- }\n-\n-\n- if (!showHelp) {\n- BedFile *bed = new BedFile(bedFile);\n- GenomeFile *genome = new GenomeFile(genomeFile);\n-\n- '..b'() == 6) {\n-\n- // extract the relevant BED fields to convert BED12 to BAM\n- // namely: blockCount, blockStarts, blockEnds\n- unsigned int blockCount = atoi(bed.otherFields[3].c_str());\n-\n- vector<int> blockSizes, blockStarts;\n- Tokenize(bed.otherFields[4], blockSizes, ",");\n- Tokenize(bed.otherFields[5], blockStarts, ",");\n-\n- // make sure this is a well-formed BED12 entry.\n- if (blockSizes.size() != blockCount) {\n- cerr << "Error: Number of BED blocks does not match blockCount at line: " << lineNum << ". Exiting!" << endl;\n- exit (1);\n- }\n- else {\n- // does the first block start after the bed.start?\n- // if so, we need to do some "splicing"\n- if (blockStarts[0] > 0) {\n- CigarOp cOp;\n- cOp.Length = blockStarts[0];\n- cOp.Type = \'N\';\n- bam.CigarData.push_back(cOp);\n- }\n- // handle the "middle" blocks\n- for (unsigned int i = 0; i < blockCount - 1; ++i) {\n- CigarOp cOp;\n- cOp.Length = blockSizes[i];\n- cOp.Type = \'M\';\n- bam.CigarData.push_back(cOp);\n-\n- if (blockStarts[i+1] > (blockStarts[i] + blockSizes[i])) {\n- CigarOp cOp;\n- cOp.Length = (blockStarts[i+1] - (blockStarts[i] + blockSizes[i]));\n- cOp.Type = \'N\';\n- bam.CigarData.push_back(cOp);\n- }\n- }\n- // handle the last block.\n- CigarOp cOp;\n- cOp.Length = blockSizes[blockCount - 1];\n- cOp.Type = \'M\';\n- bam.CigarData.push_back(cOp);\n- }\n- }\n- // it doesn\'t smell like BED12. complain.\n- else {\n- cerr << "You\'ve indicated that the input file is in BED12 format, yet the relevant fields cannot be found. Exiting." << endl << endl;\n- exit(1);\n- }\n- }\n-}\n-\n-\n-void MakeBamHeader(const string &genomeFile, RefVector &refs, string &header,\n- map<string, int, std::less<string> > &chromToId) {\n-\n- // make a genome map of the genome file.\n- GenomeFile genome(genomeFile);\n-\n- header += "@HD\\tVN:1.0\\tSO:unsorted\\n";\n- header += "@PG\\tID:BEDTools_bedToBam\\tVN:V";\n- header += VERSION;\n- header += "\\n";\n-\n- int chromId = 0;\n- vector<string> chromList = genome.getChromList();\n- sort(chromList.begin(), chromList.end());\n-\n- // create a BAM header (@SQ) entry for each chrom in the BEDTools genome file.\n- vector<string>::const_iterator genomeItr = chromList.begin();\n- vector<string>::const_iterator genomeEnd = chromList.end();\n- for (; genomeItr != genomeEnd; ++genomeItr) {\n- chromToId[*genomeItr] = chromId;\n- chromId++;\n-\n- // add to the header text\n- int size = genome.getChromSize(*genomeItr);\n- string chromLine = "@SQ\\tSN:" + *genomeItr + "\\tAS:" + genomeFile + "\\tLN:" + ToString(size) + "\\n";\n- header += chromLine;\n-\n- // create a chrom entry and add it to\n- // the RefVector\n- RefData chrom;\n- chrom.RefName = *genomeItr;\n- chrom.RefLength = size;\n- refs.push_back(chrom);\n- }\n-}\n-\n-\n-/* Taken directly from the SAMTools spec\n-calculate bin given an alignment in [beg,end) (zero-based, half-close, half-open) */\n-int reg2bin(int beg, int end) {\n- --end;\n- if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);\n- if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);\n- if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);\n- if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);\n- if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);\n- return 0;\n-}\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bedToIgv/Makefile --- a/BEDTools-Version-2.14.3/src/bedToIgv/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,51 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bedToIgv.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= bedToIgv - - -all: $(PROGRAM) - -.PHONY: all - - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp --- a/BEDTools-Version-2.14.3/src/bedToIgv/bedToIgv.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,269 +0,0 @@\n-/*****************************************************************************\n- bedToIgv.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "bedFile.h"\n-#include "genomeFile.h"\n-#include "version.h"\n-\n-#include <vector>\n-#include <iostream>\n-#include <fstream>\n-#include <stdlib.h>\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "bedToIgv"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n- bool collapse, bool useNames, string imageType, int slop);\n-void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n- bool collapse, bool useNames, string imageType, int slop);\n-\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedFile = "stdin";\n- string imagePath = "./";\n- string sortType = "none";\n- string session = "none";\n- int slop = 0;\n- string imageType = "png";\n-\n- bool haveBed = true;\n- bool collapse = false;\n- bool useNames = false;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- bedFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-path", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- imagePath = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-sort", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- sortType = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-sess", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- session = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-clps", 5, parameterLength)) {\n- collapse = true;\n- }\n- else if(PARAMETER_CHECK("-name", 5, parameterLength)) {\n- useNames = true;\n- }\n- else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- slop = atoi(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-img", 4, parameterLength)) {\n- if ((i+1) < argc) {\n- imageType = argv[i + 1];\n- i++;\n- }\n- }\n- else {\n- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n- showHelp = true;\n- }\n- }\n-\n- // make sure we have an input files\n- if (!haveBed ) {\n- cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;\n- showHelp = true;\n- }\n- if (sortType != "none") {\n- if ((sortType != "base") && (sortType != "position") && (sortType != "strand") &&\n- (sortType != "quali'..b'ns: base, position, strand, quality, sample, and readGroup" << endl;\n- cerr << "\\t\\tDefault is to apply no sorting at all." << endl << endl;\n-\n- cerr << "\\t-clps\\t" << "Collapse the aligned reads prior to taking a snapshot. " << endl;\n- cerr << "\\t\\tDefault is to no collapse." << endl << endl;\n-\n- cerr << "\\t-name\\t" << "Use the \\"name\\" field (column 4) for each image\'s filename. " << endl;\n- cerr << "\\t\\tDefault is to use the \\"chr:start-pos.ext\\"." << endl << endl;\n-\n- cerr << "\\t-slop\\t" << "Number of flanking base pairs on the left & right of the image." << endl;\n- cerr << "\\t\\t- (INT) Default = 0." << endl << endl;\n-\n- cerr << "\\t-img\\t" << "The type of image to be created. " << endl;\n- cerr << "\\t\\tOptions: png, eps, svg" << endl;\n- cerr << "\\t\\tDefault is png." << endl << endl;\n-\n- cerr << "Notes: " << endl;\n- cerr << "\\t(1) The resulting script is meant to be run from within the IGV GUI version 1.5 or later." << endl;\n- cerr << "\\t(2) Unless you use the -sess option, it is assumed that prior to running the script, " << endl;\n- cerr << "\\t\\tyou have loaded the proper genome, tracks and data files." << endl << endl;\n-\n-\n- // end the program here\n- exit(1);\n-}\n-\n-\n-void DetermineBedInput(BedFile *bed, string path, string sortType, string session,\n- bool collapse, bool useNames, string imageType, int slop) {\n-\n- // dealing with a proper file\n- if (bed->bedFile != "stdin") {\n-\n- ifstream bedStream(bed->bedFile.c_str(), ios::in);\n- if ( !bedStream ) {\n- cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl;\n- exit (1);\n- }\n- ProcessBed(bedStream, bed, path, sortType, session, collapse, useNames, imageType, slop);\n- }\n- // reading from stdin\n- else {\n- ProcessBed(cin, bed, path, sortType, session, collapse, useNames, imageType, slop);\n- }\n-}\n-\n-\n-void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,\n- bool collapse, bool useNames, string imageType, int slop) {\n-\n- // set the image path\n- cout << "snapshotDirectory " << path << endl;\n-\n- // should we load a session\n- if (session != "none")\n- cout << "load " << session << endl;\n-\n-\n- BED bedEntry, nullBed;\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n-\n- bed->Open();\n- // process each BED entry and convert to an IGV request\n- while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n-\n- string filename = bedEntry.chrom + "_" + ToString(bedEntry.start) + "_" + ToString(bedEntry.end);\n- string locus = bedEntry.chrom + ":" + ToString(bedEntry.start - slop) + "-" + ToString(bedEntry.end + slop);\n-\n- if (useNames == true) {\n- if (bedEntry.name.empty() == false)\n- filename = filename + "_" + bedEntry.name;\n- else {\n- cerr << "Error: You requested that filenames be based upon the name field. However, it appears to be empty. Exiting!" << endl;\n- exit (1);\n- }\n- }\n- if (slop > 0) {\n- filename = filename + "_" + "slop" + ToString(slop);\n- }\n- // goto\n- cout << "goto " << locus << endl;\n-\n- // sort\n- if (sortType != "none")\n- cout << "sort " << sortType << endl;\n-\n- // collapse\n- if (collapse == true)\n- cout << "collapse" << endl;\n-\n- // snapshot\n- cout << "snapshot " << filename << "." << imageType << endl;\n-\n- // reset\n- bedEntry = nullBed;\n- }\n- }\n- // close up\n- bed->Close();\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/closestBed/Makefile --- a/BEDTools-Version-2.14.3/src/closestBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,41 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= closestMain.cpp closestBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= closestBed - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp --- a/BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,234 +0,0 @@\n-/*****************************************************************************\n- closestBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "closestBed.h"\n-\n-const int MAXSLOP = 256000000; // 2*MAXSLOP = 512 megabases.\n- // We don\'t want to keep looking if we\n- // can\'t find a nearby feature within 512 Mb.\n-const int SLOPGROWTH = 2048000;\n-\n-\n-/*\n- Constructor\n-*/\n-BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n- string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode,\n- bool ignoreOverlaps) \n- : _bedAFile(bedAFile)\n- , _bedBFile(bedBFile)\n- , _tieMode(tieMode)\n- , _sameStrand(sameStrand)\n- , _diffStrand(diffStrand)\n- , _reportDistance(reportDistance)\n- , _signDistance(signDistance)\n- , _strandedDistMode(_strandedDistMode)\n- , _ignoreOverlaps(ignoreOverlaps)\n-{\n- _bedA = new BedFile(_bedAFile);\n- _bedB = new BedFile(_bedBFile);\n- FindClosestBed();\n-}\n-\n-\n-/*\n- Destructor\n-*/\n-BedClosest::~BedClosest(void) {\n-}\n-\n-\n-void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {\n-\n- int slop = 0; // start out just looking for overlaps\n- // within the current bin (~128Kb)\n-\n- // update the current feature\'s start and end\n-\n- CHRPOS aFudgeStart = 0;\n- CHRPOS aFudgeEnd;\n- int numOverlaps = 0;\n- vector<BED> closestB;\n- CHRPOS minDistance = INT_MAX;\n- int32_t curDistance = INT_MAX;\n- vector<int32_t> distances;\n-\n- // is there at least one feature in B on the same chrom\n- // as the current A feature?\n- if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {\n-\n- while ((numOverlaps == 0) && (slop <= MAXSLOP)) {\n-\n- // add some slop (starting at 0 bases) to a in hopes\n- // of finding a hit in B\n- if ((static_cast<int>(a.start) - slop) > 0)\n- aFudgeStart = a.start - slop;\n- else\n- aFudgeStart = 0;\n-\n- if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))\n- aFudgeEnd = a.end + slop;\n- else\n- aFudgeEnd = 2 * MAXSLOP;\n-\n- // THE HEAVY LIFTING\n- // search for hits with the current slop added\n- _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _sameStrand, _diffStrand);\n-\n- vector<BED>::const_iterator h = hits.begin();\n- vector<BED>::const_iterator hitsEnd = hits.end();\n- for (; h != hitsEnd; ++h) {\n-\n- // do the actual features overlap?\n- int s = max(a.start, h->start);\n- int e = min(a.end, h->end);\n- int overlapBases = (e - s); // the number of overlapping bases b/w a and b\n-\n- // make sure we allow overlapping features.\n- if ((overlapBases > 0) && (_ignoreOverlaps == true))\n- continue;\n- else\n- numOverlaps++;\n-\n- // there is overlap. make sure we allow overlapping features ()\n- if (overlapBases > 0) {\n- closestB.push_back(*h);\n- distances.push_back(0);\n- }\n- // the hit is to the "left" of A\n- else if (h->end <= a.start) {\n- curDistance = a.start - h->end;\n- if (_signDistance) {\n- if ((_strandedDistMode == "ref")\n- || (_strandedDistMode == "a" && a.strand != "'..b'se if (abs(curDistance) == minDistance) {\n- minDistance = abs(curDistance);\n- closestB.push_back(*h);\n- distances.push_back(curDistance);\n- }\n- }\n- // the hit is to the "right" of A\n- else if (h->start >= a.end) {\n- curDistance = h->start - a.end;\n- if (_signDistance) {\n- if ((_strandedDistMode == "a" && a.strand == "-")\n- || (_strandedDistMode == "b" && h->strand != "-")) {\n- curDistance = -curDistance;\n- }\n- }\n- if (abs(curDistance) < minDistance) {\n- minDistance = abs(curDistance);\n- closestB.clear();\n- closestB.push_back(*h);\n- distances.clear();\n- distances.push_back(curDistance);\n- }\n- else if (abs(curDistance) == minDistance) {\n- minDistance = abs(curDistance);\n- closestB.push_back(*h);\n- distances.push_back(curDistance);\n- }\n- }\n- }\n- // if no overlaps were found, we\'ll widen the range\n- // by SLOPGROWTH in each direction and search again.\n- slop += SLOPGROWTH;\n- }\n- }\n- // there is no feature in B on the same chromosome as A\n- else {\n- _bedA->reportBedTab(a);\n- if (_reportDistance == true) {\n- _bedB->reportNullBedTab();\n- cout << -1 << endl;\n- }\n- else\n- _bedB->reportNullBedNewLine();\n- }\n-\n- // report the closest feature(s) in B to the current A feature.\n- // obey the user\'s reporting request (_tieMode)\n- if (numOverlaps > 0) {\n- if (closestB.size() == 1 || _tieMode == "first") {\n- _bedA->reportBedTab(a);\n- if (_reportDistance == true) {\n- _bedB->reportBedTab(closestB[0]);\n- cout << distances[0] << endl;\n- }\n- else\n- _bedB->reportBedNewLine(closestB[0]);\n- }\n- else {\n- if (_tieMode == "all") {\n- size_t i = 0;\n- for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {\n- _bedA->reportBedTab(a);\n- if (_reportDistance == true) {\n- _bedB->reportBedTab(*b);\n- cout << distances[i++] <<endl;\n- }\n- else\n- _bedB->reportBedNewLine(*b);\n- }\n- }\n- else if (_tieMode == "last") {\n- _bedA->reportBedTab(a);\n- if (_reportDistance == true) {\n- _bedB->reportBedTab(closestB[closestB.size()-1]);\n- cout << distances[distances.size() - 1]<<endl;\n- }\n- else\n- _bedB->reportBedNewLine(closestB[closestB.size()-1]);\n- }\n- }\n- }\n-}\n-\n-\n-void BedClosest::FindClosestBed() {\n-\n- // load the "B" bed file into a map so\n- // that we can easily compare "A" to it for overlaps\n- _bedB->loadBedFileIntoMap();\n-\n- BED a, nullBed;\n- int lineNum = 0; // current input line number\n- vector<BED> hits; // vector of potential hits\n- hits.reserve(100);\n- BedLineStatus bedStatus;\n-\n- _bedA->Open();\n- // process each entry in A in search of the closest feature in B\n- while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- FindWindowOverlaps(a, hits);\n- hits.clear();\n- a = nullBed;\n- }\n- }\n- _bedA->Close();\n-}\n-// END ClosestBed\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/closestBed/closestBed.h --- a/BEDTools-Version-2.14.3/src/closestBed/closestBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,61 +0,0 @@ -/***************************************************************************** - closestBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef CLOSESTBED_H -#define CLOSESTBED_H - -#include "bedFile.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedClosest { - -public: - - // constructor - BedClosest(string &bedAFile, string &bedBFile, - bool sameStrand, bool diffStrand, string &tieMode, - bool reportDistance, bool signDistance, string &strandedDistMode, - bool ignoreOverlaps); - - // destructor - ~BedClosest(void); - - // find the closest feature in B to A - void FindClosestBed(); - -private: - - // data - string _bedAFile; - string _bedBFile; - string _tieMode; - bool _sameStrand; - bool _diffStrand; - bool _reportDistance; - bool _signDistance; - string _strandedDistMode; - bool _ignoreOverlaps; - - BedFile *_bedA, *_bedB; - - // methods - void reportNullB(); - void FindWindowOverlaps(BED &, vector<BED> &); - -}; -#endif /* CLOSEST_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp --- a/BEDTools-Version-2.14.3/src/closestBed/closestMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,202 +0,0 @@\n-/*****************************************************************************\n- closestMain.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "closestBed.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "closestBed"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedAFile;\n- string bedBFile;\n- string tieMode = "all";\n- string strandedDistMode = "";\n-\n- bool haveBedA = false;\n- bool haveBedB = false;\n- bool haveTieMode = false;\n- bool sameStrand = false;\n- bool diffStrand = false;\n- bool ignoreOverlaps = false;\n- bool reportDistance = false;\n- bool signDistance = false;\n- bool haveStrandedDistMode = false;\n-\n-\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if( (PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedB = true;\n- bedBFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n- sameStrand = true;\n- }\n- else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n- diffStrand = true;\n- }\n- else if (PARAMETER_CHECK("-d", 2, parameterLength)) {\n- reportDistance = true;\n- }\n- else if (PARAMETER_CHECK("-D", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- reportDistance = true;\n- signDistance = true;\n- haveStrandedDistMode = true;\n- strandedDistMode = argv[i + 1];\n- i++;\n- }\n- }\n- else if (PARAMETER_CHECK("-io", 3, parameterLength)) {\n- ignoreOverlaps = true;\n- }\n- else if (PARAMETER_CHECK("-t", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveTieMode = true;\n- tieMode = argv[i + 1];\n- i++;\n- }\n- }\n- else {\n- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n- showHelp = true;\n- }\n- }\n-\n- // make sure we have both input files\n- if (!haveBedA || !haveBedB) {\n- cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;\n- showHelp = true;\n- }\n-\n- if (haveTieMode && (tieMode != "all") && (tieMode != "first")\n- && (tieMode != "last")) {\n- cerr << endl << "*****" << endl << "*****ERROR: Request \\"all\\" or \\"first\\" or \\"last\\" for Tie Mode (-t)" << endl << "*****" << endl;\n- showHelp = true;\n- }\n- \n- if (haveStrandedDi'..b'reOverlaps);\n- delete bc;\n- return 0;\n- }\n- else {\n- ShowHelp();\n- }\n-}\n-\n-void ShowHelp(void) {\n-\n- cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n-\n- cerr << "Authors: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n- cerr << "\\t Erik Arner, Riken" << endl << endl;\n-\n- cerr << "Summary: For each feature in A, finds the closest " << endl;\n- cerr << "\\t feature (upstream or downstream) in B." << endl << endl;\n-\n- cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n-\n- cerr << "Options: " << endl;\n- cerr << "\\t-s\\t" << "Require same strandedness. That is, find the closest feature in B" << endl;\n- cerr << "\\t\\tthat overlaps A on the _same_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-S\\t" << "Require opposite strandedness. That is, find the closest feature in B" << endl;\n- cerr << "\\t\\tthat overlaps A on the _opposite_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-d\\t" << "In addition to the closest feature in B, " << endl;\n- cerr << "\\t\\treport its distance to A as an extra column." << endl;\n- cerr << "\\t\\t- The reported distance for overlapping features will be 0." << endl << endl;\n- \n- cerr << "\\t-D\\t" << "Like -d, report the closest feature in B, and its distance to A" << endl;\n- cerr << "\\t\\tas an extra column. Unlike -d, use negative distances to report" << endl;\n- cerr << "\\t\\tupstream features. You must specify which orientation defines \\"upstream\\"." << endl;\n- cerr << "\\t\\tThe options are:" << endl;\n- cerr << "\\t\\t- \\"ref\\" Report distance with respect to the reference genome. " << endl;\n- cerr << "\\t\\t B features with a lower (start, stop) are upstream" << endl;\n- cerr << "\\t\\t- \\"a\\" Report distance with respect to A." << endl;\n- cerr << "\\t\\t When A is on the - strand, \\"upstream\\" means B has a higher (start,stop)." << endl;\n- cerr << "\\t\\t- \\"b\\" Report distance with respect to B." << endl;\n- cerr << "\\t\\t When B is on the - strand, \\"upstream\\" means A has a higher (start,stop)." << endl << endl;\n-\n- cerr << "\\t-io\\t" << "Ignore features in B that overlap A. That is, we want close, but " << endl;\n- cerr << "\\t\\tnot touching features only." << endl << endl;\n-\n- cerr << "\\t-t\\t" << "How ties for closest feature are handled. This occurs when two" << endl;\n- cerr << "\\t\\tfeatures in B have exactly the same \\"closeness\\" with A." << endl;\n- cerr << "\\t\\tBy default, all such features in B are reported." << endl;\n- cerr << "\\t\\tHere are all the options:" << endl;\n- cerr << "\\t\\t- \\"all\\" Report all ties (default)." << endl;\n- cerr << "\\t\\t- \\"first\\" Report the first tie that occurred in the B file." << endl;\n- cerr << "\\t\\t- \\"last\\" Report the last tie that occurred in the B file." << endl << endl;\n-\n- cerr << "Notes: " << endl;\n- cerr << "\\tReports \\"none\\" for chrom and \\"-1\\" for all other fields when a feature" << endl;\n- cerr << "\\tis not found in B on the same chromosome as the feature in A." << endl;\n- cerr << "\\tE.g. none\\t-1\\t-1" << endl << endl;\n-\n- // end the program here\n- exit(1);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/complementBed/Makefile --- a/BEDTools-Version-2.14.3/src/complementBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= complementMain.cpp complementBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= complementBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp --- a/BEDTools-Version-2.14.3/src/complementBed/complementBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,83 +0,0 @@ -/***************************************************************************** - complementBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "complementBed.h" - -BedComplement::BedComplement(string &bedFile, string &genomeFile) { - - _bedFile = bedFile; - _genomeFile = genomeFile; - - _bed = new BedFile(bedFile); - _genome = new GenomeFile(genomeFile); - -} - - -BedComplement::~BedComplement(void) { -} - - -// -// Merge overlapping BED entries into a single entry -// -void BedComplement::ComplementBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - // get a list of the chroms in the user's genome - vector<string> chromList = _genome->getChromList(); - - // process each chrom in the genome - for (size_t c = 0; c < chromList.size(); ++c) { - string currChrom = chromList[c]; - - // create a "bit vector" for the chrom - CHRPOS currChromSize = _genome->getChromSize(currChrom); - vector<bool> chromMasks(currChromSize, 0); - - // mask the chrom for every feature in the BED file - bedVector::const_iterator bItr = _bed->bedMapNoBin[currChrom].begin(); - bedVector::const_iterator bEnd = _bed->bedMapNoBin[currChrom].end(); - for (; bItr != bEnd; ++bItr) { - if (bItr->end > currChromSize) { - cout << "Warninge: end of BED entry exceeds chromosome length. Please correct." << endl; - _bed->reportBedNewLine(*bItr); - exit(1); - } - - // mask all of the positions spanned by this BED entry. - for (CHRPOS b = bItr->start; b < bItr->end; b++) - chromMasks[b] = 1; - } - - // report the unmasked, that is, complemented parts of the chrom - CHRPOS i = 0; - CHRPOS start; - while (i < chromMasks.size()) { - if (chromMasks[i] == 0) { - start = i; - while ((chromMasks[i] == 0) && (i < chromMasks.size())) - i++; - - if (start > 0) - cout << currChrom << "\t" << start << "\t" << i << endl; - else - cout << currChrom << "\t" << 0 << "\t" << i << endl; - } - i++; - } - } -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/complementBed/complementBed.h --- a/BEDTools-Version-2.14.3/src/complementBed/complementBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -/***************************************************************************** - complementBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include "genomeFile.h" - -#include <vector> -#include <bitset> -#include <algorithm> -#include <iostream> -#include <fstream> -#include <limits.h> -#include <stdlib.h> - -using namespace std; - - -//************************************************ -// Class methods and elements -//************************************************ -class BedComplement { - -public: - - // constructor - BedComplement(string &bedFile, string &genomeFile); - - // destructor - ~BedComplement(void); - - void ComplementBed(); - -private: - - string _bedFile; - string _genomeFile; - BedFile *_bed; - GenomeFile *_genome; -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp --- a/BEDTools-Version-2.14.3/src/complementBed/complementMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,116 +0,0 @@ -/***************************************************************************** - complementBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "complementBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "complementBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - string genomeFile; - - bool haveBed = true; - bool haveGenome = false; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-g", 2, parameterLength)) { - if ((i+1) < argc) { - haveGenome = true; - genomeFile = argv[i + 1]; - i++; - } - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed || !haveGenome) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file and -g Genome file. " << endl << "*****" << endl; - showHelp = true; - } - if (!showHelp) { - BedComplement *bc = new BedComplement(bedFile, genomeFile); - bc->ComplementBed(); - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Returns the base pair complement of a feature file." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; - cerr << "\t <chromName><TAB><chromSize>" << endl << endl; - cerr << "\tFor example, Human (hg19):" << endl; - cerr << "\tchr1\t249250621" << endl; - cerr << "\tchr2\t243199373" << endl; - cerr << "\t..." << endl; - cerr << "\tchr18_gl000207_random\t4262" << endl << endl; - - cerr << "Tips: " << endl; - cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; - cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; - cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; - cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; - - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/coverageBed/Makefile --- a/BEDTools-Version-2.14.3/src/coverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,51 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= coverageMain.cpp coverageBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o BamAncillary.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= coverageBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp --- a/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,293 +0,0 @@\n-/*****************************************************************************\n- coverageBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "coverageBed.h"\n-\n-// build\n-BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,\n- bool writeHistogram, bool bamInput, bool obeySplits, \n- bool eachBase, bool countsOnly) {\n-\n- _bedAFile = bedAFile;\n- _bedBFile = bedBFile;\n-\n- _bedA = new BedFile(bedAFile);\n- _bedB = new BedFile(bedBFile);\n-\n- _sameStrand = sameStrand;\n- _diffStrand = diffStrand;\n- _obeySplits = obeySplits;\n- _eachBase = eachBase;\n- _writeHistogram = writeHistogram;\n- _bamInput = bamInput;\n- _countsOnly = countsOnly;\n-\n-\n- if (_bamInput == false)\n- CollectCoverageBed();\n- else\n- CollectCoverageBam(_bedA->bedFile);\n-}\n-\n-// destroy\n-BedCoverage::~BedCoverage(void) {\n- delete _bedA;\n- delete _bedB;\n-}\n-\n-\n-void BedCoverage::CollectCoverageBed() {\n-\n- // load the "B" bed file into a map so\n- // that we can easily compare "A" to it for overlaps\n- _bedB->loadBedCovFileIntoMap();\n-\n- int lineNum = 0; // current input line number\n- BED a, nullBed;\n- BedLineStatus bedStatus;\n-\n- _bedA->Open();\n- // process each entry in A\n- while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- // process the BED entry as a single block\n- if (_obeySplits == false)\n- _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n- // split the BED into discrete blocksand process each independently.\n- else {\n- bedVector bedBlocks;\n- splitBedIntoBlocks(a, lineNum, bedBlocks);\n-\n- // use countSplitHits to avoid over-counting each split chunk\n- // as distinct read coverage.\n- _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);\n- }\n- a = nullBed;\n- }\n- }\n- _bedA->Close();\n-\n- // report the coverage (summary or histogram) for BED B.\n- if (_countsOnly == true)\n- ReportCounts();\n- else \n- ReportCoverage();\n-}\n-\n-\n-void BedCoverage::CollectCoverageBam(string bamFile) {\n-\n- // load the "B" bed file into a map so\n- // that we can easily compare "A" to it for overlaps\n- _bedB->loadBedCovFileIntoMap();\n-\n- // open the BAM file\n- BamReader reader;\n- reader.Open(bamFile);\n-\n- // get header & reference information\n- string header = reader.GetHeaderText();\n- RefVector refs = reader.GetReferenceData();\n-\n- // convert each aligned BAM entry to BED\n- // and compute coverage on B\n- BamAlignment bam;\n- while (reader.GetNextAlignment(bam)) {\n- if (bam.IsMapped()) {\n- // treat the BAM alignment as a single "block"\n- if (_obeySplits == false) {\n- // construct a new BED entry from the current BAM alignment.\n- BED a;\n- a.chrom = refs.at(bam.RefID).RefName;\n- a.start = bam.Position;\n- a.end = bam.GetEndPosition(false, false);\n- a.strand = "+";\n- if (bam.IsReverseStrand()) a.strand = "-";\n-\n- _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);\n- }\n- // split the BAM alignment into discrete blocks and\n- // look for overlaps only within each block.\n- else {\n- // vec to store the discrete BED "blocks"'..b' // update our histograms, assuming we are not reporting "per-base" coverage.\n- if (_eachBase == false) {\n- depthHist[depth]++;\n- allDepthHist[depth]++;\n- }\n- else if ((_eachBase == true) && (bedItr->zeroLength == false))\n- {\n- _bedB->reportBedTab(*bedItr);\n- printf("%d\\t%d\\n", pos-bedItr->start, depth);\n- }\n- }\n- // decrement coverage if ends observed at this position.\n- if (depthItr != bedItr->depthMap.end())\n- depth = depth - depthItr->second.ends;\n- }\n-\n- // handle the special case where the user wants "per-base" depth\n- // but the current feature is length = 0.\n- if ((_eachBase == true) && (bedItr->zeroLength == true)) {\n- _bedB->reportBedTab(*bedItr);\n- printf("1\\t%d\\n",depth);\n- }\n- // Summarize the coverage for the current interval,\n- // assuming the user has not requested "per-base" coverage.\n- else if (_eachBase == false) \n- {\n- CHRPOS length = bedItr->end - bedItr->start;\n- if (bedItr->zeroLength == true) {\n- length = 0;\n- }\n- totalLength += length;\n- int nonZeroBases = (length - zeroDepthCount);\n- \n- float fractCovered = 0.0;\n- if (bedItr->zeroLength == false) {\n- fractCovered = (float) nonZeroBases / length;\n- }\n- \n- // print a summary of the coverage\n- if (_writeHistogram == false) {\n- _bedB->reportBedTab(*bedItr);\n- printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, nonZeroBases, length, fractCovered);\n- }\n- // HISTOGRAM\n- // report the number of bases with coverage == x\n- else {\n- // produce a histogram when not a zero length feature.\n- if (bedItr->zeroLength == false) {\n- map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();\n- map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();\n- for (; histItr != histEnd; ++histItr)\n- {\n- float fractAtThisDepth = (float) histItr->second / length;\n- _bedB->reportBedTab(*bedItr);\n- printf("%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, length, fractAtThisDepth);\n- }\n- }\n- // special case when it is a zero length feauture.\n- else {\n- _bedB->reportBedTab(*bedItr);\n- printf("%d\\t%d\\t%d\\t%0.7f\\n", bedItr->count, 0, 0, 1.0000000);\n- }\n- }\n- }\n- }\n- }\n- }\n- // report a histogram of coverage among _all_\n- // features in B.\n- if (_writeHistogram == true) {\n- map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();\n- map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();\n- for (; histItr != histEnd; ++histItr) {\n- float fractAtThisDepth = (float) histItr->second / totalLength;\n- printf("all\\t%d\\t%d\\t%d\\t%0.7f\\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);\n- }\n- }\n-}\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h --- a/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,83 +0,0 @@ -/***************************************************************************** - coverageBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef COVERAGEBED_H -#define COVERAGEBED_H - -#include "bedFile.h" - -#include "api/BamReader.h" -#include "api/BamAux.h" -#include "BamAncillary.h" -using namespace BamTools; - -#include <vector> -#include <algorithm> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <stdlib.h> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedCoverage { - -public: - - // constructor - BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, bool writeHistogram, - bool bamInput, bool obeySplits, bool eachBase, bool countsOnly); - - // destructor - ~BedCoverage(void); - -private: - - // input files. - string _bedAFile; - string _bedBFile; - - // instance of a bed file class. - BedFile *_bedA, *_bedB; - - // do we care about same or opposite strandedness when counting coverage? - bool _sameStrand; - bool _diffStrand; - - // should we write a histogram for each feature in B? - bool _writeHistogram; - - // are we dealing with BAM input for "A"? - bool _bamInput; - - // should we split BED/BAM into discrete blocks? - bool _obeySplits; - - // should discrete coverage be reported for each base in each feature? - bool _eachBase; - - // should we just count overlaps and not try to describe the breadth? - bool _countsOnly; - - // private function for reporting coverage information - void ReportCoverage(); - - // private function for reporting overlap counts - void ReportCounts(); - - void CollectCoverageBed(); - - void CollectCoverageBam(string bamFile); -}; -#endif /* COVERAGEBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp --- a/BEDTools-Version-2.14.3/src/coverageBed/coverageMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,182 +0,0 @@ -/***************************************************************************** - coverageMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "coverageBed.h" -#include "version.h" - -using namespace std; - -// define the version -#define PROGRAM_NAME "coverageBed" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedAFile; - string bedBFile; - - // parm flags - bool sameStrand = false; - bool diffStrand = false; - bool writeHistogram = false; - bool eachBase = false; - bool obeySplits = false; - bool bamInput = false; - bool haveBedA = false; - bool haveBedB = false; - bool countsOnly = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-a", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-abam", 5, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - bamInput = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedB = true; - bedBFile = argv[i + 1]; - i++; - } - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else if (PARAMETER_CHECK("-hist", 5, parameterLength)) { - writeHistogram = true; - } - else if(PARAMETER_CHECK("-d", 2, parameterLength)) { - eachBase = true; - } - else if (PARAMETER_CHECK("-split", 6, parameterLength)) { - obeySplits = true; - } - else if (PARAMETER_CHECK("-counts", 7, parameterLength)) { - countsOnly = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBedA || !haveBedB) { - cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; - showHelp = true; - } - - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, sameStrand, diffStrand, - writeHistogram, bamInput, obeySplits, eachBase, countsOnly); - delete bg; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl; - cerr << "\t on the intervals in B." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-abam\t" << "The A input file is in BAM format." << endl << endl; - - cerr << "\t-s\t" << "Require same strandedness. That is, only counts hits in A that" << endl; - cerr << "\t\toverlap B on the _same_ strand." << endl; - cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; - - cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in A that" << endl; - cerr << "\t\toverlap B on the _opposite_ strand." << endl; - cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl; - - cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl; - cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl; - cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl; - cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl; - - cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl; - cerr << "\t\tPositions reported are one based. Each position" << endl; - cerr << "\t\tand depth follow the complete B feature." << endl << endl; - - cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl; - - cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl; - cerr << "\t\twhen computing coverage." << endl; - cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl; - cerr << "\t\tto infer the blocks for computing coverage." << endl; - cerr << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl; - cerr << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl; - - cerr << "Default Output: " << endl; - cerr << "\t" << " After each entry in B, reports: " << endl; - cerr << "\t 1) The number of features in A that overlapped the B interval." << endl; - cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl; - cerr << "\t 3) The length of the entry in B." << endl; - cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl; - - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/cuffToTrans/Makefile --- a/BEDTools-Version-2.14.3/src/cuffToTrans/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= cuffToTransMain.cpp cuffToTrans.cpp Fasta.cpp split.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= cuffToTrans - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fastaFromBed/Makefile --- a/BEDTools-Version-2.14.3/src/fastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,52 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/sequenceUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/Fasta/ \ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= fastaFromBedMain.cpp fastaFromBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= fastaFromBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp --- a/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,141 +0,0 @@ -/***************************************************************************** - fastaFromBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "fastaFromBed.h" - - -Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, - const string &fastaOutFile, bool useFasta, bool useStrand) { - - _useName = useName; - _dbFile = dbFile; - _bedFile = bedFile; - _fastaOutFile = fastaOutFile; - _useFasta = useFasta; - _useStrand = useStrand; - - _bed = new BedFile(_bedFile); - - // Figure out what the output file should be. - if (fastaOutFile == "stdout") { - _faOut = &cout; - } - else { - // Make sure we can open the file. - ofstream fa(fastaOutFile.c_str(), ios::out); - if ( !fa ) { - cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - else { - fa.close(); - _faOut = new ofstream(fastaOutFile.c_str(), ios::out); - } - } - - // Extract the requested intervals from the FASTA input file. - ExtractDNA(); -} - - -Bed2Fa::~Bed2Fa(void) { -} - - -//****************************************************************************** -// ReportDNA -//****************************************************************************** -void Bed2Fa::ReportDNA(const BED &bed, string &dna) { - - // revcomp if necessary. Thanks to Thomas Doktor. - if ((_useStrand == true) && (bed.strand == "-")) - reverseComplement(dna); - - if (!(_useName)) { - if (_useFasta == true) { - if (_useStrand == true) - *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; - else - *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; - } - else { - if (_useStrand == true) - *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; - else - *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; - } - } - else { - if (_useFasta == true) - *_faOut << ">" << bed.name << endl << dna << endl; - else - *_faOut << bed.name << "\t" << dna << endl; - } -} - - - -//****************************************************************************** -// ExtractDNA -//****************************************************************************** -void Bed2Fa::ExtractDNA() { - - /* Make sure that we can oen all of the files successfully*/ - - // open the fasta database for reading - ifstream faDb(_dbFile.c_str(), ios::in); - if ( !faDb ) { - cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - // open and memory-map genome file - FastaReference *fr = new FastaReference; - bool memmap = true; - fr->open(_dbFile, memmap); - - BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - string sequence; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - // make sure we are extracting >= 1 bp - if (bed.zeroLength == false) { - size_t seqLength = fr->sequenceLength(bed.chrom); - // make sure this feature will not exceed the end of the chromosome. - if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) - { - int length = bed.end - bed.start; - sequence = fr->getSubSequence(bed.chrom, bed.start, length); - ReportDNA(bed, sequence); - } - else - { - cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " - << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; - } - } - // handle zeroLength - else { - cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; - } - bed = nullBed; - } - } - _bed->Close(); -} - - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h --- a/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,56 +0,0 @@ -/***************************************************************************** - fastaFromBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef FASTAFROMBED_H -#define FASTAFROMBED_H - -#include "bedFile.h" -#include "sequenceUtils.h" -#include "Fasta.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class Bed2Fa { - -public: - - // constructor - Bed2Fa(bool useName, const string &dbFile, const string &bedFile, const string &fastaOutFile, - bool useFasta, bool useStrand); - - // destructor - ~Bed2Fa(void); - - void ExtractDNA(); - void ReportDNA(const BED &bed, string &dna); - - -private: - - bool _useName; - string _dbFile; - string _bedFile; - string _fastaOutFile; - bool _useFasta; - bool _useStrand; - - // instance of a bed file class. - BedFile *_bed; - ostream *_faOut; -}; - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp --- a/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,146 +0,0 @@ -/***************************************************************************** - fastaFromBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "fastaFromBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "fastaFromBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string fastaDbFile; - string bedFile; - - // output files - string fastaOutFile; - - // checks for existence of parameters - bool haveFastaDb = false; - bool haveBed = false; - bool haveFastaOut = false; - bool useNameOnly = false; - bool useFasta = true; - bool useStrand = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-fi", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaDb = true; - fastaDbFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaOut = true; - fastaOutFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { - if ((i+1) < argc) { - haveBed = true; - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-name", 5, parameterLength)) { - useNameOnly = true; - } - else if(PARAMETER_CHECK("-tab", 4, parameterLength)) { - useFasta = false; - } - else if(PARAMETER_CHECK("-s", 2, parameterLength)) { - useStrand = true; - } - else { - cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - if (!haveFastaDb || !haveFastaOut || !haveBed) { - showHelp = true; - } - - if (!showHelp) { - - Bed2Fa *b2f = new Bed2Fa(useNameOnly, fastaDbFile, bedFile, fastaOutFile, useFasta, useStrand); - delete b2f; - - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Extract DNA sequences into a fasta file based on feature coordinates." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf> -fo <fasta> " << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-fi\tInput FASTA file" << endl; - cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl; - cerr << "\t-fo\tOutput file (can be FASTA or TAB-delimited)" << endl; - cerr << "\t-name\tUse the name field for the FASTA header" << endl; - - cerr << "\t-tab\tWrite output in TAB delimited format." << endl; - cerr << "\t\t- Default is FASTA format." << endl << endl; - - cerr << "\t-s\tForce strandedness. If the feature occupies the antisense strand," << endl; - cerr << "\t\tthe sequence will be reverse complemented." << endl; - cerr << "\t\t- By default, strand information is ignored." << endl << endl; - - - - // end the program here - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fjoin/Makefile --- a/BEDTools-Version-2.14.3/src/fjoin/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,42 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= fjoinMain.cpp fjoin.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= fjoin - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp --- a/BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,350 +0,0 @@\n-/*****************************************************************************\n- intersectBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "fjoin.h"\n-#include <queue>\n-#include <set>\n-\n-bool leftOf(const BED &a, const BED &b);\n-\n-\n-bool BedIntersect::processHits(BED &a, vector<BED> &hits) {\n- // how many overlaps are there b/w the bed and the set of hits?\n- int s, e, overlapBases;\n- int numOverlaps = 0;\n- bool hitsFound = false;\n- int aLength = (a.end - a.start); // the length of a in b.p.\n-\n- // loop through the hits and report those that meet the user\'s criteria\n- vector<BED>::const_iterator h = hits.begin();\n- vector<BED>::const_iterator hitsEnd = hits.end();\n- for (; h != hitsEnd; ++h) {\n- s = max(a.start, h->start);\n- e = min(a.end, h->end);\n- overlapBases = (e - s); // the number of overlapping bases b/w a and b\n-\n- // is there enough overlap relative to the user\'s request? (default ~ 1bp)\n- if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n- // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n- if (_reciprocal == false) {\n- hitsFound = true;\n- numOverlaps++;\n- if (_printable == true)\n- ReportOverlapDetail(overlapBases, a, *h, s, e);\n- }\n- // we require there to be sufficient __reciprocal__ overlap\n- else {\n- int bLength = (h->end - h->start);\n- float bOverlap = ( (float) overlapBases / (float) bLength );\n- if (bOverlap >= _overlapFraction) {\n- hitsFound = true;\n- numOverlaps++;\n- if (_printable == true)\n- ReportOverlapDetail(overlapBases, a, *h, s, e);\n- }\n- }\n- }\n- }\n- // report the summary of the overlaps if requested.\n- ReportOverlapSummary(a, numOverlaps);\n- // were hits found for this BED feature?\n- return hitsFound;\n-}\n-\n-/*\n- Constructor\n-*/\n-BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n- bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n- float overlapFraction, bool noHit, bool writeCount, bool forceStrand,\n- bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {\n-\n- _bedAFile = bedAFile;\n- _bedBFile = bedBFile;\n- _anyHit = anyHit;\n- _noHit = noHit;\n- _writeA = writeA;\n- _writeB = writeB;\n- _writeOverlap = writeOverlap;\n- _writeAllOverlap = writeAllOverlap;\n- _writeCount = writeCount;\n- _overlapFraction = overlapFraction;\n- _forceStrand = forceStrand;\n- _reciprocal = reciprocal;\n- _obeySplits = obeySplits;\n- _bamInput = bamInput;\n- _bamOutput = bamOutput;\n-\n- if (_anyHit || _noHit || _writeCount)\n- _printable = false;\n- else\n- _printable = true;\n-\n- // create new BED file objects for A and B\n- _bedA = new BedFile(bedAFile);\n- _bedB = new BedFile(bedBFile);\n-\n- IntersectBed();\n-}\n-\n-\n-/*\n- Destructor\n-*/\n-BedIntersect::~BedIntersect(void) {\n-}\n-\n-\n-bool leftOf(const BED &a, const BED &b) {\n- return (a.end <= b.start);\n-}\n-\n-\n-void BedIntersect::ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b,\n- const CHRPOS &s, const CHRPOS'..b'\n- it = _windowA.find(chrom);\n- if (it != _windowA.end()) {\n- return & _windowA[chrom];\n- }\n- else {\n- _windowA.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n- return & _windowA[chrom];\n- }\n- }\n- else {\n- it = _windowB.find(chrom);\n- if (it != _windowB.end()) {\n- return & _windowB[chrom];\n- }\n- else {\n- _windowB.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));\n- return & _windowB[chrom];\n- }\n- }\n-}\n-\n-\n-void BedIntersect::ChromSwitch(const string &chrom) {\n-\n- vector<BED*>::iterator windowAIter = _windowA[chrom].begin();\n- vector<BED*>::iterator windowAEnd = _windowA[chrom].end();\n- for (; windowAIter != windowAEnd; ++windowAIter)\n- (*windowAIter)->finished = true;\n-\n- vector<BED*>::iterator windowBIter = _windowB[chrom].begin();\n- vector<BED*>::iterator windowBEnd = _windowB[chrom].end();\n- for (; windowBIter != windowBEnd; ++windowBIter)\n- (*windowBIter)->finished = true;\n-\n- FlushOutputBuffer();\n-}\n-\n-\n-void BedIntersect::IntersectBed() {\n-\n- int aLineNum = 0;\n- int bLineNum = 0;\n-\n- // current feature from each file\n- BED *a, *b, *prevA, *prevB;\n-\n- // status of the current lines\n- BedLineStatus aStatus, bStatus;\n-\n- // open the files; get the first line from each\n- _bedA->Open();\n- _bedB->Open();\n-\n- prevA = NULL;\n- prevB = NULL;\n- a = new BED();\n- b = new BED();\n- aStatus = _bedA->GetNextBed(*a, aLineNum);\n- bStatus = _bedB->GetNextBed(*b, bLineNum);\n-\n- cout << a->chrom << " " << a->start << " " << a->chrom << " " << b->start << endl;\n- while (aStatus != BED_INVALID || bStatus != BED_INVALID) {\n- \n- if ((a->start <= b->start) && (a->chrom == b->chrom)) {\n- prevA = a;\n- _lastPick = 0;\n- Scan(a, GetWindow(a->chrom, true), aStatus,\n- *b, GetWindow(a->chrom, false), bStatus);\n-\n- a = new BED();\n- aStatus = _bedA->GetNextBed(*a, aLineNum);\n- }\n- else if ((a->start > b->start) && (a->chrom == b->chrom)) {\n- prevB = b;\n- _lastPick = 1;\n- Scan(b, GetWindow(b->chrom, false), bStatus,\n- *a, GetWindow(b->chrom, true), aStatus);\n-\n- b = new BED();\n- bStatus = _bedB->GetNextBed(*b, bLineNum);\n- }\n- else if (a->chrom != b->chrom) {\n- // A was most recently read\n- if (_lastPick == 0) {\n- prevB = b;\n- while (b->chrom == prevA->chrom){\n- _windowB[prevA->chrom].push_back(b);\n- b = new BED();\n- bStatus = _bedB->GetNextBed(*b, bLineNum);\n- }\n- Scan(prevA, GetWindow(prevA->chrom, true), aStatus,\n- *prevB, GetWindow(prevA->chrom, false), bStatus);\n- }\n- // B was most recently read\n- else {\n- prevA = a;\n- while (a->chrom == prevB->chrom) {\n- _windowA[prevB->chrom].push_back(a);\n- a = new BED();\n- aStatus = _bedA->GetNextBed(*a, aLineNum);\n- }\n- Scan(prevB, GetWindow(prevB->chrom, false), bStatus,\n- *prevA, GetWindow(prevB->chrom, true), aStatus);\n- }\n- FlushOutputBuffer(true);\n- }\n- if (prevA!=NULL&&prevB!=NULL)\n- //cout << prevA->chrom << " " << a->chrom << " " << a->start << " "\n- // << prevB->chrom << " " << b->chrom << " " << b->start << "\\n";\n- if (aStatus == BED_INVALID) a->start = INT_MAX;\n- if (bStatus == BED_INVALID) b->start = INT_MAX;\n- }\n-\n- // clear out the final bit of staged output\n- FlushOutputBuffer(true);\n-\n- // close the files\n- _bedA->Close();\n- _bedB->Close();\n-}\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fjoin/fjoin.h --- a/BEDTools-Version-2.14.3/src/fjoin/fjoin.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,114 +0,0 @@ -/***************************************************************************** - intersectBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERSECTBED_H -#define INTERSECTBED_H - -#include "bedFile.h" -// #include "BamReader.h" -// #include "BamWriter.h" -// #include "BamAncillary.h" -// #include "BamAux.h" -// using namespace BamTools; - - -#include <vector> -#include <queue> -#include <iostream> -#include <fstream> -#include <stdlib.h> -using namespace std; - - - -class BedIntersect { - -public: - - // constructor - BedIntersect(string bedAFile, string bedBFile, bool anyHit, - bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, - float overlapFraction, bool noHit, bool writeCount, bool forceStrand, - bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput); - - // destructor - ~BedIntersect(void); - -private: - - //------------------------------------------------ - // private attributes - //------------------------------------------------ - string _bedAFile; - string _bedBFile; - - bool _writeA; // should the original A feature be reported? - bool _writeB; // should the original B feature be reported? - bool _writeOverlap; - bool _writeAllOverlap; - - bool _forceStrand; - bool _reciprocal; - float _overlapFraction; - - bool _anyHit; - bool _noHit; - bool _writeCount; // do we want a count of the number of overlaps in B? - bool _obeySplits; - bool _bamInput; - bool _bamOutput; - - bool _printable; - - queue<BED*> _outputBuffer; - bool _lastPick; - - map<string, vector<BED*> > _windowA; - map<string, vector<BED*> > _windowB; - - // instance of a bed file class. - BedFile *_bedA, *_bedB; - - //------------------------------------------------ - // private methods - //------------------------------------------------ - void IntersectBed(istream &bedInput); - - void Scan(BED *x, vector<BED *> *windowX, BedLineStatus xStatus, - const BED &y, vector<BED *> *windowY, BedLineStatus yStatus); - - void AddHits(BED *x, const BED &y); - - void FlushOutputBuffer(bool final = false); - - vector<BED*>* GetWindow(const string &chrom, bool isA); - - void ChromSwitch(const string &chrom); - - void IntersectBed(); - - void IntersectBam(string bamFile); - - bool processHits(BED &a, vector<BED> &hits); - - bool FindOverlaps(const BED &a, vector<BED> &hits); - - bool FindOneOrMoreOverlap(const BED &a); - - void ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b, - const CHRPOS &s, const CHRPOS &e); - void ReportOverlapSummary(const BED &a, const int &numOverlapsFound); - - void ReportHits(set<BED> &A, set<BED> &B); - -}; - -#endif /* INTERSECTBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp --- a/BEDTools-Version-2.14.3/src/fjoin/fjoinMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,271 +0,0 @@\n-/*****************************************************************************\n- intersectMain.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "fjoin.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "fjoin"\n-\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedAFile;\n- string bedBFile;\n-\n- // input arguments\n- float overlapFraction = 1E-9;\n-\n- bool haveBedA = false;\n- bool haveBedB = false;\n- bool noHit = false;\n- bool anyHit = false;\n- bool writeA = false;\n- bool writeB = false;\n- bool writeCount = false;\n- bool writeOverlap = false;\n- bool writeAllOverlap = false;\n- bool haveFraction = false;\n- bool reciprocalFraction = false;\n- bool forceStrand = false;\n- bool obeySplits = false;\n- bool inputIsBam = false;\n- bool outputIsBam = true;\n-\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- outputIsBam = false;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- inputIsBam = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedB = true;\n- bedBFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n- outputIsBam = false;\n- }\n- else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n- anyHit = true;\n- }\n- else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFraction = true;\n- overlapFraction = atof(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n- writeA = true;\n- }\n- else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n- writeB = true;\n- }\n- else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n- writeOverlap = true;\n- }\n- else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n- writeAllOverlap = true;\n- writeOverlap = true;\n- }\n- else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n- writeCount = true;\n- }\n- else if(PARAMETER_CHECK("-r", 2, parameterLength)) {\n- reciprocalFraction = true;\n- }\n- else if (PARAMETER'..b'IsBam, outputIsBam);\n- delete bi;\n- return 0;\n- }\n- else {\n- ShowHelp();\n- }\n-}\n-\n-void ShowHelp(void) {\n-\n- cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n-\n- cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n-\n- cerr << "Summary: Report overlaps between two feature files." << endl << endl;\n-\n- cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n-\n- cerr << "Options: " << endl;\n-\n- cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;\n-\n- cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. The default" << endl;\n- cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n-\n- cerr << "\\t-wa\\t" << "Write the original entry in A for each overlap." << endl << endl;\n-\n- cerr << "\\t-wb\\t" << "Write the original entry in B for each overlap." << endl;\n- cerr << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-wo\\t" << "Write the original A and B entries plus the number of base" << endl;\n- cerr << "\\t\\tpairs of overlap between the two features." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n- cerr << "\\t\\t Only A features with overlap are reported." << endl << endl;\n-\n- cerr << "\\t-wao\\t" << "Write the original A and B entries plus the number of base" << endl;\n- cerr << "\\t\\tpairs of overlap between the two features." << endl;\n- cerr << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n- cerr << "\\t\\t However, A features w/o overlap are also reported" << endl;\n- cerr << "\\t\\t with a NULL B feature and overlap = 0." << endl << endl;\n-\n- cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n- cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n- cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n- cerr << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n-\n- cerr << "\\t-f\\t" << "Minimum overlap required as a fraction of A." << endl;\n- cerr << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n- cerr << "\\t\\t- FLOAT (e.g. 0.50)" << endl << endl;\n-\n- cerr << "\\t-r\\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;\n- cerr << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n- cerr << "\\t\\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n-\n- cerr << "\\t-s\\t" << "Force strandedness. That is, only report hits in B that" << endl;\n- cerr << "\\t\\toverlap A on the same strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-split\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n-\n-\n- // end the program here\n- exit(1);\n-\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/flankBed/Makefile --- a/BEDTools-Version-2.14.3/src/flankBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= flankBedMain.cpp flankBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= flankBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp --- a/BEDTools-Version-2.14.3/src/flankBed/flankBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,163 +0,0 @@ -/***************************************************************************** - flankBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "flankBed.h" - - -BedFlank::BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftFlank, float rightFlank, bool fractional) { - - _bedFile = bedFile; - _genomeFile = genomeFile; - _forceStrand = forceStrand; - _leftFlank = leftFlank; - _rightFlank = rightFlank; - _fractional = fractional; - - _bed = new BedFile(bedFile); - _genome = new GenomeFile(genomeFile); - - // get going, slop it up. - FlankBed(); -} - - -BedFlank::~BedFlank(void) { - -} - - -void BedFlank::FlankBed() { - - int lineNum = 0; - BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - - _bed->Open(); - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - while (bedStatus != BED_INVALID) { - if (bedStatus == BED_VALID) { - - int leftFlank = _leftFlank; - int rightFlank = _rightFlank; - if (_fractional == true) { - leftFlank = (int) (_leftFlank * bedEntry.size()); - rightFlank = (int) (_rightFlank * bedEntry.size()); - } - - if ((_forceStrand == false) || (bedEntry.strand == "+")) - { - AddFlank(bedEntry, leftFlank, rightFlank); - } - else if ((_forceStrand == true) && (bedEntry.strand == "-" )) - { - AddStrandedFlank(bedEntry, leftFlank, rightFlank); - } - bedEntry = nullBed; - } - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - } - _bed->Close(); -} - - -void BedFlank::AddFlank(BED &bed, int leftFlank, int rightFlank) { - - int chromSize = _genome->getChromSize(bed.chrom); - if (chromSize == -1) { - cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." << endl; - exit(1); - } - - // init. our left and right flanks to the original BED entry. - // we'll create the flanks from these coordinates. - BED left = bed; - BED right = bed; - - // make the left flank (if necessary) - if (leftFlank > 0) { - if ( (static_cast<int>(left.start) - leftFlank) > 0) - { - left.end = left.start; - left.start -= leftFlank; - } - else - { - left.end = left.start; - left.start = 0; - } - // report the left flank - _bed->reportBedNewLine(left); - } - - // make the left flank (if necessary) - if (rightFlank > 0) { - if ( (static_cast<int>(right.end) + (rightFlank+1)) <= static_cast<int>(chromSize)) - { - right.start = right.end; - right.end += (rightFlank); - } - else { - right.start = right.end; - right.end += chromSize; - } - // report the right flank - _bed->reportBedNewLine(right); - } -} - - -void BedFlank::AddStrandedFlank(BED &bed, int leftFlank, int rightFlank) { - - int chromSize = _genome->getChromSize(bed.chrom); - if (chromSize == -1) { - cerr << "ERROR: chrom \"" << bed.chrom << "\" not found in genome file. Exiting." << endl; - exit(1); - } - - // init. our left and right flanks to the original BED entry. - // we'll create the flanks from these coordinates. - BED left = bed; - BED right = bed; - - // make the left flank (if necessary) - if (rightFlank > 0) { - if ( (static_cast<int>(left.start) - rightFlank) > 0) - { - left.end = left.start; - left.start -= rightFlank; - } - else - { - left.end = left.start; - left.start = 0; - } - // report the left flank - _bed->reportBedNewLine(left); - } - - // make the left flank (if necessary) - if (leftFlank > 0) { - if ( (static_cast<int>(right.end) + leftFlank) <= static_cast<int>(chromSize)) - { - right.start = right.end; - right.end += leftFlank; - } - else { - right.start = right.end; - right.end = chromSize; - } - // report the right flank - _bed->reportBedNewLine(right); - } -} - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/flankBed/flankBed.h --- a/BEDTools-Version-2.14.3/src/flankBed/flankBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,63 +0,0 @@ -/***************************************************************************** - flankBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ - -#include "bedFile.h" -#include "genomeFile.h" - -#include <vector> -#include <iostream> -#include <fstream> -#include <map> -#include <cstdlib> -#include <ctime> -using namespace std; - - -//************************************************ -// Class methods and elements -//************************************************ -class BedFlank { - -public: - - // constructor - BedFlank(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional); - - // destructor - ~BedFlank(void); - - - -private: - - string _bedFile; - string _genomeFile; - - bool _forceStrand; - float _leftFlank; - float _rightFlank; - bool _fractional; - - BedFile *_bed; - GenomeFile *_genome; - - // methods - - void FlankBed(); - - // method to grab requested flank w.r.t. a single BED entry - void AddFlank(BED &bed, int leftSlop, int rightSlop); - - // method to grab requested flank w.r.t. a single BED entry, - // while choosing flanks based on strand - void AddStrandedFlank(BED &bed, int leftSlop, int rightSlop); -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp --- a/BEDTools-Version-2.14.3/src/flankBed/flankBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,190 +0,0 @@ -/***************************************************************************** - flankBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "flankBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "flankBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - string genomeFile; - - bool haveBed = true; - bool haveGenome = false; - bool haveLeft = false; - bool haveRight = false; - bool haveBoth = false; - - bool forceStrand = false; - float leftSlop = 0.0; - float rightSlop = 0.0; - bool fractional = false; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-g", 2, parameterLength)) { - if ((i+1) < argc) { - haveGenome = true; - genomeFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-l", 2, parameterLength)) { - if ((i+1) < argc) { - haveLeft = true; - leftSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-r", 2, parameterLength)) { - if ((i+1) < argc) { - haveRight = true; - rightSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBoth = true; - leftSlop = atof(argv[i + 1]); - rightSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; - } - else if(PARAMETER_CHECK("-pct", 4, parameterLength)) { - fractional = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed || !haveGenome) { - cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; - showHelp = true; - } - if (!haveLeft && !haveRight && !haveBoth) { - cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl; - showHelp = true; - } - if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) { - cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl; - showHelp = true; - } - if (forceStrand && (!(haveLeft) || !(haveRight))) { - cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedFlank *bc = new BedFlank(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional); - delete bc; - - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Creates flanking interval(s) for each BED/GFF/VCF feature." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-b\t" << "Create flanking intervak using -b base pairs in each direction." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-l\t" << "The number of base pairs that a flank should start from orig. start coordinate." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-r\t" << "The number of base pairs that a flank should end from orig. end coordinate." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-s\t" << "Define -l and -r based on strand." << endl; - cerr << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl; - cerr << "\t\tit will start the flank 500 bp downstream. Default = false." << endl << endl; - - cerr << "\t-pct\t" << "Define -l and -r as a fraction of the feature's length." << endl; - cerr << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl; - cerr << "\t\twill add 500 bp \"upstream\". Default = false." << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) Starts will be set to 0 if options would force it below 0." << endl; - cerr << "\t(2) Ends will be set to the chromosome length if requested flank would" << endl; - cerr << "\tforce it above the max chrom length." << endl; - - cerr << "\t(3) The genome file should tab delimited and structured as follows:" << endl; - cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl; - cerr << "\tFor example, Human (hg19):" << endl; - cerr << "\tchr1\t249250621" << endl; - cerr << "\tchr2\t243199373" << endl; - cerr << "\t..." << endl; - cerr << "\tchr18_gl000207_random\t4262" << endl << endl; - - - cerr << "Tips: " << endl; - cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; - cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; - cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; - cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; - - - // end the program here - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile --- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,52 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= genomeCoverageMain.cpp genomeCoverageBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= genomeCoverageBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp --- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,396 +0,0 @@\n-/*****************************************************************************\n-genomeCoverage.cpp\n-\n-(c) 2009 - Aaron Quinlan\n-Hall Laboratory\n-Department of Biochemistry and Molecular Genetics\n-University of Virginia\n-aaronquinlan@gmail.com\n-\n-Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "genomeCoverageBed.h"\n-\n-\n-BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,\n- bool eachBase, bool startSites, \n- bool bedGraph, bool bedGraphAll,\n- int max, float scale,\n- bool bamInput, bool obeySplits,\n- bool filterByStrand, string requestedStrand,\n- bool only_5p_end, bool only_3p_end,\n- bool eachBaseZeroBased,\n- bool add_gb_track_line, string gb_track_line_opts) {\n-\n- _bedFile = bedFile;\n- _genomeFile = genomeFile;\n- _eachBase = eachBase;\n- _eachBaseZeroBased = eachBaseZeroBased;\n- _startSites = startSites;\n- _bedGraph = bedGraph;\n- _bedGraphAll = bedGraphAll;\n- _max = max;\n- _scale = scale;\n- _bamInput = bamInput;\n- _obeySplits = obeySplits;\n- _filterByStrand = filterByStrand;\n- _requestedStrand = requestedStrand;\n- _only_3p_end = only_3p_end;\n- _only_5p_end = only_5p_end;\n- _add_gb_track_line = add_gb_track_line;\n- _gb_track_line_opts = gb_track_line_opts;\n- _currChromName = "";\n- _currChromSize = 0 ;\n-\n- \n- if (_bamInput == false) {\n- _genome = new GenomeFile(genomeFile);\n- }\n- \n- PrintTrackDefinitionLine();\n-\n- if (_bamInput == false) {\n- _bed = new BedFile(bedFile);\n- CoverageBed();\n- }\n- else {\n- CoverageBam(_bedFile);\n- }\n-}\n-\n-void BedGenomeCoverage::PrintTrackDefinitionLine()\n-{\n- //Print Track Definition line (if requested)\n- if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) {\n- string line = "track type=bedGraph";\n- if (!_gb_track_line_opts.empty()) {\n- line += " " ;\n- line += _gb_track_line_opts ;\n- }\n- cout << line << endl;\n- }\n-\n-}\n-\n-\n-BedGenomeCoverage::~BedGenomeCoverage(void) {\n- delete _bed;\n- delete _genome;\n-}\n-\n-\n-void BedGenomeCoverage::ResetChromCoverage() {\n- _currChromName = "";\n- _currChromSize = 0 ;\n- std::vector<DEPTH>().swap(_currChromCoverage);\n-}\n-\n-\n-void BedGenomeCoverage::StartNewChrom(const string& newChrom) {\n- // If we\'ve moved beyond the first encountered chromosomes,\n- // process the results of the previous chromosome.\n- if (_currChromName.length() > 0) {\n- ReportChromCoverage(_currChromCoverage, _currChromSize,\n- _currChromName, _currChromDepthHist);\n- }\n-\n- // empty the previous chromosome and reserve new\n- std::vector<DEPTH>().swap(_currChromCoverage);\n-\n- if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {\n- cerr << "Input error: Chromosome " << _currChromName\n- << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;\n-\n- }\n- _visitedChromosomes.insert(newChrom);\n-\n- _currChromName = newChrom;\n-\n- // get the current chrom size and allocate space\n- _currChromSize = _genome->getChromSize(_currChromName);\n-\n- if (_currChromSize >= 0)\n- _currChromCoverage.resize(_currChromSize);\n- else {\n- cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." << endl;\n- exit(1);\n- }\n-}\n-\n-\n-void BedGenomeCoverage::AddCoverage(int start, int end) {\n- // process the first line for this chromosome.\n- // make sure the coordinates fit within'..b'pth - chromCov[pos].ends;\n- }\n- // report the histogram for each chromosome\n- histMap::const_iterator depthIt = chromDepthHist[chrom].begin();\n- histMap::const_iterator depthEnd = chromDepthHist[chrom].end();\n- for (; depthIt != depthEnd; ++depthIt) {\n- int depth = depthIt->first;\n- unsigned int numBasesAtDepth = depthIt->second;\n- cout << chrom << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n- << chromSize << "\\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl;\n- }\n- }\n-}\n-\n-\n-\n-void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) {\n-\n- // get the list of chromosome names in the genome\n- vector<string> chromList = _genome->getChromList();\n-\n- unsigned int genomeSize = 0;\n- vector<string>::const_iterator chromItr = chromList.begin();\n- vector<string>::const_iterator chromEnd = chromList.end();\n- for (; chromItr != chromEnd; ++chromItr) {\n- string chrom = *chromItr;\n- genomeSize += _genome->getChromSize(chrom);\n- // if there were no reads for a give chromosome, then\n- // add the length of the chrom to the 0 bin.\n- if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) {\n- chromDepthHist[chrom][0] += _genome->getChromSize(chrom);\n- }\n- }\n-\n- histMap genomeHist; // depth histogram for the entire genome\n-\n- // loop through each chromosome and add the depth and number of bases at each depth\n- // to the aggregate histogram for the entire genome\n- for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) {\n- string chrom = chromIt->first;\n- for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) {\n- int depth = depthIt->first;\n- unsigned int numBasesAtDepth = depthIt->second;\n- genomeHist[depth] += numBasesAtDepth;\n- }\n- }\n-\n- // loop through the depths for the entire genome\n- // and report the number and fraction of bases in\n- // the entire genome that are at said depth.\n- for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) {\n- int depth = genomeDepthIt->first;\n- unsigned int numBasesAtDepth = genomeDepthIt->second;\n-\n- cout << "genome" << "\\t" << depth << "\\t" << numBasesAtDepth << "\\t"\n- << genomeSize << "\\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl;\n- }\n-}\n-\n-\n-void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) {\n-\n- int depth = 0; // initialize the depth\n- int lastStart = -1;\n- int lastDepth = -1;\n-\n- for (int pos = 0; pos < chromSize; pos++) {\n- depth += chromCov[pos].starts;\n-\n- if (depth != lastDepth) {\n- // Coverage depth has changed, print the last interval coverage (if any)\n- // Print if:\n- // (1) depth>0 (the default running mode),\n- // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll)\n- if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n- cout << chrom << "\\t" << lastStart << "\\t" << pos << "\\t" << lastDepth * _scale << endl;\n- }\n- //Set current position as the new interval start + depth\n- lastDepth = depth;\n- lastStart = pos;\n- }\n- // Default: the depth has not changed, so we will not print anything.\n- // Proceed until the depth changes.\n- // Update depth\n- depth = depth - chromCov[pos].ends;\n- }\n- //Print information about the last position\n- if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {\n- cout << chrom << "\\t" << lastStart << "\\t" << chromSize << "\\t" << lastDepth * _scale << endl;\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h --- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,104 +0,0 @@ -/***************************************************************************** -genomeCoverage.h - -(c) 2009 - Aaron Quinlan -Hall Laboratory -Department of Biochemistry and Molecular Genetics -University of Virginia -aaronquinlan@gmail.com - -Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include "genomeFile.h" - -#include "BamAncillary.h" -#include "api/BamReader.h" -#include "api/BamAux.h" -using namespace BamTools; - -#include <vector> -#include <set> -#include <iostream> -#include <fstream> -using namespace std; - - -//*********************************************** -// Typedefs -//*********************************************** -typedef map<int, DEPTH, less<int> > depthMap; -typedef map<string, depthMap, less<string> > chromDepthMap; - -typedef map<int, unsigned int, less<int> > histMap; -typedef map<string, histMap, less<string> > chromHistMap; - -//************************************************ -// Class methods and elements -//************************************************ -class BedGenomeCoverage { - -public: - - // constructor - BedGenomeCoverage(string bedFile, string genomeFile, - bool eachBase, bool startSites, - bool bedGraph, bool bedGraphAll, - int max, float scale, - bool bamInput, bool obeySplits, - bool filterByStrand, string requestedStrand, - bool only_5p_end, bool only_3p_end, - bool eachBaseZeroBased, - bool add_gb_track_line, string gb_track_line_opts); - - // destructor - ~BedGenomeCoverage(void); - -private: - - // data (parms) - string _bedFile; - string _genomeFile; - bool _bamInput; - bool _eachBase; - bool _eachBaseZeroBased; - bool _startSites; - bool _bedGraph; - bool _bedGraphAll; - int _max; - float _scale; - bool _obeySplits; - bool _filterByStrand; - bool _only_5p_end; - bool _only_3p_end; - bool _add_gb_track_line; - string _gb_track_line_opts; - string _requestedStrand; - - BedFile *_bed; - GenomeFile *_genome; - - // data for internal processing - chromDepthMap _chromCov; - string _currChromName ; - vector<DEPTH> _currChromCoverage; - chromHistMap _currChromDepthHist; - int _currChromSize ; - set<string> _visitedChromosomes; - - - // methods - void CoverageBed(); - void CoverageBam(string bamFile); - void LoadBamHeaderIntoGenomeFile(const string &bamFile); - void ReportChromCoverage(const vector<DEPTH> &, const int &chromSize, const string &chrom, chromHistMap&); - void ReportGenomeCoverage(chromHistMap &chromDepthHist); - void ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom); - void ResetChromCoverage(); - void StartNewChrom (const string& chrom); - void AddCoverage (int start, int end); - void AddBlockedCoverage(const vector<BED> &bedBlocks); - void PrintFinalCoverage(); - void PrintTrackDefinitionLine(); -}; - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp --- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,312 +0,0 @@\n-/*****************************************************************************\n-genomeCoverageMain.cpp\n-\n-(c) 2009 - Aaron Quinlan\n-Hall Laboratory\n-Department of Biochemistry and Molecular Genetics\n-University of Virginia\n-aaronquinlan@gmail.com\n-\n-Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "genomeCoverageBed.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "genomeCoverageBed"\n-\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedFile;\n- string genomeFile;\n- int max = INT_MAX;\n- float scale = 1.0;\n-\n- bool haveBed = false;\n- bool bamInput = false;\n- bool haveGenome = false;\n- bool startSites = false;\n- bool bedGraph = false;\n- bool bedGraphAll = false;\n- bool eachBase = false;\n- bool eachBaseZeroBased = false;\n- bool obeySplits = false;\n- bool haveScale = false;\n- bool filterByStrand = false;\n- bool only_5p_end = false;\n- bool only_3p_end = false;\n- bool add_gb_track_line = false;\n- string gb_track_opts;\n- string requestedStrand = "X";\n-\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBed = true;\n- bedFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-ibam", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBed = true;\n- bamInput = true;\n- bedFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveGenome = true;\n- genomeFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-d", 2, parameterLength)) {\n- eachBase = true;\n- }\n- else if(PARAMETER_CHECK("-dz", 3, parameterLength)) {\n- eachBase = true;\n- eachBaseZeroBased = true;\n- }\n- else if(PARAMETER_CHECK("-bg", 3, parameterLength)) {\n- bedGraph = true;\n- }\n- else if(PARAMETER_CHECK("-bga", 4, parameterLength)) {\n- bedGraphAll = true;\n- }\n- else if(PARAMETER_CHECK("-max", 4, parameterLength)) {\n- if ((i+1) < argc) {\n- max = atoi(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-scale", 6, parameterLength)) {\n- if ((i+1) < argc) {\n- haveScale = true;\n- scale = atof(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-split", 6, parameterLength)) {\n- obeySplits = true;\n- }\n- else if(PARAMETER_CHECK("-strand", 7, parameterLength)) {\n- if ((i+1) < argc) {\n- filterByStrand = true;\n- requestedStrand = argv[i+1][0];\n- if (!(requestedStrand == "-" || requestedStrand == "+")) {\n- '..b'\\tquickly extract all regions of a genome with 0 " << endl;\n- cerr << "\\t\\t\\tcoverage by applying: \\"grep -w 0$\\" to the output." << endl << endl;\n-\n- cerr << "\\t-split\\t\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl;\n- cerr << "\\t\\t\\twhen computing coverage." << endl;\n- cerr << "\\t\\t\\tFor BAM files, this uses the CIGAR \\"N\\" and \\"D\\" operations " << endl;\n- cerr << "\\t\\t\\tto infer the blocks for computing coverage." << endl;\n- cerr << "\\t\\t\\tFor BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds" << endl;\n- cerr << "\\t\\t\\tfields (i.e., columns 10,11,12)." << endl << endl;\n-\n- cerr << "\\t-strand\\t\\t" << "Calculate coverage of intervals from a specific strand." << endl;\n- cerr << "\\t\\t\\tWith BED files, requires at least 6 columns (strand is column 6). " << endl;\n- cerr << "\\t\\t\\t- (STRING): can be + or -" << endl << endl;\n-\n- cerr << "\\t-5\\t\\t" << "Calculate coverage of 5\\" positions (instead of entire interval)." << endl << endl;\n-\n- cerr << "\\t-3\\t\\t" << "Calculate coverage of 3\\" positions (instead of entire interval)." << endl << endl;\n-\n- cerr << "\\t-max\\t\\t" << "Combine all positions with a depth >= max into" << endl;\n- cerr << "\\t\\t\\ta single bin in the histogram. Irrelevant" << endl;\n- cerr << "\\t\\t\\tfor -d and -bedGraph" << endl;\n- cerr << "\\t\\t\\t- (INTEGER)" << endl << endl;\n-\n- cerr << "\\t-scale\\t\\t" << "Scale the coverage by a constant factor." << endl;\n- cerr << "\\t\\t\\tEach coverage value is multiplied by this factor before being reported." << endl;\n- cerr << "\\t\\t\\tUseful for normalizing coverage by, e.g., reads per million (RPM)." << endl;\n- cerr << "\\t\\t\\t- Default is 1.0; i.e., unscaled." << endl;\n- cerr << "\\t\\t\\t- (FLOAT)" << endl << endl;\n-\n- cerr << "\\t-trackline\\t" << "Adds a UCSC/Genome-Browser track line definition in the first line of the output." << endl;\n- cerr <<"\\t\\t\\t- See here for more details about track line definition:" << endl;\n- cerr <<"\\t\\t\\t http://genome.ucsc.edu/goldenPath/help/bedgraph.html" << endl;\n- cerr <<"\\t\\t\\t- NOTE: When adding a trackline definition, the output BedGraph can be easily" << endl;\n- cerr <<"\\t\\t\\t uploaded to the Genome Browser as a custom track," << endl;\n- cerr <<"\\t\\t\\t BUT CAN NOT be converted into a BigWig file (w/o removing the first line)." << endl << endl;\n-\n- cerr << "\\t-trackopts\\t"<<"Writes additional track line definition parameters in the first line." << endl;\n- cerr <<"\\t\\t\\t- Example:" << endl;\n- cerr <<"\\t\\t\\t -trackopts \'name=\\"My Track\\" visibility=2 color=255,30,30\'" << endl;\n- cerr <<"\\t\\t\\t Note the use of single-quotes if you have spaces in your parameters." << endl;\n- cerr <<"\\t\\t\\t- (TEXT)" << endl << endl;\n-\n- cerr << "Notes: " << endl;\n- cerr << "\\t(1) The genome file should tab delimited and structured as follows:" << endl;\n- cerr << "\\t <chromName><TAB><chromSize>" << endl << endl;\n- cerr << "\\tFor example, Human (hg19):" << endl;\n- cerr << "\\tchr1\\t249250621" << endl;\n- cerr << "\\tchr2\\t243199373" << endl;\n- cerr << "\\t..." << endl;\n- cerr << "\\tchr18_gl000207_random\\t4262" << endl << endl;\n-\n- cerr << "\\t(2) The input BED (-i) file must be grouped by chromosome." << endl;\n- cerr << "\\t A simple \\"sort -k 1,1 <BED> > <BED>.sorted\\" will suffice."<< endl << endl;\n-\n- cerr << "\\t(3) The input BAM (-ibam) file must be sorted by position." << endl;\n- cerr << "\\t A \\"samtools sort <BAM>\\" should suffice."<< endl << endl;\n-\n- cerr << "Tips: " << endl;\n- cerr << "\\tOne can use the UCSC Genome Browser\'s MySQL database to extract" << endl;\n- cerr << "\\tchromosome sizes. For example, H. sapiens:" << endl << endl;\n- cerr << "\\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\\\" << endl;\n- cerr << "\\t\\"select chrom, size from hg19.chromInfo\\" > hg19.genome" << endl << endl;\n-\n-\n- // end the program here\n- exit(1);\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/intersectBed/Makefile --- a/BEDTools-Version-2.14.3/src/intersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,53 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary \ - -I$(UTILITIES_DIR)/chromsweep \ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= intersectMain.cpp intersectBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o BamAncillary.o gzstream.o fileType.o chromsweep.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= intersectBed - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/chromsweep/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp --- a/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,367 +0,0 @@\n-/*****************************************************************************\n- intersectBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "intersectBed.h"\n-\n-/************************************\n-Helper functions\n-************************************/\n-bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {\n-\n- // how many overlaps are there b/w the bed and the set of hits?\n- CHRPOS s, e;\n- int overlapBases;\n- int numOverlaps = 0;\n- bool hitsFound = false;\n- int aLength = (a.end - a.start); // the length of a in b.p.\n-\n- // loop through the hits and report those that meet the user\'s criteria\n- vector<BED>::const_iterator h = hits.begin();\n- vector<BED>::const_iterator hitsEnd = hits.end();\n- for (; h != hitsEnd; ++h) {\n- s = max(a.start, h->start);\n- e = min(a.end, h->end);\n- overlapBases = (e - s); // the number of overlapping bases b/w a and b\n-\n- // is there enough overlap relative to the user\'s request? (default ~ 1bp)\n- if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n- // Report the hit if the user doesn\'t care about reciprocal overlap between A and B.\n- if (_reciprocal == false) {\n- hitsFound = true;\n- numOverlaps++;\n- if (_printable == true)\n- ReportOverlapDetail(overlapBases, a, *h, s, e);\n- }\n- // we require there to be sufficient __reciprocal__ overlap\n- else {\n- int bLength = (h->end - h->start);\n- float bOverlap = ( (float) overlapBases / (float) bLength );\n- if (bOverlap >= _overlapFraction) {\n- hitsFound = true;\n- numOverlaps++;\n- if (_printable == true)\n- ReportOverlapDetail(overlapBases, a, *h, s, e);\n- }\n- }\n- }\n- }\n- // report the summary of the overlaps if requested.\n- ReportOverlapSummary(a, numOverlaps);\n- // were hits found for this BED feature?\n- return hitsFound;\n-}\n-\n-\n-/*\n- Constructor\n-*/\n-BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,\n- bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,\n- float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,\n- bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,\n- bool sortedInput) {\n-\n- _bedAFile = bedAFile;\n- _bedBFile = bedBFile;\n- _anyHit = anyHit;\n- _noHit = noHit;\n- _writeA = writeA;\n- _writeB = writeB;\n- _writeOverlap = writeOverlap;\n- _writeAllOverlap = writeAllOverlap;\n- _writeCount = writeCount;\n- _overlapFraction = overlapFraction;\n- _sameStrand = sameStrand;\n- _diffStrand = diffStrand;\n- _reciprocal = reciprocal;\n- _obeySplits = obeySplits;\n- _bamInput = bamInput;\n- _bamOutput = bamOutput;\n- _isUncompressedBam = isUncompressedBam;\n- _sortedInput = sortedInput;\n-\n- // should we print each overlap, or does the user want summary information?\n- _printable = true;\n- if (_anyHit || _noHit || _writeCount)\n- _printable = false;\n- \n- if (_bamInput == false)\n- IntersectBed();\n- else\n- IntersectBam(bedAFile);\n-}\n-\n-\n-/*\n- Destructor'..b'der.Open(bamFile);\n-\n- // get header & reference information\n- string bamHeader = reader.GetHeaderText();\n- RefVector refs = reader.GetReferenceData();\n-\n- // open a BAM output to stdout if we are writing BAM\n- if (_bamOutput == true) {\n- // set compression mode\n- BamWriter::CompressionMode compressionMode = BamWriter::Compressed;\n- if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;\n- writer.SetCompressionMode(compressionMode);\n- // open our BAM writer\n- writer.Open("stdout", bamHeader, refs);\n- }\n-\n- vector<BED> hits;\n- // reserve some space\n- hits.reserve(100);\n-\n- \n- BamAlignment bam; \n- // get each set of alignments for each pair.\n- while (reader.GetNextAlignment(bam)) {\n-\n- if (bam.IsMapped()) {\n- BED a;\n- a.chrom = refs.at(bam.RefID).RefName;\n- a.start = bam.Position;\n- a.end = bam.GetEndPosition(false, false);\n-\n- // build the name field from the BAM alignment.\n- a.name = bam.Name;\n- if (bam.IsFirstMate()) a.name += "/1";\n- if (bam.IsSecondMate()) a.name += "/2";\n-\n- a.score = ToString(bam.MapQuality);\n-\n- a.strand = "+";\n- if (bam.IsReverseStrand()) a.strand = "-";\n-\n- if (_bamOutput == true) {\n- bool overlapsFound = false;\n- // treat the BAM alignment as a single "block"\n- if (_obeySplits == false) {\n- overlapsFound = FindOneOrMoreOverlap(a);\n- }\n- // split the BAM alignment into discrete blocks and\n- // look for overlaps only within each block.\n- else {\n- bool overlapFoundForBlock;\n- bedVector bedBlocks; // vec to store the discrete BED "blocks" from a\n- // we don\'t want to split on "D" ops, hence the "false"\n- getBamBlocks(bam, refs, bedBlocks, false);\n-\n- vector<BED>::const_iterator bedItr = bedBlocks.begin();\n- vector<BED>::const_iterator bedEnd = bedBlocks.end();\n- for (; bedItr != bedEnd; ++bedItr) {\n- overlapFoundForBlock = FindOneOrMoreOverlap(*bedItr);\n- if (overlapFoundForBlock == true)\n- overlapsFound = true;\n- }\n- }\n- if (overlapsFound == true) {\n- if (_noHit == false)\n- writer.SaveAlignment(bam);\n- }\n- else {\n- if (_noHit == true) {\n- writer.SaveAlignment(bam);\n- }\n- }\n- }\n- else {\n- // treat the BAM alignment as a single BED "block"\n- if (_obeySplits == false) {\n- FindOverlaps(a, hits);\n- hits.clear();\n- }\n- // split the BAM alignment into discrete BED blocks and\n- // look for overlaps only within each block.\n- else {\n- bedVector bedBlocks; // vec to store the discrete BED "blocks" from a\n- getBamBlocks(bam, refs, bedBlocks, false);\n-\n- vector<BED>::const_iterator bedItr = bedBlocks.begin();\n- vector<BED>::const_iterator bedEnd = bedBlocks.end();\n- for (; bedItr != bedEnd; ++bedItr) {\n- FindOverlaps(*bedItr, hits);\n- hits.clear();\n- }\n- }\n- }\n- }\n- // BAM IsMapped() is false\n- else if (_noHit == true) {\n- writer.SaveAlignment(bam);\n- }\n- }\n-\n- // close the relevant BAM files.\n- reader.Close();\n- if (_bamOutput == true) {\n- writer.Close();\n- }\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h --- a/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,98 +0,0 @@ -/***************************************************************************** - intersectBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERSECTBED_H -#define INTERSECTBED_H - -#include "bedFile.h" -#include "chromsweep.h" -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -#include "BamAncillary.h" -using namespace BamTools; - - -#include <vector> -#include <iostream> -#include <fstream> -#include <stdlib.h> -using namespace std; - - - -class BedIntersect { - -public: - - // constructor - BedIntersect(string bedAFile, string bedBFile, bool anyHit, - bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap, - float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand, - bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam, - bool sortedInput); - - // destructor - ~BedIntersect(void); - -private: - - //------------------------------------------------ - // private attributes - //------------------------------------------------ - string _bedAFile; - string _bedBFile; - - bool _writeA; // should the original A feature be reported? - bool _writeB; // should the original B feature be reported? - bool _writeOverlap; - bool _writeAllOverlap; - - bool _sameStrand; - bool _diffStrand; - bool _reciprocal; - float _overlapFraction; - - bool _anyHit; - bool _noHit; - bool _writeCount; // do we want a count of the number of overlaps in B? - bool _obeySplits; - bool _bamInput; - bool _bamOutput; - bool _isUncompressedBam; - bool _sortedInput; - bool _printable; - - // instance of a bed file class. - BedFile *_bedA, *_bedB; - - //------------------------------------------------ - // private methods - //------------------------------------------------ - void IntersectBed(istream &bedInput); - - void IntersectBed(); - - void IntersectBam(string bamFile); - - bool processHits(const BED &a, const vector<BED> &hits); - - bool FindOverlaps(const BED &a, vector<BED> &hits); - - bool FindOneOrMoreOverlap(const BED &a); - - void ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e); - - void ReportOverlapSummary(const BED &a, const int &numOverlapsFound); - -}; - -#endif /* INTERSECTBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp --- a/BEDTools-Version-2.14.3/src/intersectBed/intersectMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,294 +0,0 @@\n-/*****************************************************************************\n- intersectMain.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "intersectBed.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "intersectBed"\n-\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedAFile;\n- string bedBFile;\n-\n- // input arguments\n- float overlapFraction = 1E-9;\n-\n- bool haveBedA = false;\n- bool haveBedB = false;\n- bool noHit = false;\n- bool anyHit = false;\n- bool writeA = false;\n- bool writeB = false;\n- bool writeCount = false;\n- bool writeOverlap = false;\n- bool writeAllOverlap = false;\n- bool haveFraction = false;\n- bool reciprocalFraction = false;\n- bool sameStrand = false;\n- bool diffStrand = false;\n- bool obeySplits = false;\n- bool inputIsBam = false;\n- bool outputIsBam = true;\n- bool uncompressedBam = false;\n- bool sortedInput = false;\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- outputIsBam = false;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- inputIsBam = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedB = true;\n- bedBFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n- outputIsBam = false;\n- }\n- else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n- anyHit = true;\n- }\n- else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFraction = true;\n- overlapFraction = atof(argv[i + 1]);\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-wa", 3, parameterLength)) {\n- writeA = true;\n- }\n- else if(PARAMETER_CHECK("-wb", 3, parameterLength)) {\n- writeB = true;\n- }\n- else if(PARAMETER_CHECK("-wo", 3, parameterLength)) {\n- writeOverlap = true;\n- }\n- else if(PARAMETER_CHECK("-wao", 4, parameterLength)) {\n- writeAllOverlap = true;\n- writeOverlap = true;\n- }\n- else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n- writeCount = true;\n- }\n- else '..b'BAM output. Default is to write compressed BAM." << endl << endl;\n-\n- cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. The default" << endl;\n- cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n-\n- cerr << "\\t-wa\\t" << "Write the original entry in A for each overlap." << endl << endl;\n-\n- cerr << "\\t-wb\\t" << "Write the original entry in B for each overlap." << endl;\n- cerr << "\\t\\t- Useful for knowing _what_ A overlaps. Restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-wo\\t" << "Write the original A and B entries plus the number of base" << endl;\n- cerr << "\\t\\tpairs of overlap between the two features." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl;\n- cerr << "\\t\\t Only A features with overlap are reported." << endl << endl;\n-\n- cerr << "\\t-wao\\t" << "Write the original A and B entries plus the number of base" << endl;\n- cerr << "\\t\\tpairs of overlap between the two features." << endl;\n- cerr << "\\t\\t- Overlapping features restricted by -f and -r." << endl;\n- cerr << "\\t\\t However, A features w/o overlap are also reported" << endl;\n- cerr << "\\t\\t with a NULL B feature and overlap = 0." << endl << endl;\n-\n- cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n- cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n- cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f and -r." << endl << endl;\n-\n- cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n- cerr << "\\t\\t- Similar to \\"grep -v\\" (an homage)." << endl << endl;\n-\n- cerr << "\\t-f\\t" << "Minimum overlap required as a fraction of A." << endl;\n- cerr << "\\t\\t- Default is 1E-9 (i.e., 1bp)." << endl;\n- cerr << "\\t\\t- FLOAT (e.g. 0.50)" << endl << endl;\n-\n- cerr << "\\t-r\\t" << "Require that the fraction overlap be reciprocal for A and B." << endl;\n- cerr << "\\t\\t- In other words, if -f is 0.90 and -r is used, this requires" << endl;\n- cerr << "\\t\\t that B overlap 90% of A and A _also_ overlaps 90% of B." << endl << endl;\n-\n- cerr << "\\t-s\\t" << "Require same strandedness. That is, only report hits in B that" << endl;\n- cerr << "\\t\\toverlap A on the _same_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-S\\t" << "Require different strandedness. That is, only report hits in B that" << endl;\n- cerr << "\\t\\toverlap A on the _opposite_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-split\\t" << "Treat \\"split\\" BAM or BED12 entries as distinct BED intervals." << endl << endl;\n-\n- cerr << "\\t-sorted\\t" << "Use the \\"chromsweep\\" algorithm for sorted (-k1,1 -k2,2n) input" << endl;\n- cerr << "\\t\\tNOTE: this will trust, but not enforce that data is sorted. Caveat emptor." << endl << endl;\n-\n- // end the program here\n- exit(1);\n-\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/linksBed/Makefile --- a/BEDTools-Version-2.14.3/src/linksBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= linksMain.cpp linksBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= linksBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp --- a/BEDTools-Version-2.14.3/src/linksBed/linksBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,122 +0,0 @@ -/***************************************************************************** - linksBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "linksBed.h" - -// -// Constructor -// -BedLinks::BedLinks(string &bedFile, string &base, string &org, string &db) { - _bedFile = bedFile; - _bed = new BedFile(bedFile); - - _base = base; - _org = org; - _db = db; - - CreateLinks(); -} - -// -// Destructor -// -BedLinks::~BedLinks(void) { -} - - -void BedLinks::WriteURL(BED &bed, string &base) { - - string position = bed.chrom; - std::stringstream posStream; - posStream << ":" << bed.start << "-" << bed.end; - position.append(posStream.str()); - - cout << "<tr>" << endl; - cout << "\t<td>" << endl; - cout << "\t\t<a href=" << base << position << ">"; - cout << bed.chrom << ":" << bed.start << "-" << bed.end; - cout << "</a>" << endl; - cout << "\t</td>" << endl; - - if (_bed->bedType == 4) { - cout << "\t<td>" << endl; - cout << bed.name << endl; - cout << "\t</td>" << endl; - } - else if (_bed->bedType == 5) { - cout << "\t<td>" << endl; - cout << bed.name << endl; - cout << "\t</td>" << endl; - - cout << "\t<td>" << endl; - cout << bed.score << endl; - cout << "\t</td>" << endl; - } - else if ((_bed->bedType == 6) || (_bed->bedType == 9) || (_bed->bedType == 12)) { - cout << "\t<td>" << endl; - cout << bed.name << endl; - cout << "\t</td>" << endl; - - cout << "\t<td>" << endl; - cout << bed.score << endl; - cout << "\t</td>" << endl; - - cout << "\t<td>" << endl; - cout << bed.strand << endl; - cout << "\t</td>" << endl; - } - cout << "</tr>" << endl; -} - - -void BedLinks::CreateLinks() { - - - // construct the html base. - string org = _org; - string db = _db; - string base = _base; - base.append("/cgi-bin/hgTracks?org="); - base.append(org); - base.append("&db="); - base.append(db); - base.append("&position="); - - // create the HTML header - cout << "<html>" << endl <<"\t<body>" << endl; - cout << "<title>" << _bedFile << "</title>" << endl; - - // start the table of entries - cout << "<br>Firefox users: Press and hold the \"apple\" or \"alt\" key and click link to open in new tab." << endl; - cout << "<p style=\"font-family:courier\">" << endl; - cout << "<table border=\"0\" align=\"justify\"" << endl; - cout << "<h3>BED Entries from: stdin </h3>" << endl; - - int lineNum = 0; - BED bedEntry, nullBed; - BedLineStatus bedStatus; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - WriteURL(bedEntry, base); - bedEntry = nullBed; - } - } - _bed->Close(); - - cout << "</table>" << endl; - cout << "</p>" << endl; - cout << "\t</body>" << endl <<"</html>" << endl; -} - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/linksBed/linksBed.h --- a/BEDTools-Version-2.14.3/src/linksBed/linksBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -/***************************************************************************** - linksBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include <vector> -#include <algorithm> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedLinks { - -public: - - // constructor - BedLinks(string &bedFile, string &base, string &org, string &db); - - // destructor - ~BedLinks(void); - -private: - string _bedFile; - string _base; - string _org; - string _db; - - // instance of a bed file class. - BedFile *_bed; - - void WriteURL(BED &bed, string &base); - void CreateLinks(); // the default. sorts by chrom (asc.) then by start (asc.) -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp --- a/BEDTools-Version-2.14.3/src/linksBed/linksMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,129 +0,0 @@ -/***************************************************************************** - linksBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "linksBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "linksBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - bool haveBed = true; - - /* Defaults for everyone else */ - string org = "human"; - string db = "hg18"; - string base = "http://genome.ucsc.edu"; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-base", 5, parameterLength)) { - if ((i+1) < argc) { - base = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-org", 4, parameterLength)) { - if ((i+1) < argc) { - org = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-db", 3, parameterLength)) { - if ((i+1) < argc) { - db = argv[i + 1]; - i++; - } - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedLinks *bl = new BedLinks(bedFile, base, org, db); - delete bl; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Creates HTML links to an UCSC Genome Browser from a feature file." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> > out.html" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-base\t" << "The browser basename. Default: http://genome.ucsc.edu " << endl; - cerr << "\t-org\t" << "The organism. Default: human" << endl; - cerr << "\t-db\t" << "The build. Default: hg18" << endl << endl; - - cerr << "Example: " << endl; - cerr << "\t" << "By default, the links created will point to human (hg18) UCSC browser." << endl; - cerr << "\tIf you have a local mirror, you can override this behavior by supplying" << endl; - cerr << "\tthe -base, -org, and -db options." << endl << endl; - cerr << "\t" << "For example, if the URL of your local mirror for mouse MM9 is called: " << endl; - cerr << "\thttp://mymirror.myuniversity.edu, then you would use the following:" << endl; - cerr << "\t" << "-base http://mymirror.myuniversity.edu" << endl; - cerr << "\t" << "-org mouse" << endl; - cerr << "\t" << "-db mm9" << endl; - - - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile --- a/BEDTools-Version-2.14.3/src/maskFastaFromBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= maskFastaFromBedMain.cpp maskFastaFromBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= maskFastaFromBed - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp --- a/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,155 +0,0 @@ -/***************************************************************************** - maskFastaFromBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "maskFastaFromBed.h" - - -MaskFastaFromBed::MaskFastaFromBed(const string &fastaInFile, const string &bedFile, - const string &fastaOutFile, bool softMask, char maskChar) { - _softMask = softMask; - _fastaInFile = fastaInFile; - _bedFile = bedFile; - _fastaOutFile = fastaOutFile; - _maskChar = maskChar; - _bed = new BedFile(_bedFile); - - _bed->loadBedFileIntoMapNoBin(); - // start masking. - MaskFasta(); -} - - -MaskFastaFromBed::~MaskFastaFromBed(void) { -} - - -//****************************************************************************** -// Mask the Fasta file based on the coordinates in the BED file. -//****************************************************************************** -void MaskFastaFromBed::MaskFasta() { - - /* Make sure that we can open all of the files successfully*/ - - // open the fasta database for reading - ifstream fa(_fastaInFile.c_str(), ios::in); - if ( !fa ) { - cerr << "Error: The requested fasta file (" << _fastaInFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - // open the fasta database for reading - ofstream faOut(_fastaOutFile.c_str(), ios::out); - if ( !faOut ) { - cerr << "Error: The requested fasta output file (" << _fastaOutFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - - /* Read the fastaDb chromosome by chromosome*/ - string fastaInLine; - string currChrom; - string currDNA = ""; - currDNA.reserve(500000000); - int fastaWidth = -1; - bool widthSet = false; - int start, end, length; - string replacement; - - while (getline(fa,fastaInLine)) { - - if (fastaInLine.find(">",0) != 0 ) { - if (widthSet == false) { - fastaWidth = fastaInLine.size(); - widthSet = true; - } - currDNA += fastaInLine; - } - else { - if (currDNA.size() > 0) { - - vector<BED> bedList = _bed->bedMapNoBin[currChrom]; - - /* - loop through each BED entry for this chrom and - mask the requested sequence in the FASTA file. - */ - for (unsigned int i = 0; i < bedList.size(); i++) { - start = bedList[i].start; - end = bedList[i].end; - length = end - start; - - /* - (1) if soft masking, extract the sequence, lowercase it, - then put it back - (2) otherwise replace with Ns - */ - if (_softMask) { - replacement = currDNA.substr(start, length); - toLowerCase(replacement); - currDNA.replace(start, length, replacement); - } - else { - string hardmask(length, _maskChar); - currDNA.replace(start, length, hardmask); - } - } - // write the masked chrom to the output file - PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth); - } - - // reset for the next chromosome. - currChrom = fastaInLine.substr(1, fastaInLine.find_first_of(" ")-1); - currDNA = ""; - } - } - - // process the last chromosome. - // exact same logic as in the main loop. - if (currDNA.size() > 0) { - - vector<BED> bedList = _bed->bedMapNoBin[currChrom]; - - for (unsigned int i = 0; i < bedList.size(); i++) { - start = bedList[i].start; - end = bedList[i].end; - length = end - start; - - if (_softMask) { - replacement = currDNA.substr(start, length); - toLowerCase(replacement); - currDNA.replace(start, length, replacement); - } - else { - string hardmask(length, _maskChar); - currDNA.replace(start, length, hardmask); - } - } - PrettyPrintChrom(faOut, currChrom, currDNA, fastaWidth); - } - - // closed for business. - fa.close(); - faOut.close(); -} - - -void MaskFastaFromBed::PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width) { - - int seqLength = sequence.size(); - - out << ">" << chrom << endl; - for(int i = 0; i < seqLength; i += width) { - if (i + width < seqLength) out << sequence.substr(i, width) << endl; - else out << sequence.substr(i, seqLength-i) << endl; - } -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h --- a/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,56 +0,0 @@ -/***************************************************************************** - maskFastaFromBed.h - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef MASKFASTAFROMBED_H -#define MASKFASTAFROMBED_H - -#include "bedFile.h" -#include "sequenceUtils.h" -#include <vector> -#include <iostream> -#include <fstream> -#include <cctype> /* for tolower */ - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class MaskFastaFromBed { - -public: - - // constructor - MaskFastaFromBed(const string &fastaInFile, const string &bedFile, - const string &fastaOutFile, bool softMask, char maskChar); - - // destructor - ~MaskFastaFromBed(void); - - -private: - - bool _softMask; - - string _fastaInFile; - string _bedFile; - string _fastaOutFile; - char _maskChar; // typically "N", but user's can choose something else, e.g., "X" - - // instance of a bed file class. - BedFile *_bed; - - void MaskFasta(); - - void PrettyPrintChrom(ofstream &out, string chrom, const string &sequence, int width); - -}; - -#endif /* MASKFASTAFROMBED */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp --- a/BEDTools-Version-2.14.3/src/maskFastaFromBed/maskFastaFromBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,146 +0,0 @@ -/***************************************************************************** - maskFastaFromBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "maskFastaFromBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "maskFastaFromBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string fastaInFile; - string bedFile; - - // output files - string fastaOutFile; - - // defaults for parameters - bool haveFastaIn = false; - bool haveBed = false; - bool haveFastaOut = false; - bool softMask = false; - char maskChar = 'N'; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-fi", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaIn = true; - fastaInFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-fo", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaOut = true; - fastaOutFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { - if ((i+1) < argc) { - haveBed = true; - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-soft", 5, parameterLength)) { - softMask = true; - } - else if(PARAMETER_CHECK("-mc", 3, parameterLength)) { - if ((i+1) < argc) { - string mask = argv[i + 1]; - if (mask.size() > 1) { - cerr << "*****ERROR: The mask character (-mc) should be a single character.*****" << endl << endl; - showHelp = true; - } - else { - maskChar = mask[0]; - } - i++; - } - } - else { - cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - if (!haveFastaIn || !haveFastaOut || !haveBed) { - showHelp = true; - } - - if (!showHelp) { - - MaskFastaFromBed *maskFasta = new MaskFastaFromBed(fastaInFile, bedFile, fastaOutFile, softMask, maskChar); - delete maskFasta; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Mask a fasta file based on feature coordinates." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -out <fasta> -bed <bed/gff/vcf>" << endl << endl; - - cerr << "Options:" << endl; - cerr << "\t-fi\tInput FASTA file" << endl; - cerr << "\t-bed\tBED/GFF/VCF file of ranges to mask in -fi" << endl; - cerr << "\t-fo\tOutput FASTA file" << endl; - cerr << "\t-soft\tEnforce \"soft\" masking. That is, instead of masking with Ns," << endl; - cerr << "\t\tmask with lower-case bases." << endl; - cerr << "\t-mc\tReplace masking character. That is, instead of masking with Ns, use another character." << endl; - - // end the program here - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/mergeBed/Makefile --- a/BEDTools-Version-2.14.3/src/mergeBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= mergeMain.cpp mergeBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= mergeBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp --- a/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,364 +0,0 @@\n-/*****************************************************************************\n- mergeBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "mergeBed.h"\n-\n-\n-\n-void BedMerge::ReportMergedNames(const vector<string> &names) {\n- if (names.size() > 0) {\n- printf("\\t");\n- vector<string>::const_iterator nameItr = names.begin();\n- vector<string>::const_iterator nameEnd = names.end();\n- for (; nameItr != nameEnd; ++nameItr) {\n- if (nameItr < (nameEnd - 1))\n- cout << *nameItr << ";";\n- else\n- cout << *nameItr;\n- }\n- }\n- else {\n- cerr << endl \n- << "*****" << endl \n- << "*****ERROR: No names found to report for the -names option. Exiting." << endl \n- << "*****" << endl;\n- exit(1);\n- }\n-}\n-\n-\n-void BedMerge::ReportMergedScores(const vector<string> &scores) {\n- if (scores.size() > 0) {\n- printf("\\t");\n-\n- // convert the scores to floats\n- vector<float> data;\n- for (size_t i = 0 ; i < scores.size() ; i++) {\n- data.push_back(atof(scores[i].c_str()));\n- } \n-\n- if (_scoreOp == "sum") {\n- printf("%.3f", accumulate(data.begin(), data.end(), 0.0));\n- }\n- else if (_scoreOp == "min") {\n- printf("%.3f", *min_element( data.begin(), data.end() ));\n- }\n- else if (_scoreOp == "max") {\n- printf("%.3f", *max_element( data.begin(), data.end() ));\n- }\n- else if (_scoreOp == "mean") {\n- double total = accumulate(data.begin(), data.end(), 0.0);\n- double mean = total / data.size();\n- printf("%.3f", mean);\n- }\n- else if (_scoreOp == "median") {\n- double median = 0.0;\n- sort(data.begin(), data.end());\n- int totalLines = data.size();\n- if ((totalLines % 2) > 0) {\n- long mid;\n- mid = totalLines / 2;\n- median = data[mid];\n- }\n- else {\n- long midLow, midHigh;\n- midLow = (totalLines / 2) - 1;\n- midHigh = (totalLines / 2);\n- median = (data[midLow] + data[midHigh]) / 2.0;\n- }\n- printf("%.3f", median);\n- }\n- else if ((_scoreOp == "mode") || (_scoreOp == "antimode")) {\n- // compute the frequency of each unique value\n- map<string, int> freqs;\n- vector<string>::const_iterator dIt = scores.begin();\n- vector<string>::const_iterator dEnd = scores.end();\n- for (; dIt != dEnd; ++dIt) {\n- freqs[*dIt]++;\n- }\n-\n- // grab the mode and the anti mode\n- string mode, antiMode;\n- int count = 0;\n- int minCount = INT_MAX;\n- for(map<string,int>::const_iterator iter = freqs.begin(); iter != freqs.end(); ++iter) {\n- if (iter->second > count) {\n- mode = iter->first;\n- count = iter->second;\n- }\n- if (iter->second < minCount) {\n- antiMode = iter->first;\n- minCount = iter->second;\n- }\n- }\n- // report\n- if (_scoreOp == "mode") {\n- printf("%s", mode.c_str());\n- }\n- else if (_scoreOp == "antimode") {\n- printf("%s", antiMode.c_str());\n- }\n- }\n- else if (_scoreOp == "collapse") { \n- vector<string>::const_iterator scoreItr = scores.begin();\n-'..b'lock, no overlap\n- if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {\n- if (start >= 0) {\n- Report(chrom, start, end, names, scores, mergeCount);\n- // reset\n- mergeCount = 1;\n- names.clear();\n- scores.clear();\n- }\n- start = bedItr->start;\n- end = bedItr->end;\n- if (!bedItr->name.empty()) names.push_back(bedItr->name);\n- if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n- }\n- // same block, overlaps\n- else {\n- if ((int) bedItr-> end > end) end = bedItr->end;\n- mergeCount++;\n- if (!bedItr->name.empty()) names.push_back(bedItr->name);\n- if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n- }\n- }\n- if (start >= 0) {\n- Report(chrom, start, end, names, scores, mergeCount);\n- }\n- }\n-}\n-\n-\n-// ==================================================================================\n-// = Merge overlapping BED entries into a single entry, accounting for strandedness =\n-// ==================================================================================\n-void BedMerge::MergeBedStranded() {\n-\n- // load the "B" bed file into a map so\n- // that we can easily compare "A" to it for overlaps\n- _bed->loadBedFileIntoMapNoBin();\n-\n- // loop through each chromosome and merge their BED entries\n- masterBedMapNoBin::const_iterator m = _bed->bedMapNoBin.begin();\n- masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();\n- for (; m != mEnd; ++m) {\n- \n- // bedList is already sorted by start position.\n- string chrom = m->first;\n- vector<BED> bedList = m->second;\n-\n- // make a list of the two strands to merge separately.\n- vector<string> strands(2);\n- strands[0] = "+";\n- strands[1] = "-";\n-\n- // do two passes, one for each strand.\n- for (unsigned int s = 0; s < strands.size(); s++) {\n-\n- int mergeCount = 1;\n- int numOnStrand = 0;\n- vector<string> names;\n- vector<string> scores;\n-\n- // merge overlapping features for this chromosome.\n- int start = -1;\n- int end = -1;\n- vector<BED>::const_iterator bedItr = bedList.begin();\n- vector<BED>::const_iterator bedEnd = bedList.end();\n- for (; bedItr != bedEnd; ++bedItr) {\n-\n- // if forcing strandedness, move on if the hit\n- // is not on the current strand.\n- if (bedItr->strand != strands[s]) { continue; }\n- else { numOnStrand++; }\n- \n- if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {\n- if (start >= 0) {\n- ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n- // reset\n- mergeCount = 1;\n- names.clear();\n- scores.clear();\n- }\n- start = bedItr->start;\n- end = bedItr->end;\n- if (!bedItr->name.empty()) names.push_back(bedItr->name);\n- if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n- }\n- else {\n- if ((int) bedItr-> end > end) end = bedItr->end;\n- mergeCount++;\n- if (!bedItr->name.empty()) names.push_back(bedItr->name);\n- if (!bedItr->score.empty()) scores.push_back(bedItr->score);\n- }\n- }\n- if (start >= 0) {\n- ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);\n- }\n- }\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h --- a/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,59 +0,0 @@ -/***************************************************************************** - mergeBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include <vector> -#include <algorithm> -#include <numeric> -#include <iostream> -#include <fstream> -#include <limits.h> -#include <stdlib.h> - -using namespace std; - - -//************************************************ -// Class methods and elements -//************************************************ -class BedMerge { - -public: - - // constructor - BedMerge(string &bedFile, bool numEntries, - int maxDistance, bool forceStrand, - bool reportNames, bool reportScores, const string &scoreOp); - - // destructor - ~BedMerge(void); - - void MergeBed(); - void MergeBedStranded(); - -private: - - string _bedFile; - bool _numEntries; - bool _forceStrand; - bool _reportNames; - bool _reportScores; - string _scoreOp; - int _maxDistance; - // instance of a bed file class. - BedFile *_bed; - - void Report(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount); - void ReportStranded(string chrom, int start, int end, const vector<string> &names, const vector<string> &scores, int mergeCount, string strand); - void ReportMergedNames(const vector<string> &names); - void ReportMergedScores(const vector<string> &scores); - -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp --- a/BEDTools-Version-2.14.3/src/mergeBed/mergeMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,163 +0,0 @@ -/***************************************************************************** - mergeMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "mergeBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "mergeBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - int maxDistance = 0; - string scoreOp = ""; - - // input arguments - bool haveBed = true; - bool numEntries = false; - bool haveMaxDistance = false; - bool forceStrand = false; - bool reportNames = false; - bool reportScores = false; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-n", 2, parameterLength)) { - numEntries = true; - } - else if(PARAMETER_CHECK("-d", 2, parameterLength)) { - if ((i+1) < argc) { - haveMaxDistance = true; - maxDistance = atoi(argv[i + 1]); - i++; - } - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; - } - else if (PARAMETER_CHECK("-nms", 4, parameterLength)) { - reportNames = true; - } - else if (PARAMETER_CHECK("-scores", 7, parameterLength)) { - reportScores = true; - if ((i+1) < argc) { - scoreOp = argv[i + 1]; - i++; - } - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; - showHelp = true; - } - if (reportNames && numEntries) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -n OR -nms, not both." << endl << "*****" << endl; - showHelp = true; - } - if ((reportScores == true) && (scoreOp != "sum") && (scoreOp != "max") && (scoreOp != "min") && (scoreOp != "mean") && - (scoreOp != "mode") && (scoreOp != "median") && (scoreOp != "antimode") && (scoreOp != "collapse")) - { - cerr << endl << "*****" << endl << "*****ERROR: Invalid scoreOp selection \"" << scoreOp << endl << "\" *****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedMerge *bm = new BedMerge(bedFile, numEntries, maxDistance, forceStrand, reportNames, reportScores, scoreOp); - delete bm; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Merges overlapping BED/GFF/VCF entries into a single interval." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-s\t" << "Force strandedness. That is, only merge features" << endl; - cerr << "\t\tthat are the same strand." << endl; - cerr << "\t\t- By default, merging is done without respect to strand." << endl << endl; - - cerr << "\t-n\t" << "Report the number of BED entries that were merged." << endl; - cerr << "\t\t- Note: \"1\" is reported if no merging occurred." << endl << endl; - - - cerr << "\t-d\t" << "Maximum distance between features allowed for features" << endl; - cerr << "\t\tto be merged." << endl; - cerr << "\t\t- Def. 0. That is, overlapping & book-ended features are merged." << endl; - cerr << "\t\t- (INTEGER)" << endl << endl; - - cerr << "\t-nms\t" << "Report the names of the merged features separated by semicolons." << endl << endl; - - cerr << "\t-scores\t" << "Report the scores of the merged features. Specify one of " << endl; - cerr << "\t\tthe following options for reporting scores:" << endl; - cerr << "\t\t sum, min, max," << endl; - cerr << "\t\t mean, median, mode, antimode," << endl; - cerr << "\t\t collapse (i.e., print a semicolon-separated list)," << endl; - cerr << "\t\t- (INTEGER)" << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) All output, regardless of input type (e.g., GFF or VCF)" << endl; - cerr << "\t will in BED format with zero-based starts" << endl << endl; - - - // end the program here - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiBamCov/Makefile --- a/BEDTools-Version-2.14.3/src/multiBamCov/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,48 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= multiBamCovMain.cpp multiBamCov.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= multiBamCov - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp --- a/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,134 +0,0 @@ -/***************************************************************************** - multiBamCov.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "multiBamCov.h" -#include "api/BamMultiReader.h" - - -/* - Constructor -*/ -MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, - int minQual, bool properOnly, - bool keepDuplicates, bool keepFailedQC) -: -_bam_files(bam_files), -_bed_file(bed_file), -_minQual(minQual), -_properOnly(properOnly), -_keepDuplicates(keepDuplicates), -_keepFailedQC(keepFailedQC) -{ - _bed = new BedFile(_bed_file); - LoadBamFileMap(); -} - - -/* - Destructor -*/ -MultiCovBam::~MultiCovBam(void) -{} - - - -void MultiCovBam::CollectCoverage() -{ - BamMultiReader reader; - - if ( !reader.Open(_bam_files) ) - { - cerr << "Could not open input BAM files." << endl; - exit(1); - } - else - { - // attempt to find index files - reader.LocateIndexes(); - - // if index data available for all BAM files, we can use SetRegion - if ( reader.HasIndexes() ) { - BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - - _bed->Open(); - // loop through each BED entry, jump to it, - // and collect coverage from each BAM - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) - { - if (bedStatus == BED_VALID) - { - // initialize counts for each file to 0 - vector<int> counts(_bam_files.size(), 0); - // get the BAM refId for this chrom. - int refId = reader.GetReferenceID(bed.chrom); - // set up a BamRegion to which to attempt to jump - BamRegion region(refId, (int)bed.start, refId, (int)bed.end); - - // everything checks out, just iterate through specified region, counting alignments - if ( (refId != -1) && (reader.SetRegion(region)) ) { - BamAlignment al; - while ( reader.GetNextAlignment(al) ) - { - bool duplicate = al.IsDuplicate(); - bool failedQC = al.IsFailedQC(); - if (_keepDuplicates) duplicate = false; - if (_keepFailedQC) failedQC = false; - // map qual must exceed minimum - if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) { - // ignore if not properly paired and we actually care. - if (_properOnly && !al.IsProperPair()) - continue; - - // lookup the offset of the file name and tabulate - //coverage for the appropriate file - counts[bamFileMap[al.Filename]]++; - } - } - } - // report the cov at this interval for each file and reset - _bed->reportBedTab(bed); - ReportCounts(counts); - bed = nullBed; - } - } - _bed->Close(); - } - else { - cerr << "Could not find indexes." << endl; - reader.Close(); - exit(1); - } - } -} - - -void MultiCovBam::LoadBamFileMap(void) -{ - for (size_t i = 0; i < _bam_files.size(); ++i) - { - bamFileMap[_bam_files[i]] = i; - } -} - -void MultiCovBam::ReportCounts(const vector<int> &counts) -{ - for (size_t i = 0; i < counts.size(); ++i) - { - if (i < counts.size() - 1) - cout << counts[i] << "\t"; - else - cout << counts[i]; - } - cout << endl; -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h --- a/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,64 +0,0 @@ -/***************************************************************************** - multiBamCov.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef MULTICOVBAM_H -#define MULTICOVBAM_H - -#include "bedFile.h" -#include "api/BamMultiReader.h" -using namespace BamTools; - - -#include <vector> -#include <iostream> -#include <fstream> -#include <stdlib.h> -using namespace std; - - - -class MultiCovBam { - -public: - - // constructor - MultiCovBam(const vector<string> &bam_files, const string bed_file, - int minQual, bool properOnly, - bool keepDuplicates, bool keepFailedQC); - - // destructor - ~MultiCovBam(void); - - void CollectCoverage(); - -private: - - //------------------------------------------------ - // private attributes - //------------------------------------------------ - vector<string> _bam_files; - string _bed_file; - BedFile *_bed; - - // attributes to control what is counted - int _minQual; - bool _properOnly; - bool _keepDuplicates; - bool _keepFailedQC; - - - map<string, int> bamFileMap; - - void LoadBamFileMap(void); - void ReportCounts(const vector<int> &counts); -}; - -#endif /* MULTIBAMCOV_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp --- a/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCovMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,145 +0,0 @@ -/***************************************************************************** - multiBamCovMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "multiBamCov.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "multiBamCov" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile; - vector<string> bamFiles; - int minQual = 0; - - // input arguments - bool haveBed = false; - bool haveBams = false; - bool properOnly = false; - bool keepDuplicates = false; - bool keepFailedQC = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-bed", 4, parameterLength)) { - if ((i+1) < argc) { - haveBed = true; - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-bams", 5, parameterLength)) { - if ((i+1) < argc) { - haveBams = true; - i = i+1; - string file = argv[i]; - while (file[0] != '-' && i < argc) { - bamFiles.push_back(file); - i++; - if (i < argc) - file = argv[i]; - } - i--; - } - } - else if(PARAMETER_CHECK("-q", 2, parameterLength)) { - if ((i+1) < argc) { - minQual = atoi(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-p", 2, parameterLength)) { - properOnly = true; - } - else if(PARAMETER_CHECK("-D", 2, parameterLength)) { - keepDuplicates = true; - } - - else if(PARAMETER_CHECK("-F", 2, parameterLength)) { - keepFailedQC = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - if (!showHelp) { - MultiCovBam *mc = new MultiCovBam(bamFiles, bedFile, minQual, properOnly, keepDuplicates, keepFailedQC); - mc->CollectCoverage(); - delete mc; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Counts sequence coverage for multiple bams at specific loci." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -bams aln.1.bam aln.2.bam ... aln.n.bam -bed <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-bams\t" << "The bam files." << endl << endl; - - cerr << "\t-bed\t" << "The bed file." << endl << endl; - - cerr << "\t-q\t" << "Minimum mapping quality allowed. Default is 0." << endl << endl; - - cerr << "\t-D\t" << "Include duplicate-marked reads. Default is to count non-duplicates only" << endl << endl; - - cerr << "\t-F\t" << "Include failed-QC reads. Default is to count pass-QC reads only" << endl << endl; - - cerr << "\t-p\t" << "Only count proper pairs. Default is to count all alignments with MAPQ" << endl; - cerr << "\t\t" << "greater than the -q argument, regardless of the BAM FLAG field." << endl << endl; - - // end the program here - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile --- a/BEDTools-Version-2.14.3/src/multiIntersectBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,49 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= multiIntersectBed.cpp multiIntersectBedMain.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= multiIntersectBed - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h --- a/BEDTools-Version-2.14.3/src/multiIntersectBed/intervalItem.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,64 +0,0 @@ -/***************************************************************************** - intervalItem.h - - (c) 2010 - Assaf Gordon - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERVALITEM_H -#define INTERVALITEM_H - -#include <string> -#include <queue> - -enum COORDINATE_TYPE { - START, - END -}; - -/* - An interval item in the priority queue. - - An IntervalItem can mark either a START position or an END position. - */ -class IntervalItem -{ - - -public: - int source_index; // which source BedGraph file this came from - COORDINATE_TYPE coord_type; // is this the start or the end position? - CHRPOS coord; - - IntervalItem () : - source_index(-1), - coord_type(START), - coord(0) - {} - - IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord) : - source_index(_index), - coord_type(_type), - coord(_coord) - {} - - IntervalItem(const IntervalItem &other) : - source_index(other.source_index), - coord_type(other.coord_type), - coord(other.coord) - {} - - bool operator< ( const IntervalItem& other ) const - { - return this->coord > other.coord; - } -}; - -// our priority queue -typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE; - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp --- a/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,289 +0,0 @@\n-/*****************************************************************************\n- unionBedGraphs.cpp\n-\n- (c) 2010 - Assaf Gordon, CSHL\n- - Aaron Quinlan, UVA\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include <cassert>\n-#include <cstring>\n-#include <cstdlib>\n-#include <iostream>\n-#include <algorithm>\n-\n-#include "bedFile.h"\n-#include "multiIntersectBed.h"\n-\n-using namespace std;\n-\n-\n-MultiIntersectBed::MultiIntersectBed(std::ostream& _output,\n- const vector<string>& _filenames,\n- const vector<string>& _titles,\n- bool _print_empty_regions,\n- const std::string& _genome_size_filename,\n- const std::string& _no_coverage_value ) :\n- filenames(_filenames),\n- titles(_titles),\n- output(_output),\n- current_non_zero_inputs(0),\n- print_empty_regions(_print_empty_regions),\n- haveTitles(false),\n- genome_sizes(NULL),\n- no_coverage_value(_no_coverage_value)\n-{\n- if (print_empty_regions) {\n- assert(!_genome_size_filename.empty());\n-\n- genome_sizes = new GenomeFile(_genome_size_filename);\n- }\n- \n- if (titles.size() > 0) {\n- haveTitles = true;\n- }\n-}\n-\n-\n-MultiIntersectBed::~MultiIntersectBed() {\n- CloseFiles();\n- if (genome_sizes) {\n- delete genome_sizes;\n- genome_sizes = NULL ;\n- }\n-}\n-\n-\n-void MultiIntersectBed::MultiIntersect() {\n- OpenFiles();\n-\n- // Add the first interval from each file\n- for(size_t i = 0;i < input_files.size(); ++i)\n- LoadNextItem(i);\n-\n- // Chromosome loop - once per chromosome\n- do {\n- // Find the first chromosome to use\n- current_chrom = DetermineNextChrom();\n-\n- // Populate the queue with initial values from all files\n- // (if they belong to the correct chromosome)\n- for(size_t i = 0; i < input_files.size(); ++i)\n- AddInterval(i);\n-\n- CHRPOS current_start = ConsumeNextCoordinate();\n-\n- // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage\n- if (print_empty_regions && current_start > 0)\n- PrintEmptyCoverage(0,current_start);\n-\n- // Intervals loop - until all intervals (of current chromosome) from all files are used.\n- do {\n- CHRPOS current_end = queue.top().coord;\n- PrintCoverage(current_start, current_end);\n- current_start = ConsumeNextCoordinate();\n- } while (!queue.empty());\n-\n- // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome\n- // print a dummy empty coverage\n- if (print_empty_regions) {\n- CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);\n- if (current_start < chrom_size)\n- PrintEmptyCoverage(current_start, chrom_size);\n- }\n-\n- } while (!AllFilesDone());\n-}\n-\n-\n-CHRPOS MultiIntersectBed::ConsumeNextCoordinate() {\n- assert(!queue.empty());\n-\n- CHRPOS new_position = queue.top().coord;\n- do {\n- IntervalItem item = queue.top();\n- UpdateInformation(item);\n- queue.pop();\n- } while (!queue.empty() && queue.top().coord == new_position);\n-\n- return new_position;\n-}\n-\n-\n-void MultiIntersectBed::UpdateInformation(const IntervalItem &item) {\n- // Update the depth coverage for this file\n-\n- // Which coordinate is it - start or end?\n- switch (item.coord_type)\n- {\n- case START:\n- current_depth[item.source_index] = 1;\n- current_non_zero_inputs++;\n- files_with_coverage[item.source_index] = true;\n- break;\n- case END:\n- //Read the next interval from thi'..b'rval(int index) {\n- assert(static_cast<unsigned int>(index) < input_files.size());\n-\n- //This file has no more intervals\n- if (current_item[index].chrom.empty())\n- return;\n-\n- //If the next interval belongs to a different chrom, don\'t add it\n- if (current_item[index].chrom!=current_chrom)\n- return;\n-\n- const BED &bed(current_item[index]);\n-\n- IntervalItem start_item(index, START, bed.start);\n- IntervalItem end_item(index, END, bed.end);\n-\n- queue.push(start_item);\n- queue.push(end_item);\n-\n- LoadNextItem(index);\n-}\n-\n-\n-void MultiIntersectBed::PrintHeader() {\n- output << "chrom\\tstart\\tend\\tnum\\tlist" ;\n- for (size_t i=0;i<titles.size();++i)\n- output << "\\t" <<titles[i];\n- output << endl;\n-}\n-\n-\n-void MultiIntersectBed::PrintCoverage(CHRPOS start, CHRPOS end) {\n- if ( current_non_zero_inputs == 0 && ! print_empty_regions )\n- return ;\n-\n- output << current_chrom << "\\t"\n- << start << "\\t"\n- << end << "\\t"\n- << current_non_zero_inputs << "\\t";\n- \n- ostringstream file_list_string;\n- ostringstream file_bool_string;\n- int depth_count = 0;\n- for (size_t i = 0; i < current_depth.size(); ++i)\n- {\n- if (current_depth[i] > 0) {\n- if (depth_count < current_non_zero_inputs - 1) {\n- if (!haveTitles)\n- file_list_string << i+1 << ",";\n- else \n- file_list_string << titles[i] << ",";\n- }\n- else {\n- if (!haveTitles)\n- file_list_string << i+1;\n- else \n- file_list_string << titles[i];\n- }\n- depth_count++;\n- }\n- file_bool_string << "\\t" << current_depth[i];\n- }\n- if (current_non_zero_inputs > 0) {\n- cout << file_list_string.str() << file_bool_string.str() << endl;\n- }\n- else {\n- cout << "none" << file_bool_string.str() << endl;\n- }\n-}\n-\n-\n-void MultiIntersectBed::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {\n- output << current_chrom << "\\t"\n- << start << "\\t"\n- << end << "\\t"\n- << "0" << "\\t" << "none";\n- \n- for (size_t i=0;i<current_depth.size();++i)\n- output << "\\t0";\n-\n- output << endl;\n-}\n-\n-\n-void MultiIntersectBed::LoadNextItem(int index) {\n- assert(static_cast<unsigned int>(index) < input_files.size());\n-\n- current_item[index].chrom="";\n-\n- BedFile *file = input_files[index];\n- BED merged_bed;\n- int lineNum = 0;\n- //\n- // TO DO: Do the mergeing on the fly. How best to do this?\n- // \n- // IDEA: Implement a Merge class with GetNextMerge element.\n- //\n-\n- while (file->GetNextMergedBed(merged_bed, lineNum))\n- {\n- current_item[index] = merged_bed;\n- break;\n- }\n-}\n-\n-\n-bool MultiIntersectBed::AllFilesDone() {\n- for (size_t i=0;i<current_item.size();++i)\n- if (!current_item[i].chrom.empty())\n- return false;\n- return true;\n-}\n-\n-\n-string MultiIntersectBed::DetermineNextChrom() {\n- string next_chrom;\n- for (size_t i=0;i<current_item.size();++i) {\n- if (current_item[i].chrom.empty())\n- continue;\n-\n- if (next_chrom.empty())\n- next_chrom = current_item[i].chrom;\n- else\n- if (current_item[i].chrom < next_chrom)\n- next_chrom = current_item[i].chrom ;\n- }\n- return next_chrom;\n-}\n-\n-\n-void MultiIntersectBed::OpenFiles() {\n- for (size_t i = 0; i < filenames.size(); ++i) {\n- BedFile *file = new BedFile(filenames[i]);\n- file->Open();\n- input_files.push_back(file);\n- current_depth.push_back(0);\n- }\n- current_item.resize(filenames.size());\n-}\n-\n-\n-void MultiIntersectBed::CloseFiles() {\n- for (size_t i=0; i < input_files.size(); ++i) {\n- BedFile *file = input_files[i];\n- delete file;\n- input_files[i] = NULL ;\n- }\n- input_files.clear();\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h --- a/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,125 +0,0 @@ -/***************************************************************************** - multiIntersectBed.h - - (c) 2010 - Aaron Quinlan, UVA - - Assaf Gordon, CSHL - Quinlan Laboratory - Department of Public Health Sciences - Center for Public Health Genomics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef MULTIINTERSECTBED_H -#define MULTIINTERSECTBED_H - -#include <vector> -#include <string> -#include "bedFile.h" -#include "genomeFile.h" -#include "intervalItem.h" - -class MultiIntersectBed -{ -private: - - vector<string> filenames; - vector<string> titles; - - vector<BedFile*> input_files; - vector<int> current_depth; - vector<BED> current_item; - - std::ostream &output; - - INTERVALS_PRIORITY_QUEUE queue; - std::string current_chrom; - map<int, bool> files_with_coverage; - int current_non_zero_inputs; - bool print_empty_regions; - bool haveTitles; - - GenomeFile* genome_sizes; - - std::string no_coverage_value; - -public: - MultiIntersectBed(std::ostream& _output, - const vector<string>& _filenames, - const vector<string>& _titles, - bool _print_empty_regions, - const std::string& _genomeFileName, - const std::string& _no_coverage_value); - - virtual ~MultiIntersectBed(); - - // Combines all interval files - void MultiIntersect(); - - // Print the header line: chrom/start/end + name of each bedgraph file. - void PrintHeader(); - - -private: - - // Open all input files, initialize "current_XXX" vectors - void OpenFiles(); - - // Close the input files. - void CloseFiles(); - - /* - Add an interval from BedGraph file 'index' into the queue. - will only be added if it belongs to the current chromosome. - - If the interval was added (=consumed), the next interval will be read from the file - using 'LoadNextItem' - */ - void AddInterval(int index); - - /* - Loads the next interval from Bed file 'index'. - Stores it in 'current_bed_item' vector. - */ - void LoadNextItem(int index); - - /* - Scans the 'current_bedgraph_item' vector, - find the 'first' chromosome to use (different BedGraph files can start with different chromosomes). - */ - std::string DetermineNextChrom(); - - /* - Returns 'true' if ALL intervals from ALL BedGraph files were used - */ - bool AllFilesDone(); - - /* - Extract the next coordinate from the queue, and updates the current coverage information. - If multiple interval share the same coordinate values, all of them are handled. - If an END coordinate is consumed, the next interval (from the corresponding file) is read. - */ - CHRPOS ConsumeNextCoordinate(); - - /* - Updates the coverage information based on the given item. - Item can be a START coordinate or an END coordiante. - */ - void UpdateInformation(const IntervalItem &item); - - /* - prints chrom/start/end and the current depth coverage values of all the files. - */ - void PrintCoverage(CHRPOS start, CHRPOS end); - - /* - prints chrom/start/end and the ZERO depth coverage values of all the files. - */ - void PrintEmptyCoverage(CHRPOS start, CHRPOS end); - - void DebugPrintQueue(); -}; - - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp --- a/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,294 +0,0 @@\n-/*****************************************************************************\n- unionBedGraphsMain.cpp\n-\n- (c) 2010 - Assaf Gordon, CSHL\n- - Aaron Quinlan, UVA\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include <climits>\n-#include <cstring>\n-#include <cstdlib>\n-#include <vector>\n-#include <string>\n-#include <iostream>\n-#include <getopt.h>\n-#include <libgen.h> //for basename()\n-#include "version.h"\n-\n-#include "genomeFile.h"\n-#include "multiIntersectBed.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "multiIntersectBed"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-//STLized version of basename()\n-// (because POSIX basename() modifies the input string pointer)\n-// Additionally: removes any extension the basename might have.\n-std::string stl_basename(const std::string& path);\n-\n-// function declarations\n-void ShowHelp(void);\n-void ShowExamples(void);\n-\n-\n-int main(int argc, char* argv[])\n-{\n- bool haveFiles = false;\n- bool haveTitles = false;\n- bool haveGenome = false;\n- bool haveFiller = true;\n- bool printHeader = false;\n- bool printEmptyRegions = false;\n- bool showHelp = false;\n- string genomeFile;\n- string basePath;\n- string noCoverageValue = "0";\n- vector<string> inputFiles;\n- vector<string> inputTitles;\n-\n- //Parse command line options\n- if(argc <= 1)\n- ShowHelp();\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp == true) {\n- ShowHelp();\n- exit(1);\n- }\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFiles = true;\n- i = i+1;\n- string file = argv[i];\n- while (file[0] != \'-\' && i < argc) {\n- inputFiles.push_back(file);\n- i++;\n- if (i < argc)\n- file = argv[i];\n- }\n- i--;\n- }\n- }\n- else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n- if ((i+1) < argc) {\n- haveTitles = true;\n- i = i+1;\n- string title = argv[i];\n- while (title[0] != \'-\' && i < argc) {\n- inputTitles.push_back(title);\n- i++;\n- if (i < argc)\n- title = argv[i];\n- }\n- i--;\n- }\n- }\n- else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveGenome = true;\n- genomeFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFiller = true;\n- noCoverageValue = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n- printHeader = true;\n- }\n- else if(PARAMETER_CHECK("-empty", 6, parameterLength)) {\n- printEmptyRegions = true;\n- }\n- else if(PARAMETER_CHECK("-examples", 9, parameterLengt'..b'-\n- cerr << "\\t-names\\t\\t" << "A list of names (one / file) to describe each file in -i." << endl;\n- cerr << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n-\n- cerr << "\\t-g\\t\\t" << "Use genome file to calculate empty regions." << endl;\n- cerr << "\\t\\t\\t- STRING." << endl << endl;\n-\n- cerr << "\\t-empty\\t\\t" << "Report empty regions (i.e., start/end intervals w/o" << endl;\n- cerr << "\\t\\t\\tvalues in all files)." << endl;\n- cerr << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n-\n- cerr << "\\t-filler TEXT\\t" << "Use TEXT when representing intervals having no value." << endl;\n- cerr << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n-\n- cerr << "\\t-examples\\t" << "Show detailed usage examples." << endl << endl;\n-}\n-\n-\n-\n-void ShowExamples()\n-{\n- cerr << "Example usage:\\n\\n" \\\n-"== Input files: ==\\n" \\\n-"\\n" \\\n-" $ cat 1.bg\\n" \\\n-" chr1 1000 1500 10\\n" \\\n-" chr1 2000 2100 20\\n" \\\n-"\\n" \\\n-" $ cat 2.bg\\n" \\\n-" chr1 900 1600 60\\n" \\\n-" chr1 1700 2050 50\\n" \\\n-"\\n" \\\n-" $ cat 3.bg\\n" \\\n-" chr1 1980 2070 80\\n" \\\n-" chr1 2090 2100 20\\n" \\\n-"\\n" \\\n-" $ cat sizes.txt\\n" \\\n-" chr1 5000\\n" \\\n-"\\n" \\\n-"== Union/combine the files: ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n-" chrom start end 1 2 3\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine the files, with a header line and custom names: ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n-" chrom start end WT-1 WT-2 KO-1\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n-" chrom start end 1 2 3\\n" \\\n-" chr1 0 900 0 0 0\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1600 1700 0 0 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-" chr1 2100 5000 0 0 0\\n" \\\n-"\\n" \\\n-;\n-}\n-\n-std::string stl_basename(const std::string& path)\n-{\n- string result;\n-\n- char* path_dup = strdup(path.c_str());\n- char* basename_part = basename(path_dup);\n- result = basename_part;\n- free(path_dup);\n-\n- size_t pos = result.find_last_of(\'.\');\n- if (pos != string::npos )\n- result = result.substr(0,pos);\n-\n- return result;\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h --- a/BEDTools-Version-2.14.3/src/nucBed/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,13 +0,0 @@ -#pragma once - -#define _FILE_OFFSET_BITS 64 - -#ifdef WIN32 -#define ftell64(a) _ftelli64(a) -#define fseek64(a,b,c) _fseeki64(a,b,c) -typedef __int64_t off_type; -#else -#define ftell64(a) ftello(a) -#define fseek64(a,b,c) fseeko(a,b,c) -typedef off_t off_type; -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/nucBed/Makefile --- a/BEDTools-Version-2.14.3/src/nucBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,52 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/sequenceUtilities/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/Fasta/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= nucBedMain.cpp nucBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o sequenceUtils.o lineFileUtilities.o gzstream.o fileType.o Fasta.o split.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= nucBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/Fasta/ - - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp --- a/BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,158 +0,0 @@ -/***************************************************************************** - nucBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "nucBed.h" - - -NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq, - bool hasPattern, const string &pattern, bool forceStrand) { - - _dbFile = dbFile; - _bedFile = bedFile; - _printSeq = printSeq; - _hasPattern = hasPattern; - _pattern = pattern; - _forceStrand = forceStrand; - - _bed = new BedFile(_bedFile); - - // Compute the DNA content in each BED/GFF/VCF interval - ProfileDNA(); -} - - -NucBed::~NucBed(void) -{} - - -void NucBed::ReportDnaProfile(const BED& bed, const string &sequence, int seqLength) -{ - int a,c,g,t,n,other,userPatternCount; - a = c = g = t = n = other = userPatternCount = 0; - - getDnaContent(sequence,a,c,g,t,n,other); - - if (_hasPattern) - userPatternCount = countPattern(sequence, _pattern); - - - // report the original interval - _bed->reportBedTab(bed); - // report AT and GC content - printf("%f\t%f\t",(float)(a+t)/seqLength, (float)(c+g)/seqLength); - // report raw nucleotide counts - printf("%d\t%d\t%d\t%d\t%d\t%d\t%d",a,c,g,t,n,other,seqLength); - // add the original sequence if requested. - - if (_printSeq) - printf("\t%s",sequence.c_str()); - if (_hasPattern) - printf("\t%d",userPatternCount); - printf("\n"); - -} - - -void NucBed::PrintHeader(void) { - printf("#"); - - int numOrigColumns = (int) _bed->bedType; - for (int i = 1; i <= numOrigColumns; ++i) { - printf("%d_usercol\t", i); - } - printf("%d_pct_at\t", numOrigColumns + 1); - printf("%d_pct_gc\t", numOrigColumns + 2); - printf("%d_num_A\t", numOrigColumns + 3); - printf("%d_num_C\t", numOrigColumns + 4); - printf("%d_num_G\t", numOrigColumns + 5); - printf("%d_num_T\t", numOrigColumns + 6); - printf("%d_num_N\t", numOrigColumns + 7); - printf("%d_num_oth\t", numOrigColumns + 8); - printf("%d_seq_len\t", numOrigColumns + 9); - - if (_printSeq) - printf("%d_seq", numOrigColumns + 10); - if (_hasPattern && !_printSeq) - printf("%d_user_patt_count", numOrigColumns + 10); - else if (_hasPattern && _printSeq) - printf("\t%d_user_patt_count", numOrigColumns + 11); - printf("\n"); - -} - - -//****************************************************************************** -// ExtractDNA -//****************************************************************************** -void NucBed::ProfileDNA() { - - /* Make sure that we can oen all of the files successfully*/ - - // open the fasta database for reading - ifstream faDb(_dbFile.c_str(), ios::in); - if ( !faDb ) { - cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - // open and memory-map genome file - FastaReference fr; - bool memmap = true; - fr.open(_dbFile, memmap); - - bool headerReported = false; - BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - string sequence; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - if (headerReported == false) { - PrintHeader(); - headerReported = true; - } - // make sure we are extracting >= 1 bp - if (bed.zeroLength == false) { - size_t seqLength = fr.sequenceLength(bed.chrom); - // make sure this feature will not exceed the end of the chromosome. - if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) - { - // grab the dna at this interval - int length = bed.end - bed.start; - // report the sequence's content - string dna = fr.getSubSequence(bed.chrom, bed.start, length); - // rev comp si necessaire - if ((_forceStrand == true) && (bed.strand == "-")) - reverseComplement(dna); - ReportDnaProfile(bed, dna, length); - bed = nullBed; - } - else - { - cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " - << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; - } - } - // handle zeroLength - else { - cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; - } - bed = nullBed; - } - } - _bed->Close(); -} - - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/nucBed/nucBed.h --- a/BEDTools-Version-2.14.3/src/nucBed/nucBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,55 +0,0 @@ -/***************************************************************************** - nucBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef NUCBED_H -#define NUCBED_H - -#include "bedFile.h" -#include "sequenceUtils.h" -#include "Fasta.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class NucBed { - -public: - - // constructor - NucBed(string &dbFile, string &bedFile, bool printSeq, - bool hasPattern, const string &pattern, - bool forceStrand); - // destructor - ~NucBed(void); - - void ProfileDNA(); - - -private: - string _dbFile; - string _bedFile; - bool _printSeq; - bool _hasPattern; - string _pattern; - bool _forceStrand; - - // instance of a bed file class. - BedFile *_bed; - void PrintHeader(void); - void ReportDnaProfile(const BED& bed, const string &sequence, int seqLength); -}; - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp --- a/BEDTools-Version-2.14.3/src/nucBed/nucBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,147 +0,0 @@ -/***************************************************************************** - nucBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "nucBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "nucBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string fastaDbFile; - string bedFile; - string pattern; - - // checks for existence of parameters - bool haveFastaDb = false; - bool haveBed = false; - bool printSeq = false; - bool hasPattern = false; - bool forceStrand = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-fi", 3, parameterLength)) { - if ((i+1) < argc) { - haveFastaDb = true; - fastaDbFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-bed", 4, parameterLength)) { - if ((i+1) < argc) { - haveBed = true; - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-seq", 4, parameterLength)) { - printSeq = true; - } - else if(PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; - } - else if(PARAMETER_CHECK("-pattern", 8, parameterLength)) { - if ((i+1) < argc) { - hasPattern = true; - pattern = argv[i + 1]; - i++; - } - } - else { - cerr << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - if (!haveFastaDb || !haveBed) { - showHelp = true; - } - - if (!showHelp) { - - NucBed *nuc = new NucBed(fastaDbFile, bedFile, printSeq, hasPattern, pattern, forceStrand); - delete nuc; - - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Profiles the nucleotide content of intervals in a fasta file." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -fi <fasta> -bed <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-fi\tInput FASTA file" << endl << endl; - cerr << "\t-bed\tBED/GFF/VCF file of ranges to extract from -fi" << endl << endl; - cerr << "\t-s\tProfile the sequence according to strand." << endl << endl; - cerr << "\t-seq\tPrint the extracted sequence" << endl << endl; - cerr << "\t-pattern\tReport the number of times a user-defined sequence is observed (case-insensitive)." << endl << endl; - - - cerr << "Output format: " << endl; - cerr << "\tThe following information will be reported after each original BED entry:" << endl; - cerr << "\t 1) %AT content" << endl; - cerr << "\t 2) %GC content" << endl; - cerr << "\t 3) Number of As observed" << endl; - cerr << "\t 4) Number of Cs observed" << endl; - cerr << "\t 5) Number of Gs observed" << endl; - cerr << "\t 6) Number of Ts observed" << endl; - cerr << "\t 7) Number of Ns observed" << endl; - cerr << "\t 8) Number of other bases observed" << endl; - cerr << "\t 9) The length of the explored sequence/interval." << endl; - cerr << "\t 10) The sequence extracted from the FASTA file. (optional, if -seq is used)" << endl; - cerr << "\t 11) The number of times a user defined pattern was observed. (optional, if -pattern is used.)" << endl; - - // end the program here - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/overlap/Makefile --- a/BEDTools-Version-2.14.3/src/overlap/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= overlap.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= overlap - - -all: $(PROGRAM) - -.PHONY: all - - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - - - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/overlap/overlap.cpp --- a/BEDTools-Version-2.14.3/src/overlap/overlap.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,202 +0,0 @@ -/***************************************************************************** - overlap.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include <vector> -#include <iostream> -#include <fstream> -#include <stdlib.h> - -#include "version.h" -#include "lineFileUtilities.h" -#include "bedFile.h" -using namespace std; - - -// define our program name -#define PROGRAM_NAME "overlap" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - - -// function declarations -void ShowHelp(void); -void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col); -void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col); - -int main(int argc, char* argv[]) { - - // input files - string inFile = "stdin"; - string columns; - - // our configuration variables - bool showHelp = false; - bool haveInFile = true; - bool haveColumns = false; - - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - inFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-cols", 5, parameterLength)) { - haveColumns = true; - columns = argv[i + 1]; - i++; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have an input files - if (!haveInFile ) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - - // Split the column string sent by the user into discrete column numbers - // A comma separated string is expected. - vector<string> posColumns; - Tokenize(columns, posColumns, ","); - - if (posColumns.size() != 4) { - cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; - ShowHelp(); - } - else { - short s1, e1, s2, e2; - s1 = atoi(posColumns[0].c_str()); - e1 = atoi(posColumns[1].c_str()); - s2 = atoi(posColumns[2].c_str()); - e2 = atoi(posColumns[3].c_str()); - - DetermineInput(inFile, s1, e1, s2, e2); - } - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Computes the amount of overlap (positive values)" << endl; - cerr << "\t or distance (negative values) between genome features" << endl; - cerr << "\t and reports the result at the end of the same line." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl; - - cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl; - cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl; - cerr << "\t\tThe columns must be listed in the following order: " << endl << endl; - cerr << "\t\tstart1,end1,start2,end2" << endl << endl; - - cerr << "Example: " << endl; - cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl; - cerr << "\tchr1 10 20 A chr1 15 25 B" << endl; - cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl; - cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl; - cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl; - cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl; - - // end the program here - exit(1); - -} - - -void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { - - - if (inFile != "stdin") { // process a file - - ifstream in(inFile.c_str(), ios::in); - if ( !in ) { - cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col); - } - else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col); -} - - -void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { - - int lineNum = 0; - string inLine; - vector<string> inFields; - - int overlap; - - char *s1End, *e1End, *s2End, *e2End; - long s1, e1, s2, e2; - - while (getline(input, inLine)) { - lineNum++; - Tokenize(inLine, inFields); - - if (inFields.size() > 1) { - - // test if columns 2 and 3 are integers. If so, assume BED. - s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10); - e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10); - s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10); - e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10); - - // strtol will set pointers to the start of the string if non-integral, base 10 - // if they all check out, we have valid numeric columns. Otherwise, complain. - if (s1End != inFields[s1Col-1].c_str() && - e1End != inFields[e1Col-1].c_str() && - s2End != inFields[s2Col-1].c_str() && - e2End != inFields[e2Col-1].c_str()) { - - overlap = overlaps(s1, e1, s2, e2); - printf("%s\t%d\n", inLine.c_str(), overlap); - } - else { - cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl; - exit(1); - } - } - inFields.clear(); - } -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToBed/Makefile --- a/BEDTools-Version-2.14.3/src/pairToBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,52 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ \ - -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= pairToBedMain.cpp pairToBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= pairToBed - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp --- a/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,525 +0,0 @@\n-/*****************************************************************************\n- pairToBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "pairToBed.h"\n-\n-\n-bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {\n-\n- if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {\n- return true;\n- }\n- else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {\n- return true;\n- }\n- else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {\n- return true;\n- }\n- else return false;\n-}\n-\n-\n-/*\n- Constructor\n-*/\n-\n-\n-BedIntersectPE::BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,\n- string searchType, bool sameStrand, bool diffStrand, bool bamInput,\n- bool bamOutput, bool uncompressedBam, bool useEditDistance) {\n-\n- _bedAFilePE = bedAFilePE;\n- _bedBFile = bedBFile;\n- _overlapFraction = overlapFraction;\n- _sameStrand = sameStrand;\n- _diffStrand = diffStrand;\n- _useEditDistance = useEditDistance;\n- _searchType = searchType;\n- _bamInput = bamInput;\n- _bamOutput = bamOutput;\n- _isUncompressedBam = uncompressedBam;\n-\n- _bedA = new BedFilePE(bedAFilePE);\n- _bedB = new BedFile(bedBFile);\n-\n- if (_bamInput == false)\n- IntersectBedPE();\n- else\n- IntersectBamPE(_bedAFilePE);\n-}\n-\n-\n-/*\n- Destructor\n-*/\n-\n-BedIntersectPE::~BedIntersectPE(void) {\n-}\n-\n-\n-\n-void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) {\n-\n- // list of hits on each end of BEDPE\n- // that exceed the requested overlap fraction\n- vector<BED> qualityHits1;\n- vector<BED> qualityHits2;\n-\n- // count of hits on each end of BEDPE\n- // that exceed the requested overlap fraction\n- int numOverlapsEnd1 = 0;\n- int numOverlapsEnd2 = 0;\n-\n- // make sure we have a valid chromosome before we search\n- if (a.chrom1 != ".") {\n- // Find the quality hits between ***end1*** of the BEDPE and the B BED file\n- _bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, _sameStrand, _diffStrand);\n-\n- vector<BED>::const_iterator h = hits1.begin();\n- vector<BED>::const_iterator hitsEnd = hits1.end();\n- for (; h != hitsEnd; ++h) {\n-\n- int s = max(a.start1, h->start);\n- int e = min(a.end1, h->end);\n- int overlapBases = (e - s); // the number of overlapping bases b/w a and b\n- int aLength = (a.end1 - a.start1); // the length of a in b.p.\n-\n- // is there enough overlap relative to the user\'s request? (default ~ 1bp)\n- if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {\n- numOverlapsEnd1++;\n-\n- if (type == "either") {\n- _bedA->reportBedPETab(a);\n- _bedB->reportBedNewLine(*h);\n- }\n- else {\n- qualityHits1.push_back(*h);\n- }\n- }\n- }\n- }\n-\n-\n- // make sure we have a valid chromosome before we search\n- if (a.chrom2 != ".") {\n- // Now find the quality hits between ***end2*** of the BEDPE and the B BED file\n- _bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, _sameStrand, _diffStrand);\n-\n- vector<BED>::const_iterator h = hits2.begin();\n- vector<BED>::const_iterator hitsEnd = hits2.end();\n- for (; h != hitsEnd; ++h) {\n-\n- int s = max(a.start2, h->start);\n- int '..b'\n- reader.Close();\n- if (_bamOutput == true) {\n- writer.Close();\n- }\n-}\n-\n-\n-void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,\n- const RefVector &refs, BamWriter &writer) {\n-\n- vector<BED> hits, hits1, hits2; // vector of potential hits\n- hits.reserve(1000); // reserve some space\n- hits1.reserve(1000);\n- hits2.reserve(1000);\n-\n- bool overlapsFound; // flag to indicate if overlaps were found\n-\n- if ( (_searchType == "either") || (_searchType == "xor") ||\n- (_searchType == "both") || (_searchType == "notboth") ||\n- (_searchType == "neither") ) {\n-\n- // create a new BEDPE feature from the BAM alignments.\n- BEDPE a;\n- ConvertBamToBedPE(bam1, bam2, refs, a);\n- if (_bamOutput == true) { // BAM output\n- // write to BAM if correct hits found\n- overlapsFound = FindOneOrMoreOverlaps(a, _searchType);\n- if (overlapsFound == true) {\n- writer.SaveAlignment(bam1);\n- writer.SaveAlignment(bam2);\n- }\n- }\n- else { // BEDPE output\n- FindOverlaps(a, hits1, hits2, _searchType);\n- hits1.clear();\n- hits2.clear();\n- }\n- }\n- else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {\n- // only look for ispan and ospan when both ends are mapped.\n- if (bam1.IsMapped() && bam2.IsMapped()) {\n- // only do an inspan or outspan check if the alignment is intrachromosomal\n- if (bam1.RefID == bam2.RefID) {\n- // create a new BEDPE feature from the BAM alignments.\n- BEDPE a;\n- ConvertBamToBedPE(bam1, bam2, refs, a);\n- if (_bamOutput == true) { // BAM output\n- // look for overlaps, and write to BAM if >=1 were found\n- overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n- if (overlapsFound == true) {\n- writer.SaveAlignment(bam1);\n- writer.SaveAlignment(bam2);\n- }\n- }\n- else { // BEDPE output\n- FindSpanningOverlaps(a, hits, _searchType);\n- hits.clear();\n- }\n- }\n- }\n- }\n- else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {\n- // only look for notispan and notospan when both ends are mapped.\n- if (bam1.IsMapped() && bam2.IsMapped()) {\n- // only do an inspan or outspan check if the alignment is intrachromosomal\n- if (bam1.RefID == bam2.RefID) {\n- // create a new BEDPE feature from the BAM alignments.\n- BEDPE a;\n- ConvertBamToBedPE(bam1, bam2, refs, a);\n- if (_bamOutput == true) { // BAM output\n- // write to BAM if there were no overlaps\n- overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);\n- if (overlapsFound == false) {\n- writer.SaveAlignment(bam1);\n- writer.SaveAlignment(bam2);\n- }\n- }\n- else { // BEDPE output\n- FindSpanningOverlaps(a, hits, _searchType);\n- hits.clear();\n- }\n- }\n- // if inter-chromosomal or orphaned, we know it\'s not ispan and not ospan\n- else if (_bamOutput == true) {\n- writer.SaveAlignment(bam1);\n- writer.SaveAlignment(bam2);\n- }\n- }\n- // if both ends aren\'t mapped, we know that it\'s notispan and not ospan\n- else if (_bamOutput == true) {\n- writer.SaveAlignment(bam1);\n- writer.SaveAlignment(bam2);\n- }\n- }\n-}\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h --- a/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,161 +0,0 @@ -/***************************************************************************** - pairToBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERSECTBED_H -#define INTERSECTBED_H - -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -using namespace BamTools; - -#include "bedFile.h" -#include "bedFilePE.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - - - -/************************************************** -Helper function protoypes -**************************************************/ -void IsCorrectMappingForBEDPE (const BamAlignment &bam, const RefVector &refs, BEDPE &a); - - - -//************************************************ -// Class methods and elements -//************************************************ -class BedIntersectPE { - -public: - - // constructor - BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction, - string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance); - // destructor - ~BedIntersectPE(void); - - void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type); - - bool FindOneOrMoreOverlaps(const BEDPE &, const string &type); - - void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type); - bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type); - - void IntersectBedPE(); - void IntersectBamPE(string bamFile); - - void DetermineBedPEInput(); - -private: - - string _bedAFilePE; - string _bedBFile; - float _overlapFraction; - string _searchType; - bool _sameStrand; - bool _diffStrand; - bool _useEditDistance; - bool _bamInput; - bool _bamOutput; - bool _isUncompressedBam; - - // instance of a paired-end bed file class. - BedFilePE *_bedA; - - // instance of a bed file class. - BedFile *_bedB; - - inline - void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) { - - // initialize BEDPE variables - a.start1 = a.start2 = a.end1 = a.end2 = -1; - a.chrom1 = a.chrom2 = "."; - a.strand1 = a.strand2 = '.'; - uint32_t editDistance1, editDistance2; - editDistance1 = editDistance2 = 0; - - // take the qname from end 1. - a.name = bam1.Name; - - // end 1 - if (bam1.IsMapped()) { - a.chrom1 = refs.at(bam1.RefID).RefName; - a.start1 = bam1.Position; - a.end1 = bam1.GetEndPosition(false, false); - a.strand1 = "+"; - if (bam1.IsReverseStrand()) a.strand1 = "-"; - - // extract the edit distance from the NM tag - // if possible. otherwise, complain. - if (_useEditDistance == true) { - if (bam1.GetTag("NM", editDistance1) == false) { - cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; - exit(1); - } - } - } - - // end 2 - if (bam2.IsMapped()) { - a.chrom2 = refs.at(bam2.RefID).RefName; - a.start2 = bam2.Position; - a.end2 = bam2.GetEndPosition(false, false); - a.strand2 = "+"; - if (bam2.IsReverseStrand()) a.strand2 = "-"; - - // extract the edit distance from the NM tag - // if possible. otherwise, complain. - if (_useEditDistance == true) { - if (bam2.GetTag("NM", editDistance2) == false) { - cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; - exit(1); - } - } - } - - // swap the ends if necessary - if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) { - swap(a.chrom1, a.chrom2); - swap(a.start1, a.start2); - swap(a.end1, a.end2); - swap(a.strand1, a.strand2); - } - - // compute the minimum mapping quality b/w the two ends of the pair. - a.score = "0"; - if (_useEditDistance == false) { - if (bam1.IsMapped() == true && bam2.IsMapped() == true) - a.score = ToString(min(bam1.MapQuality, bam2.MapQuality)); - } - // BEDPE using edit distance - else { - if (bam1.IsMapped() == true && bam2.IsMapped() == true) - a.score = ToString((int) (editDistance1 + editDistance2)); - else if (bam1.IsMapped() == true) - a.score = ToString((int) editDistance1); - else if (bam2.IsMapped() == true) - a.score = ToString((int) editDistance2); - } - }; - - inline - void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2, - const RefVector &refs, - BamWriter &writer); -}; - -#endif /* PEINTERSECTBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp --- a/BEDTools-Version-2.14.3/src/pairToBed/pairToBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,237 +0,0 @@\n-/*****************************************************************************\n- pairToBedMain.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "pairToBed.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "pairToBed"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedAFile;\n- string bedBFile;\n-\n- // input arguments\n- float overlapFraction = 1E-9;\n- string searchType = "either";\n-\n- // flags to track parameters\n- bool haveBedA = false;\n- bool haveBedB = false;\n- bool haveSearchType = false;\n- bool haveFraction = false;\n- bool sameStrand = false;\n- bool diffStrand = false;\n- bool useEditDistance = false;\n- bool inputIsBam = false;\n- bool outputIsBam = true;\n- bool uncompressedBam = false;\n-\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- outputIsBam = false;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- inputIsBam = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedB = true;\n- bedBFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {\n- outputIsBam = false;\n- }\n- else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {\n- useEditDistance = true;\n- }\n- else if(PARAMETER_CHECK("-type", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveSearchType = true;\n- searchType = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-f", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFraction = true;\n- overlapFraction = atof(argv[i + 1]);\n- i++;\n- }\n- }\n- else if (PARAMETER_CHECK("-s", 2, parameterLength)) {\n- sameStrand = true;\n- }\n- else if (PARAMETER_CHECK("-S", 2, parameterLength)) {\n- diffStrand = true;\n- }\n- else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) {\n- uncompressedBam = true;\n- }\n- else {\n- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;\n- showHelp = true;\n- }\n- }\n-\n-\n- // make sure we have both input files\n- if (!haveBedA || !haveBedB)'..b' << endl << endl;\n-\n- cerr << "Options: " << endl;\n-\n- cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl;\n- cerr << "\\t\\t- Requires BAM to be grouped or sorted by query." << endl << endl;\n-\n- cerr << "\\t-ubam\\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n- cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n-\n- cerr << "\\t-bedpe\\t" << "When using BAM input (-abam), write output as BEDPE. The default" << endl;\n- cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n-\n- cerr << "\\t-ed\\t" << "Use BAM total edit distance (NM tag) for BEDPE score." << endl;\n- cerr << "\\t\\t- Default for BEDPE is to use the minimum of" << endl;\n- cerr << "\\t\\t of the two mapping qualities for the pair." << endl;\n- cerr << "\\t\\t- When -ed is used the total edit distance" << endl;\n- cerr << "\\t\\t from the two mates is reported as the score." << endl << endl;\n-\n- cerr << "\\t-f\\t" << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl;\n- cerr << "\\t\\tDefault is 1E-9 (effectively 1bp)." << endl << endl;\n-\n- cerr << "\\t-s\\t" << "Require same strandedness when finding overlaps." << endl;\n- cerr << "\\t\\tDefault is to ignore stand." << endl;\n- cerr << "\\t\\tNot applicable with -type inspan or -type outspan." << endl << endl;\n-\n- cerr << "\\t-S\\t" << "Require different strandedness when finding overlaps." << endl;\n- cerr << "\\t\\tDefault is to ignore stand." << endl;\n- cerr << "\\t\\tNot applicable with -type inspan or -type outspan." << endl << endl;\n-\n- cerr << "\\t-type \\t" << "Approach to reporting overlaps between BEDPE and BED." << endl << endl;\n- cerr << "\\t\\teither\\tReport overlaps if either end of A overlaps B." << endl;\n- cerr << "\\t\\t\\t- Default." << endl;\n-\n- cerr << "\\t\\tneither\\tReport A if neither end of A overlaps B." << endl;\n-\n- cerr << "\\t\\tboth\\tReport overlaps if both ends of A overlap B." << endl;\n-\n- cerr << "\\t\\txor\\tReport overlaps if one and only one end of A overlaps B." << endl;\n-\n- cerr << "\\t\\tnotboth\\tReport overlaps if neither end or one and only one " << endl;\n- cerr << "\\t\\t\\tend of A overlap B. That is, xor + neither." << endl << endl;\n-\n- cerr << "\\t\\tispan\\tReport overlaps between [end1, start2] of A and B." << endl;\n- cerr << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n-\n- cerr << "\\t\\tospan\\tReport overlaps between [start1, end2] of A and B." << endl;\n- cerr << "\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n-\n- cerr << "\\t\\tnotispan\\tReport A if ispan of A doesn\'t overlap B." << endl;\n- cerr << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n-\n- cerr << "\\t\\tnotospan\\tReport A if ospan of A doesn\'t overlap B." << endl;\n- cerr << "\\t\\t\\t\\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl;\n-\n- cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl;\n-\n- exit(1);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToPair/Makefile --- a/BEDTools-Version-2.14.3/src/pairToPair/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFilePE/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= pairToPairMain.cpp pairToPair.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFilePE.o bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= pairToPair - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFilePE/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp --- a/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,216 +0,0 @@\n-/*****************************************************************************\n- pairToPair.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "pairToPair.h"\n-\n-\n-/*\n- Constructor\n-*/\n-PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,\n- string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) {\n-\n- _bedAFilePE = bedAFilePE;\n- _bedBFilePE = bedBFilePE;\n- _overlapFraction = overlapFraction;\n- _searchType = searchType;\n- _ignoreStrand = ignoreStrand;\n- _reqDiffNames = reqDiffNames;\n- _slop = slop;\n- _strandedSlop = strandedSlop;\n-\n- _bedA = new BedFilePE(bedAFilePE);\n- _bedB = new BedFilePE(bedBFilePE);\n-\n- IntersectPairs();\n-}\n-\n-\n-/*\n- Destructor\n-*/\n-PairToPair::~PairToPair(void) {\n-}\n-\n-\n-\n-void PairToPair::IntersectPairs() {\n-\n- // load the "B" bed file into a map so\n- // that we can easily compare "A" to it for overlaps\n- _bedB->loadBedPEFileIntoMap();\n-\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n- BEDPE a, nullBedPE;\n-\n- _bedA->Open();\n- while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- // identify overlaps b/w the pairs\n- FindOverlaps(a);\n- a = nullBedPE;\n- }\n- }\n- _bedA->Close();\n-}\n-// END IntersectPE\n-\n-\n-\n-void PairToPair::FindOverlaps(const BEDPE &a) {\n- //\n- vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2;\n-\n- // add the appropriate slop to the starts and ends\n- int start1 = a.start1;\n- int end1 = a.end1;\n- int start2 = a.start2;\n- int end2 = a.end2;\n-\n- if (_strandedSlop == true) {\n- if (a.strand1 == "+")\n- end1 += _slop;\n- else\n- start1 -= _slop;\n- if (a.strand2 == "+")\n- end2 += _slop;\n- else\n- start2 -= _slop;\n- }\n- else {\n- (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0;\n- (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0;\n- end1 += _slop;\n- end2 += _slop;\n- }\n-\n- // Find the _potential_ hits between each end of A and B\n- _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1\n- _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1\n- _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2\n- _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2\n-\n- unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size());\n- unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size());\n-\n- \n- // report the fact that no hits were found iff _searchType is neither.\n- if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {\n- _bedA->reportBedPENewLine(a);\n- }\n- else if (_searchType == "both") {\n- bool found1 = false;\n- bool found2 = false;\n- if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0))\n- found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2);\n- if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0))\n- found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2);\n- }\n- else if (_searchType == "notboth") {\n- bool found1 = false;\n- bool found2 = false'..b'End(a, hitsA2B1, hitsA1B2);\n- }\n-}\n-\n-\n-bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n- const vector<MATE> &qualityHitsEnd2) {\n-\n- map<unsigned int, vector<MATE>, less<int> > hitsMap;\n-\n- for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {\n- hitsMap[h->lineNum].push_back(*h);\n- }\n- for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n- hitsMap[h->lineNum].push_back(*h);\n- }\n-\n-\n- bool bothFound = false;\n- for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n- \n- // hits on both sides\n- if (m->second.size() >= 2) {\n- bothFound = true;\n- MATE b1 = m->second[0];\n- MATE b2 = m->second[1];\n-\n- if (_searchType == "both") {\n- _bedA->reportBedPETab(a);\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n- b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n- b1.bed.name.c_str(), b1.bed.score.c_str(),\n- b1.bed.strand.c_str(), b2.bed.strand.c_str());\n- for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n- printf("\\t%s", b1.bed.otherFields[i].c_str());\n- printf("\\n");\n- }\n- }\n- }\n- return bothFound;\n-}\n-\n-\n-void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,\n- const vector<MATE> &qualityHitsEnd2) {\n-\n- map<unsigned int, vector<MATE>, less<int> > hitsMap;\n-\n- for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {\n- hitsMap[h->lineNum].push_back(*h);\n- }\n- for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {\n- hitsMap[h->lineNum].push_back(*h);\n- }\n-\n- for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {\n- if (m->second.size() >= 1) {\n-\n- if ((m->second.size()) == 2) {\n- MATE b1 = m->second[0];\n- MATE b2 = m->second[1];\n-\n- _bedA->reportBedPETab(a);\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n- b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,\n- b1.bed.name.c_str(), b1.bed.score.c_str(),\n- b1.bed.strand.c_str(), b2.bed.strand.c_str());\n- for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n- printf("\\t%s", b1.bed.otherFields[i].c_str());\n- printf("\\n");\n- }\n- else {\n- MATE b1 = m->second[0];\n-\n- _bedA->reportBedPETab(a);\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,\n- b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end,\n- b1.bed.name.c_str(), b1.bed.score.c_str(),\n- b1.bed.strand.c_str(), b1.mate->bed.strand.c_str());\n- for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)\n- printf("\\t%s", b1.bed.otherFields[i].c_str());\n- printf("\\n");\n- }\n- }\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h --- a/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,76 +0,0 @@ -/***************************************************************************** - pairToPair.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef PAIRTOPAIR_H -#define PAIRTOPAIR_H - -#include "bedFile.h" -#include "bedFilePE.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - - - -//************************************************ -// Class methods and elements -//************************************************ -class PairToPair { - -public: - - // constructor - PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction, - string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop); - - // destructor - ~PairToPair(void); - - void IntersectPairs(); - - -private: - - string _bedAFilePE; - string _bedBFilePE; - - float _overlapFraction; - string _searchType; - bool _ignoreStrand; - bool _reqDiffNames; - int _slop; - bool _strandedSlop; - - // instance of a paired-end bed file class. - BedFilePE *_bedA; - - // instance of a bed file class. - BedFilePE *_bedB; - - // methods - // void FindOverlaps(const BEDPE &a, vector<MATE> &hitsA1B1, vector<MATE> &hitsA1B2, - // vector<MATE> &hitsA2B1, vector<MATE> &hitsA2B2); - void FindOverlaps(const BEDPE &a); - - void FindQualityHitsBetweenEnds(CHRPOS start, CHRPOS end, - const vector<MATE> &hits, vector<MATE> &qualityHits, int &numOverlaps); - - bool FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, - const vector<MATE> &qualityHitsEnd2); - - void FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, - const vector<MATE> &qualityHitsEnd2); - -}; - -#endif /* PAIRTOPAIR_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp --- a/BEDTools-Version-2.14.3/src/pairToPair/pairToPairMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,188 +0,0 @@ -/***************************************************************************** - pairToPairMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "pairToPair.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "pairToPair" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedAFile; - string bedBFile; - - // input arguments - float overlapFraction = 1E-9; - int slop = 0; - string searchType = "both"; - - // flags to track parameters - bool haveBedA = false; - bool haveBedB = false; - bool haveSearchType = false; - bool haveFraction = false; - bool ignoreStrand = false; - bool requireDifferentNames = false; - bool haveSlop = false; - bool strandedSlop = false; - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-a", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedB = true; - bedBFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-type", 5, parameterLength)) { - if ((i+1) < argc) { - haveSearchType = true; - searchType = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - haveFraction = true; - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-slop", 5, parameterLength)) { - if ((i+1) < argc) { - haveSlop = true; - slop = atoi(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-ss", 3, parameterLength)) { - strandedSlop = true; - } - else if(PARAMETER_CHECK("-rdn", 4, parameterLength)) { - requireDifferentNames = true; - } - else if(PARAMETER_CHECK("-is", 3, parameterLength)) { - ignoreStrand = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - - // make sure we have both input files - if (!haveBedA || !haveBedB) { - cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; - showHelp = true; - } - - if (haveSearchType && (searchType != "neither") && (searchType != "both") && (searchType != "either") && (searchType != "notboth")) { - cerr << endl << "*****" << endl << "*****ERROR: Request \"both\",\"neither\",\"either\",or \"notboth\"" << endl << "*****" << endl; - showHelp = true; - } - - if (strandedSlop == true && haveSlop == false) { - cerr << endl << "*****" << endl << "*****ERROR: Need a -slop value if requesting -ss." << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - - PairToPair *bi = new PairToPair(bedAFile, bedBFile, overlapFraction, searchType, - ignoreStrand, requireDifferentNames, slop, strandedSlop); - delete bi; - return 0; - } - else { - ShowHelp(); - } -} - - -void ShowHelp(void) { - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Report overlaps between two paired-end BED files (BEDPE)." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <BEDPE> -b <BEDPE>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-f\t" << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl; - cerr << "\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; - - cerr << "\t-type \t" << "Approach to reporting overlaps between A and B." << endl << endl; - cerr << "\t\tneither\tReport overlaps if neither end of A overlaps B." << endl; - cerr << "\t\teither\tReport overlaps if either ends of A overlap B." << endl; - cerr << "\t\tboth\tReport overlaps if both ends of A overlap B." << endl; - cerr << "\t\tnotboth\tReport overlaps if one or neither of ends of A overlap B." << endl; - - cerr << "\t\t- Default = both." << endl << endl; - - cerr << "\t-slop \t" << "The amount of slop (in b.p.). to be added to each footprint." << endl; - cerr << "\t\t*Note*: Slop is subtracted from start1 and start2 and added to end1 and end2." << endl << endl; - - cerr << "\t-ss\t" << "Add slop based to each BEDPE footprint based on strand." << endl; - cerr << "\t\t- If strand is \"+\", slop is only added to the end coordinates." << endl; - cerr << "\t\t- If strand is \"-\", slop is only added to the start coordinates." << endl; - cerr << "\t\t- By default, slop is added in both directions." << endl << endl; - - cerr << "\t-is\t" << "Ignore strands when searching for overlaps." << endl; - cerr << "\t\t- By default, strands are enforced." << endl << endl; - - cerr << "\t-rdn\t" << "Require the hits to have different names (i.e. avoid self-hits)." << endl; - cerr << "\t\t- By default, same names are allowed." << endl << endl; - - - cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl; - - // end the program here - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/shuffleBed/Makefile --- a/BEDTools-Version-2.14.3/src/shuffleBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= shuffleBedMain.cpp shuffleBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= shuffleBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp --- a/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,244 +0,0 @@\n-/*****************************************************************************\n- shuffleBed.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "lineFileUtilities.h"\n-#include "shuffleBed.h"\n-\n-\n-BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, \n- bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, \n- float overlapFraction, int seed) {\n-\n- _bedFile = bedFile;\n- _genomeFile = genomeFile;\n- _excludeFile = excludeFile;\n- _includeFile = includeFile;\n- _sameChrom = sameChrom;\n- _haveExclude = haveExclude;\n- _haveInclude = haveInclude;\n- _overlapFraction = overlapFraction;\n- _haveSeed = haveSeed;\n-\n-\n- // use the supplied seed for the random\n- // number generation if given. else,\n- // roll our own.\n- if (_haveSeed) {\n- _seed = seed;\n- srand(seed);\n- }\n- else {\n- // thanks to Rob Long for the tip.\n- _seed = (unsigned)time(0)+(unsigned)getpid();\n- srand(_seed);\n- }\n-\n- _bed = new BedFile(bedFile);\n- _genome = new GenomeFile(genomeFile);\n- _chroms = _genome->getChromList();\n- _numChroms = _genome->getNumberOfChroms();\n-\n- if (_haveExclude) {\n- _exclude = new BedFile(excludeFile);\n- _exclude->loadBedFileIntoMap();\n- }\n- \n- if (_haveInclude) {\n- _include = new BedFile(includeFile);\n- _include->loadBedFileIntoMapNoBin();\n- \n- _numIncludeChroms = 0;\n- masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); \n- masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end();\n- for(; it != itEnd; ++it) {\n- _includeChroms.push_back(it->first);\n- _numIncludeChroms++;\n- }\n- }\n-\n- if (_haveExclude == true && _haveInclude == false)\n- ShuffleWithExclusions();\n- else if (_haveExclude == false && _haveInclude == true)\n- ShuffleWithInclusions();\n- else\n- Shuffle();\n-}\n-\n-\n-BedShuffle::~BedShuffle(void) {\n-\n-}\n-\n-\n-void BedShuffle::Shuffle() {\n-\n- int lineNum = 0;\n- BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n- BedLineStatus bedStatus;\n-\n- _bed->Open();\n- while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- ChooseLocus(bedEntry);\n- _bed->reportBedNewLine(bedEntry);\n- bedEntry = nullBed;\n- }\n- }\n- _bed->Close();\n-}\n-\n-\n-\n-void BedShuffle::ShuffleWithExclusions() {\n-\n- int lineNum = 0;\n- BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n- BedLineStatus bedStatus;\n-\n- _bed->Open();\n- while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- // keep looking as long as the chosen\n- // locus happens to overlap with regions\n- // that the user wishes to exclude.\n- int tries = 0;\n- bool haveOverlap = false;\n- do \n- {\n- // choose a new locus\n- ChooseLocus(bedEntry);\n- haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end,\n- bedEntry.strand, false, _overlapFraction);\n- tries++;\n- } while ((haveOverlap == true) && (tries <= MAX_TRIES));\n- \n-\n- if (tries > MAX_TRIES) {\n- cerr << "Error, line " << lineNum << ":'..b't avoid excluded regions. Ignoring entry and moving on." << endl;\n- }\n- else {\n- _bed->reportBedNewLine(bedEntry);\n- }\n- }\n- bedEntry = nullBed;\n- }\n- _bed->Close();\n-}\n-\n-\n-void BedShuffle::ShuffleWithInclusions() {\n-\n- int lineNum = 0;\n- BED bedEntry, nullBed; // used to store the current BED line from the BED file.\n- BedLineStatus bedStatus;\n-\n- _bed->Open();\n- while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- // choose a new locus\n- ChooseLocusFromInclusionFile(bedEntry);\n- _bed->reportBedNewLine(bedEntry);\n- }\n- bedEntry = nullBed;\n- }\n- _bed->Close();\n-}\n-\n-\n-void BedShuffle::ChooseLocus(BED &bedEntry) {\n-\n- string chrom = bedEntry.chrom;\n- CHRPOS start = bedEntry.start;\n- CHRPOS end = bedEntry.end;\n- CHRPOS length = end - start;\n-\n- string randomChrom;\n- CHRPOS randomStart;\n- CHRPOS chromSize;\n-\n- if (_sameChrom == false) {\n- randomChrom = _chroms[rand() % _numChroms];\n- chromSize = _genome->getChromSize(randomChrom);\n- randomStart = rand() % chromSize;\n- bedEntry.chrom = randomChrom;\n- bedEntry.start = randomStart;\n- bedEntry.end = randomStart + length;\n- }\n- else {\n- chromSize = _genome->getChromSize(chrom);\n- randomStart = rand() % chromSize;\n- bedEntry.start = randomStart;\n- bedEntry.end = randomStart + length;\n- }\n-\n- // ensure that the chosen location doesn\'t go past\n- // the length of the chromosome. if so, keep looking\n- // for a new spot.\n- while (bedEntry.end > chromSize) {\n- if (_sameChrom == false) {\n- randomChrom = _chroms[rand() % _numChroms];\n- chromSize = _genome->getChromSize(randomChrom);\n- randomStart = rand() % chromSize;\n- bedEntry.chrom = randomChrom;\n- bedEntry.start = randomStart;\n- bedEntry.end = randomStart + length;\n- }\n- else {\n- chromSize = _genome->getChromSize(chrom);\n- randomStart = rand() % chromSize;\n- bedEntry.start = randomStart;\n- bedEntry.end = randomStart + length;\n- }\n- }\n-}\n-\n-\n-void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {\n-\n- string chrom = bedEntry.chrom;\n- CHRPOS length = bedEntry.end - bedEntry.start;\n-\n- string randomChrom;\n- CHRPOS randomStart;\n- BED includeInterval;\n- \n- if (_sameChrom == false) {\n-\n- // grab a random chromosome from the inclusion file.\n- randomChrom = _includeChroms[rand() % _numIncludeChroms];\n- // get the number of inclusion intervals for that chrom\n- size_t size = _include->bedMapNoBin[randomChrom].size();\n- // grab a random interval on the chosen chromosome.\n- size_t interval = rand() % size;\n- // retreive a ranom -incl interval on the selected chrom\n- includeInterval = _include->bedMapNoBin[randomChrom][interval];\n-\n- bedEntry.chrom = randomChrom; \n- }\n- else {\n- // get the number of inclusion intervals for the original chrom\n- size_t size = _include->bedMapNoBin[chrom].size();\n- // grab a random interval on the chosen chromosome.\n- includeInterval = _include->bedMapNoBin[chrom][rand() % size];\n- }\n- \n- randomStart = includeInterval.start + rand() % (includeInterval.size());\n- bedEntry.start = randomStart;\n- bedEntry.end = randomStart + length;\n- \n- // use recursion to ensure that the chosen location \n- // doesn\'t go past the end of the chrom\n- if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) {\n- //bedEntry.end = _genome->getChromSize(chrom);\n- ChooseLocusFromInclusionFile(bedEntry);\n- }\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h --- a/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,76 +0,0 @@ -/***************************************************************************** - shuffleBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include "genomeFile.h" - -#include <vector> -#include <iostream> -#include <fstream> -#include <map> -#include <cstdlib> -#include <ctime> -#include <sys/time.h> -#include <unistd.h> -#include <sys/types.h> -using namespace std; - -const int MAX_TRIES = 1000000; - -//************************************************ -// Class methods and elements -//************************************************ -class BedShuffle { - -public: - - // constructor - BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, - bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, - float overlapFraction, int seed); - - // destructor - ~BedShuffle(void); - -private: - - string _bedFile; - string _genomeFile; - string _excludeFile; - string _includeFile; - float _overlapFraction; - int _seed; - bool _sameChrom; - bool _haveExclude; - bool _haveInclude; - bool _haveSeed; - - - // The BED file from which to compute coverage. - BedFile *_bed; - BedFile *_exclude; - BedFile *_include; - - GenomeFile *_genome; - - vector<string> _chroms; - int _numChroms; - vector<string> _includeChroms; - int _numIncludeChroms; - - // methods - void Shuffle(); - void ShuffleWithExclusions(); - void ShuffleWithInclusions(); - - void ChooseLocus(BED &); - void ChooseLocusFromInclusionFile(BED &); -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp --- a/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,188 +0,0 @@ -/***************************************************************************** - shuffleBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "shuffleBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "shuffleBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - string excludeFile; - string includeFile; - string genomeFile; - - bool haveBed = true; - bool haveGenome = false; - bool haveExclude = false; - bool haveInclude = false; - bool haveSeed = false; - float overlapFraction = 0.0; - int seed = -1; - bool sameChrom = false; - - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-g", 2, parameterLength)) { - if ((i+1) < argc) { - haveGenome = true; - genomeFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-excl", 5, parameterLength)) { - if ((i+1) < argc) { - haveExclude = true; - excludeFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-incl", 5, parameterLength)) { - if ((i+1) < argc) { - haveInclude = true; - includeFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-seed", 5, parameterLength)) { - if ((i+1) < argc) { - haveSeed = true; - seed = atoi(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-chrom", 6, parameterLength)) { - sameChrom = true; - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed || !haveGenome) { - cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; - showHelp = true; - } - - if (haveInclude && haveExclude) { - cerr << endl << "*****" << endl << "*****ERROR: Cannot use -incl and -excl together." << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedShuffle *bc = new BedShuffle(bedFile, genomeFile, excludeFile, includeFile, - haveSeed, haveExclude, haveInclude, sameChrom, - overlapFraction, seed); - delete bc; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Randomly permute the locations of a feature file among a genome." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-excl\t" << "A BED/GFF/VCF file of coordinates in which features in -i" << endl; - cerr << "\t\tshould not be placed (e.g. gaps.bed)." << endl << endl; - - cerr << "\t-incl\t" << "Instead of randomly placing features in a genome, the -incl" << endl; - cerr << "\t\toptions defines a BED/GFF/VCF file of coordinates in which " << endl; - cerr << "\t\tfeatures in -i should be randomly placed (e.g. genes.bed). " << endl << endl; - - cerr << "\t-chrom\t" << "Keep features in -i on the same chromosome."<< endl; - cerr << "\t\t- By default, the chrom and position are randomly chosen." << endl << endl; - - cerr << "\t-seed\t" << "Supply an integer seed for the shuffling." << endl; - cerr << "\t\t- By default, the seed is chosen automatically." << endl; - cerr << "\t\t- (INTEGER)" << endl << endl; - - cerr << "\t-f\t" << "Maximum overlap (as a fraction of the -i feature) with an -excl" << endl; - cerr << "\t\tfeature that is tolerated before searching for a new, " << endl; - cerr << "\t\trandomized locus. For example, -f 0.10 allows up to 10%" << endl; - cerr << "\t\tof a randomized feature to overlap with a given feature" << endl; - cerr << "\t\tin the -excl file. **Cannot be used with -incl file.**" << endl; - cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; - cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) The genome file should tab delimited and structured as follows:" << endl; - cerr << "\t <chromName><TAB><chromSize>" << endl << endl; - cerr << "\tFor example, Human (hg19):" << endl; - cerr << "\tchr1\t249250621" << endl; - cerr << "\tchr2\t243199373" << endl; - cerr << "\t..." << endl; - cerr << "\tchr18_gl000207_random\t4262" << endl << endl; - - - cerr << "Tips: " << endl; - cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; - cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; - cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; - cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; - - - // end the program here - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/slopBed/Makefile --- a/BEDTools-Version-2.14.3/src/slopBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= slopBedMain.cpp slopBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= slopBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp --- a/BEDTools-Version-2.14.3/src/slopBed/slopBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,91 +0,0 @@ -/***************************************************************************** - slopBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "slopBed.h" - - -BedSlop::BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional) { - - _bedFile = bedFile; - _genomeFile = genomeFile; - _forceStrand = forceStrand; - _leftSlop = leftSlop; - _rightSlop = rightSlop; - _fractional = fractional; - - _bed = new BedFile(bedFile); - _genome = new GenomeFile(genomeFile); - - // get going, slop it up. - SlopBed(); -} - - -BedSlop::~BedSlop(void) { - -} - - -void BedSlop::SlopBed() { - - int lineNum = 0; - BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - - _bed->Open(); - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - while (bedStatus != BED_INVALID) { - if (bedStatus == BED_VALID) { - if (_fractional == false) { - AddSlop(bedEntry, (int) _leftSlop, (int) _rightSlop); - } - else { - int leftSlop = (int) (_leftSlop * bedEntry.size()); - int rightSlop = (int) (_rightSlop * bedEntry.size()); - AddSlop(bedEntry, leftSlop, rightSlop); - } - _bed->reportBedNewLine(bedEntry); - bedEntry = nullBed; - } - bedStatus = _bed->GetNextBed(bedEntry, lineNum); - } - _bed->Close(); -} - - -void BedSlop::AddSlop(BED &bed, int leftSlop, int rightSlop) { - - // special handling if the BED entry is on the negative - // strand and the user cares about strandedness. - CHRPOS chromSize = _genome->getChromSize(bed.chrom); - - if ( (_forceStrand) && (bed.strand == "-") ) { - // inspect the start - if ( (static_cast<int>(bed.start) - rightSlop) > 0 ) bed.start -= rightSlop; - else bed.start = 0; - - // inspect the start - if ( (static_cast<int>(bed.end) + leftSlop) <= static_cast<int>(chromSize)) bed.end += leftSlop; - else bed.end = chromSize; - } - else { - // inspect the start - if ( (static_cast<int>(bed.start) - leftSlop) > 0) bed.start -= leftSlop; - else bed.start = 0; - - // inspect the end - if ( (static_cast<int>(bed.end) + rightSlop) <= static_cast<int>(chromSize)) bed.end += rightSlop; - else bed.end = chromSize; - } -} - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/slopBed/slopBed.h --- a/BEDTools-Version-2.14.3/src/slopBed/slopBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,59 +0,0 @@ -/***************************************************************************** - slopBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ - -#include "bedFile.h" -#include "genomeFile.h" - -#include <vector> -#include <iostream> -#include <fstream> -#include <map> -#include <cstdlib> -#include <ctime> -using namespace std; - - -//************************************************ -// Class methods and elements -//************************************************ -class BedSlop { - -public: - - // constructor - BedSlop(string &bedFile, string &genomeFile, bool forceStrand, float leftSlop, float rightSlop, bool fractional); - - // destructor - ~BedSlop(void); - - - -private: - - string _bedFile; - string _genomeFile; - - bool _forceStrand; - float _leftSlop; - float _rightSlop; - bool _fractional; - - BedFile *_bed; - GenomeFile *_genome; - - // methods - - void SlopBed(); - - // method to add requested "slop" to a single BED entry - void AddSlop(BED &bed, int leftSlop, int rightSlop); -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp --- a/BEDTools-Version-2.14.3/src/slopBed/slopBedMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,190 +0,0 @@ -/***************************************************************************** - slopBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "slopBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "slopBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - string genomeFile; - - bool haveBed = true; - bool haveGenome = false; - bool haveLeft = false; - bool haveRight = false; - bool haveBoth = false; - - bool forceStrand = false; - float leftSlop = 0.0; - float rightSlop = 0.0; - bool fractional = false; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-g", 2, parameterLength)) { - if ((i+1) < argc) { - haveGenome = true; - genomeFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-l", 2, parameterLength)) { - if ((i+1) < argc) { - haveLeft = true; - leftSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-r", 2, parameterLength)) { - if ((i+1) < argc) { - haveRight = true; - rightSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBoth = true; - leftSlop = atof(argv[i + 1]); - rightSlop = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-s", 2, parameterLength)) { - forceStrand = true; - } - else if(PARAMETER_CHECK("-pct", 4, parameterLength)) { - fractional = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed || !haveGenome) { - cerr << endl << "*****" << endl << "*****ERROR: Need both a BED (-i) and a genome (-g) file. " << endl << "*****" << endl; - showHelp = true; - } - if (!haveLeft && !haveRight && !haveBoth) { - cerr << endl << "*****" << endl << "*****ERROR: Need -l and -r together or -b alone. " << endl << "*****" << endl; - showHelp = true; - } - if ((!haveLeft && haveRight) || (haveLeft && !haveRight)) { - cerr << endl << "*****" << endl << "*****ERROR: Need both -l and -r. " << endl << "*****" << endl; - showHelp = true; - } - if (forceStrand && (!(haveLeft) || !(haveRight))) { - cerr << endl << "*****" << endl << "*****ERROR: Must supply -l and -r with -s. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - BedSlop *bc = new BedSlop(bedFile, genomeFile, forceStrand, leftSlop, rightSlop, fractional); - delete bc; - - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Add requested base pairs of \"slop\" to each feature." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf> -g <genome> [-b <int> or (-l and -r)]" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-b\t" << "Increase the BED/GFF/VCF entry by -b base pairs in each direction." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-l\t" << "The number of base pairs to subtract from the start coordinate." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-r\t" << "The number of base pairs to add to the end coordinate." << endl; - cerr << "\t\t- (Integer) or (Float, e.g. 0.1) if used with -pct." << endl << endl; - - cerr << "\t-s\t" << "Define -l and -r based on strand." << endl; - cerr << "\t\tE.g. if used, -l 500 for a negative-stranded feature, " << endl; - cerr << "\t\tit will add 500 bp downstream. Default = false." << endl << endl; - - cerr << "\t-pct\t" << "Define -l and -r as a fraction of the feature's length." << endl; - cerr << "\t\tE.g. if used on a 1000bp feature, -l 0.50, " << endl; - cerr << "\t\twill add 500 bp \"upstream\". Default = false." << endl << endl; - - cerr << "Notes: " << endl; - cerr << "\t(1) Starts will be set to 0 if options would force it below 0." << endl; - cerr << "\t(2) Ends will be set to the chromosome length if requested slop would" << endl; - cerr << "\tforce it above the max chrom length." << endl; - - cerr << "\t(3) The genome file should tab delimited and structured as follows:" << endl; - cerr << "\n\t<chromName><TAB><chromSize>" << endl << endl; - cerr << "\tFor example, Human (hg19):" << endl; - cerr << "\tchr1\t249250621" << endl; - cerr << "\tchr2\t243199373" << endl; - cerr << "\t..." << endl; - cerr << "\tchr18_gl000207_random\t4262" << endl << endl; - - - cerr << "Tips: " << endl; - cerr << "\tOne can use the UCSC Genome Browser's MySQL database to extract" << endl; - cerr << "\tchromosome sizes. For example, H. sapiens:" << endl << endl; - cerr << "\tmysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \\" << endl; - cerr << "\t\"select chrom, size from hg19.chromInfo\" > hg19.genome" << endl << endl; - - - // end the program here - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/sortBed/Makefile --- a/BEDTools-Version-2.14.3/src/sortBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,43 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= sortMain.cpp sortBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= sortBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp --- a/BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,201 +0,0 @@ -/***************************************************************************** - sortBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "sortBed.h" - -// -// Constructor -// -BedSort::BedSort(string &bedFile) { - _bedFile = bedFile; - _bed = new BedFile(bedFile); -} - -// -// Destructor -// -BedSort::~BedSort(void) { -} - - -void BedSort::SortBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - - for (unsigned int i = 0; i < bedList.size(); ++i) { - _bed->reportBedNewLine(bedList[i]); - } - } -} - - -void BedSort::SortBedBySizeAsc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - vector<BED> masterList; - masterList.reserve(1000000); - - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - - // add the entries from this chromosome to the current list - for (unsigned int i = 0; i < m->second.size(); ++i) { - masterList.push_back(m->second[i]); - } - } - - // sort the master list by size (asc.) - sort(masterList.begin(), masterList.end(), sortBySizeAsc); - - // report the entries in ascending order - for (unsigned int i = 0; i < masterList.size(); ++i) { - _bed->reportBedNewLine(masterList[i]); - } -} - - -void BedSort::SortBedBySizeDesc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - vector<BED> masterList; - masterList.reserve(1000000); - - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - - // add the entries from this chromosome to the current list - for (unsigned int i = 0; i < m->second.size(); ++i) { - masterList.push_back(m->second[i]); - } - } - - // sort the master list by size (asc.) - sort(masterList.begin(), masterList.end(), sortBySizeDesc); - - // report the entries in ascending order - for (unsigned int i = 0; i < masterList.size(); ++i) { - _bed->reportBedNewLine(masterList[i]); - } -} - -void BedSort::SortBedByChromThenSizeAsc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - sort(bedList.begin(), bedList.end(), sortBySizeAsc); - - for (unsigned int i = 0; i < bedList.size(); ++i) { - _bed->reportBedNewLine(bedList[i]); - } - } -} - - -void BedSort::SortBedByChromThenSizeDesc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - - sort(bedList.begin(), bedList.end(), sortBySizeDesc); - - for (unsigned int i = 0; i < bedList.size(); ++i) { - _bed->reportBedNewLine(bedList[i]); - } - } -} - - -void BedSort::SortBedByChromThenScoreAsc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - if (_bed->bedType >= 5) { - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - sort(bedList.begin(), bedList.end(), sortByScoreAsc); - - for (unsigned int i = 0; i < bedList.size(); ++i) { - _bed->reportBedNewLine(bedList[i]); - } - } - } - else { - cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl; - exit(1); - } -} - - -void BedSort::SortBedByChromThenScoreDesc() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bed->loadBedFileIntoMapNoBin(); - - if (_bed->bedType >= 5) { - // loop through each chromosome and merge their BED entries - for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) { - - // bedList is already sorted by start position. - vector<BED> bedList = m->second; - sort(bedList.begin(), bedList.end(), sortByScoreDesc); - - for (unsigned int i = 0; i < bedList.size(); ++i) { - _bed->reportBedNewLine(bedList[i]); - } - } - } - else { - cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl; - exit(1); - } -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/sortBed/sortBed.h --- a/BEDTools-Version-2.14.3/src/sortBed/sortBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -/***************************************************************************** - sortBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include <vector> -#include <algorithm> -#include <iostream> -#include <fstream> - -using namespace std; - - -//************************************************ -// Class methods and elements -//************************************************ -class BedSort { - -public: - - // constructor - BedSort(string &); - - // destructor - ~BedSort(void); - - void SortBed(); // the default. sorts by chrom (asc.) then by start (asc.) - void SortBedBySizeAsc(); - void SortBedBySizeDesc(); - void SortBedByChromThenSizeAsc(); - void SortBedByChromThenSizeDesc(); - void SortBedByChromThenScoreAsc(); - void SortBedByChromThenScoreDesc(); - -private: - string _bedFile; - - // instance of a bed file class. - BedFile *_bed; - - // methods - -}; |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp --- a/BEDTools-Version-2.14.3/src/sortBed/sortMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,157 +0,0 @@ -/***************************************************************************** - sortBedMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "sortBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "sortBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedFile = "stdin"; - bool haveBed = true; - int sortChoices = 0; - - bool sortBySizeAsc = false; - bool sortBySizeDesc = false; - bool sortByChromThenSizeAsc = false; - bool sortByChromThenSizeDesc = false; - bool sortByChromThenScoreAsc = false; - bool sortByChromThenScoreDesc = false; - - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - bedFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-sizeA", 6, parameterLength)) { - sortBySizeAsc = true; - sortChoices++; - } - else if(PARAMETER_CHECK("-sizeD", 6, parameterLength)) { - sortBySizeDesc = true; - sortChoices++; - } - else if(PARAMETER_CHECK("-chrThenSizeA", 13, parameterLength)) { - sortByChromThenSizeAsc = true; - sortChoices++; - } - else if(PARAMETER_CHECK("-chrThenSizeD", 13, parameterLength)) { - sortByChromThenSizeDesc = true; - sortChoices++; - } - else if(PARAMETER_CHECK("-chrThenScoreA", 14, parameterLength)) { - sortByChromThenScoreAsc = true; - sortChoices++; - } - else if(PARAMETER_CHECK("-chrThenScoreD", 14, parameterLength)) { - sortByChromThenScoreDesc = true; - sortChoices++; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBed) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i BED file. " << endl << "*****" << endl; - showHelp = true; - } - if (sortChoices > 1) { - cerr << endl << "*****" << endl << "*****ERROR: Sorting options are mutually exclusive. Please choose just one. " << endl << "*****" << endl; - showHelp = true; - } - - - if (!showHelp) { - BedSort *bm = new BedSort(bedFile); - - if (sortBySizeAsc) { - bm->SortBedBySizeAsc(); - } - else if (sortBySizeDesc) { - bm->SortBedBySizeDesc(); - } - else if (sortByChromThenSizeAsc) { - bm->SortBedByChromThenSizeAsc(); - } - else if (sortByChromThenSizeDesc) { - bm->SortBedByChromThenSizeDesc(); - } - else if (sortByChromThenScoreAsc) { - bm->SortBedByChromThenScoreAsc(); - } - else if (sortByChromThenScoreDesc) { - bm->SortBedByChromThenScoreDesc(); - } - else { - bm->SortBed(); - } - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - cerr << "Summary: Sorts a feature file in various and useful ways." << endl << endl; - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t" << "-sizeA\t\t" << "Sort by feature size in ascending order." << endl; - cerr << "\t" << "-sizeD\t\t" << "Sort by feature size in descending order." << endl; - cerr << "\t" << "-chrThenSizeA\t" << "Sort by chrom (asc), then feature size (asc)." << endl; - cerr << "\t" << "-chrThenSizeD\t" << "Sort by chrom (asc), then feature size (desc)." << endl; - cerr << "\t" << "-chrThenScoreA\t" << "Sort by chrom (asc), then score (asc)." << endl; - cerr << "\t" << "-chrThenScoreD\t" << "Sort by chrom (asc), then score (desc)." << endl << endl; - - exit(1); - -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/subtractBed/Makefile --- a/BEDTools-Version-2.14.3/src/subtractBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,47 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= subtractMain.cpp subtractBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= subtractBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp --- a/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,178 +0,0 @@ -/***************************************************************************** - subtractBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "subtractBed.h" - - -/* - Constructor -*/ -BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand) { - - _bedAFile = bedAFile; - _bedBFile = bedBFile; - _overlapFraction = overlapFraction; - _sameStrand = sameStrand; - _diffStrand = diffStrand; - - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); - - SubtractBed(); -} - - -/* - Destructor -*/ -BedSubtract::~BedSubtract(void) { -} - - -void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) { - - // find all of the overlaps between a and B. - _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); - - // is A completely spanned by an entry in B? - // if so, A should not be reported. - int numConsumedByB = 0; - int numOverlaps = 0; - vector<BED> bOverlaps; // list of hits in B. Special processing if there are multiple. - - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - - int s = max(a.start, h->start); - int e = min(a.end, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int aLength = (a.end - a.start); // the length of a in b.p. - - if (s < e) { - - // is there enough overlap (default ~ 1bp) - float overlap = ((float) overlapBases / (float) aLength); - - if (overlap >= 1.0) { - numOverlaps++; - numConsumedByB++; - } - else if ( overlap >= _overlapFraction ) { - numOverlaps++; - bOverlaps.push_back(*h); - } - } - } - - if (numOverlaps == 0) { - // no overlap found, so just report A as-is. - _bedA->reportBedNewLine(a); - } - else if (numOverlaps == 1) { - // one overlap found. only need to look at the single - // entry in bOverlaps. - - // if A was not "consumed" by any entry in B - if (numConsumedByB == 0) { - - BED theHit = bOverlaps[0]; - - // A ++++++++++++ - // B ---- - // Res. ==== ==== - if ( (theHit.start > a.start) && (theHit.end < a.end) ) { - _bedA->reportBedRangeNewLine(a,a.start,theHit.start); - _bedA->reportBedRangeNewLine(a,theHit.end,a.end); - } - // A ++++++++++++ - // B ---------- - // Res. == - else if (theHit.start == a.start) { - _bedA->reportBedRangeNewLine(a,theHit.end,a.end); - } - // A ++++++++++++ - // B ---------- - // Res. ==== - else if (theHit.start < a.start) { - _bedA->reportBedRangeNewLine(a,theHit.end,a.end); - } - // A ++++++++++++ - // B ---------- - // Res. ======= - else if (theHit.start > a.start) { - _bedA->reportBedRangeNewLine(a,a.start,theHit.start); - } - } - } - else if (numOverlaps > 1) { - // multiple overlapz found. look at all the hits - // and figure out which bases in A survived. then - // report the contigous intervals that survived. - - vector<bool> aKeep(a.end - a.start, true); - - if (numConsumedByB == 0) { - // track the number of hit starts and ends at each position in A - for (vector<BED>::iterator h = bOverlaps.begin(); h != bOverlaps.end(); ++h) { - int s = max(a.start, h->start); - int e = min(a.end, h->end); - - for (int i = s+1; i <= e; ++i) { - aKeep[i-a.start-1] = false; - } - } - // report the remaining blocks. - for (unsigned int i = 0; i < aKeep.size(); ++i) { - if (aKeep[i] == true) { - CHRPOS blockStart = i + a.start; - while ((aKeep[i] == true) && (i < aKeep.size())) { - i++; - } - CHRPOS blockEnd = i + a.start; - blockEnd = min(a.end, blockEnd); - _bedA->reportBedRangeNewLine(a,blockStart,blockEnd); - } - } - } - } -} - - - -void BedSubtract::SubtractBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedFileIntoMap(); - - BED a, nullBed; - BedLineStatus bedStatus; - int lineNum = 0; // current input line number - vector<BED> hits; // vector of potential hits - // reserve some space - hits.reserve(100); - - _bedA->Open(); - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - FindAndSubtractOverlaps(a, hits); - hits.clear(); - a = nullBed; - } - } - _bedA->Close(); - -} -// END Intersect - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h --- a/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,53 +0,0 @@ -/***************************************************************************** - subtractBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef SUBTRACTBED_H -#define SUBTRACTBED_H - -#include "bedFile.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedSubtract { - -public: - - // constructor - BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand); - - // destructor - ~BedSubtract(void); - -private: - - // processing variables - string _bedAFile; - string _bedBFile; - float _overlapFraction; - bool _sameStrand; - bool _diffStrand; - - - // instances of bed file class. - BedFile *_bedA, *_bedB; - - // methods - void FindAndSubtractOverlaps(BED &a, vector<BED> &hits); - void SubtractBed(); -}; - -#endif /* SUBTRACTBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp --- a/BEDTools-Version-2.14.3/src/subtractBed/subtractMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,145 +0,0 @@ -/***************************************************************************** - subtractMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "subtractBed.h" -#include "version.h" - -using namespace std; - -// define our program name -#define PROGRAM_NAME "subtractBed" - - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input files - string bedAFile; - string bedBFile; - - // input arguments - float overlapFraction = 1E-9; - - bool haveBedA = false; - bool haveBedB = false; - bool haveFraction = false; - bool sameStrand = false; - bool diffStrand = false; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-a", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedA = true; - bedAFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-b", 2, parameterLength)) { - if ((i+1) < argc) { - haveBedB = true; - bedBFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - haveFraction = true; - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBedA || !haveBedB) { - cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; - showHelp = true; - } - - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - - BedSubtract *bs = new BedSubtract(bedAFile, bedBFile, overlapFraction, sameStrand, diffStrand); - delete bs; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Removes the portion(s) of an interval that is overlapped" << endl; - cerr << "\t by another feature(s)." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl; - - cerr << "Options: " << endl; - cerr << "\t-f\t" << "Minimum overlap required as a fraction of A." << endl; - cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; - cerr << "\t\t- (FLOAT) (e.g. 0.50)" << endl << endl; - - cerr << "\t-s\t" << "Require same strandedness. That is, only subtract hits in B that" << endl; - cerr << "\t\toverlap A on the _same_ strand." << endl; - cerr << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl; - - cerr << "\t-S\t" << "Force strandedness. That is, only subtract hits in B that" << endl; - cerr << "\t\toverlap A on the _opposite_ strand." << endl; - cerr << "\t\t- By default, overlaps are subtracted without respect to strand." << endl << endl; - - // end the program here - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/tagBam/Makefile --- a/BEDTools-Version-2.14.3/src/tagBam/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,51 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= tagBamMain.cpp tagBam.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= tagBam - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp --- a/BEDTools-Version-2.14.3/src/tagBam/tagBam.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,136 +0,0 @@ -/***************************************************************************** - tagBam.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "tagBam.h" - -// build -TagBam::TagBam(const string &bamFile, const vector<string> &annoFileNames, - const vector<string> &annoLables, const string &tag, - bool useNames, bool useScores, bool sameStrand, bool diffStrand, float overlapFraction): - - _bamFile(bamFile), - _annoFileNames(annoFileNames), - _annoLabels(annoLables), - _tag(tag), - _useNames(useNames), - _useScores(useScores), - _sameStrand(sameStrand), - _diffStrand(diffStrand), - _overlapFraction(overlapFraction) -{} - - -// destroy and delete the open file pointers -TagBam::~TagBam(void) { - delete _bed; - CloseAnnoFiles(); -} - - -void TagBam::OpenAnnoFiles() { - for (size_t i=0; i < _annoFileNames.size(); ++i) { - BedFile *file = new BedFile(_annoFileNames[i]); - file->loadBedFileIntoMap(); - _annoFiles.push_back(file); - } -} - - -void TagBam::CloseAnnoFiles() { - for (size_t i=0; i < _annoFiles.size(); ++i) { - BedFile *file = _annoFiles[i]; - delete file; - _annoFiles[i] = NULL; - } -} - - -void TagBam::Tag() { - - // open the annotations files for processing; - OpenAnnoFiles(); - - // open the BAM file - BamReader reader; - BamWriter writer; - reader.Open(_bamFile); - // get header & reference information - string bamHeader = reader.GetHeaderText(); - RefVector refs = reader.GetReferenceData(); - - // set compression mode - BamWriter::CompressionMode compressionMode = BamWriter::Compressed; -// if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; - writer.SetCompressionMode(compressionMode); - // open our BAM writer - writer.Open("stdout", bamHeader, refs); - - // rip through the BAM file and test for overlaps with each annotation file. - BamAlignment al; - vector<BED> hits; - - while (reader.GetNextAlignment(al)) { - if (al.IsMapped() == true) { - BED a; - a.chrom = refs.at(al.RefID).RefName; - a.start = al.Position; - a.end = al.GetEndPosition(false, false); - a.strand = "+"; - if (al.IsReverseStrand()) a.strand = "-"; - - ostringstream annotations; - // annotate the BAM file based on overlaps with the annotation files. - for (size_t i = 0; i < _annoFiles.size(); ++i) - { - // grab the current annotation file. - BedFile *anno = _annoFiles[i]; - - if (!_useNames && !_useScores) { - // add the label for this annotation file to tag if there is overlap - if (anno->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction)) - { - annotations << _annoLabels[i] << ";"; - } - } - // use the score field - else if (!_useNames && _useScores) { - anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); - for (size_t i = 0; i < hits.size(); ++i) { - annotations << hits[i].score; - if (i < hits.size() - 1) annotations << ","; - } - if (hits.size() > 0) annotations << ";"; - hits.clear(); - } - // use the name field from the annotation files to populate tag - else if (_useNames && !_useScores) { - anno->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand); - for (size_t i = 0; i < hits.size(); ++i) { - annotations << hits[i].name; - if (i < hits.size() - 1) annotations << ","; - } - if (hits.size() > 0) annotations << ";"; - hits.clear(); - } - } - // were there any overlaps with which to make a tag? - if (annotations.str().size() > 0) { - al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";" - } - writer.SaveAlignment(al); - } - } - reader.Close(); - - // close the annotations files; - CloseAnnoFiles(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/tagBam/tagBam.h --- a/BEDTools-Version-2.14.3/src/tagBam/tagBam.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,83 +0,0 @@ -/***************************************************************************** - tagBam.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef TAGBAM_H -#define TAGBAM_H - -#include "bedFile.h" - -#include "version.h" -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -#include "BamAncillary.h" -using namespace BamTools; - -#include "bedFile.h" -#include <vector> -#include <algorithm> -#include <iostream> -#include <iomanip> -#include <fstream> -#include <stdlib.h> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class TagBam { - -public: - - // constructor - TagBam(const string &bamFile, const vector<string> &annoFileNames, - const vector<string> &annoLabels, const string &tag, - bool useNames, bool useScores, bool sameStrand, - bool diffStrand, float overlapFraction); - - // destructor - ~TagBam(void); - - // annotate the BAM file with all of the annotation files. - void Tag(); - -private: - - // input files. - string _bamFile; - vector<string> _annoFileNames; - vector<string> _annoLabels; - - string _tag; - - // instance of a bed file class. - BedFile *_bed; - vector<BedFile*> _annoFiles; - - // should we use the name field from the annotation files? - bool _useNames; - bool _useScores; - - // do we care about strandedness when tagging? - bool _sameStrand; - bool _diffStrand; - float _overlapFraction; - - // private function for reporting coverage information - void ReportAnnotations(); - - void OpenAnnoFiles(); - - void CloseAnnoFiles(); - -}; -#endif /* TAGBAM_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp --- a/BEDTools-Version-2.14.3/src/tagBam/tagBamMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,209 +0,0 @@ -/***************************************************************************** - annotateMain.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "tagBam.h" -#include "version.h" - -using namespace std; - -// define the version -#define PROGRAM_NAME "tagBam" - -// define our parameter checking macro -#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) - -// function declarations -void ShowHelp(void); - -int main(int argc, char* argv[]) { - - // our configuration variables - bool showHelp = false; - - // input file - string bamFile; - float overlapFraction = 1E-9; - string tag = "YB"; - - // parm flags - bool haveTag = false; - bool haveFraction = false; - bool useNames = false; - bool useScores = false; - bool sameStrand = false; - bool diffStrand = false; - bool haveBam = false; - bool haveFiles = false; - bool haveLabels = false; - - - // list of annotation files / names - vector<string> inputFiles; - vector<string> inputLabels; - - // check to see if we should print out some help - if(argc <= 1) showHelp = true; - - for(int i = 1; i < argc; i++) { - int parameterLength = (int)strlen(argv[i]); - - if((PARAMETER_CHECK("-h", 2, parameterLength)) || - (PARAMETER_CHECK("--help", 5, parameterLength))) { - showHelp = true; - } - } - - if(showHelp) ShowHelp(); - - // do some parsing (all of these parameters require 2 strings) - for(int i = 1; i < argc; i++) { - - int parameterLength = (int)strlen(argv[i]); - - if(PARAMETER_CHECK("-i", 2, parameterLength)) { - if ((i+1) < argc) { - haveBam = true; - bamFile = argv[i + 1]; - i++; - } - } - else if(PARAMETER_CHECK("-files", 6, parameterLength)) { - if ((i+1) < argc) { - haveFiles = true; - i = i+1; - string file = argv[i]; - while (file[0] != '-' && i < argc) { - inputFiles.push_back(file); - i++; - if (i < argc) - file = argv[i]; - } - i--; - } - } - else if(PARAMETER_CHECK("-labels", 7, parameterLength)) { - if ((i+1) < argc) { - haveLabels = true; - i = i+1; - string label = argv[i]; - while (label[0] != '-' && i < argc) { - inputLabels.push_back(label); - i++; - if (i < argc) - label = argv[i]; - } - i--; - } - } - else if (PARAMETER_CHECK("-names", 6, parameterLength)) { - useNames = true; - } - else if (PARAMETER_CHECK("-scores", 7, parameterLength)) { - useScores = true; - } - else if (PARAMETER_CHECK("-s", 2, parameterLength)) { - sameStrand = true; - } - else if (PARAMETER_CHECK("-S", 2, parameterLength)) { - diffStrand = true; - } - else if(PARAMETER_CHECK("-f", 2, parameterLength)) { - if ((i+1) < argc) { - haveFraction = true; - overlapFraction = atof(argv[i + 1]); - i++; - } - } - else if(PARAMETER_CHECK("-tag", 4, parameterLength)) { - if ((i+1) < argc) { - haveTag = true; - tag = argv[i + 1]; - i++; - } - } - else { - cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; - showHelp = true; - } - } - - // make sure we have both input files - if (!haveBam || !haveFiles) { - cerr << endl << "*****" << endl << "*****ERROR: Need -i, -files" << endl << "*****" << endl; - showHelp = true; - } - if (!useNames && !haveLabels && !useScores) { - cerr << endl << "*****" << endl << "*****ERROR: Need -labels or -names or -scores" << endl << "*****" << endl; - showHelp = true; - } - if (sameStrand && diffStrand) { - cerr << endl << "*****" << endl << "*****ERROR: Use -s or -S, not both. " << endl << "*****" << endl; - showHelp = true; - } - if (haveLabels && useNames) { - cerr << endl << "*****" << endl << "*****ERROR: Use -labels or -names, not both. " << endl << "*****" << endl; - showHelp = true; - } - if (useScores && useNames) { - cerr << endl << "*****" << endl << "*****ERROR: Use -scores or -names, not both. " << endl << "*****" << endl; - showHelp = true; - } - if (haveTag && tag.size() > 2) { - cerr << endl << "*****" << endl << "*****ERROR: Custom tags should be at most two characters per the SAM specification. " << endl << "*****" << endl; - showHelp = true; - } - - if (!showHelp) { - TagBam *ba = new TagBam(bamFile, inputFiles, inputLabels, tag, useNames, useScores, sameStrand, diffStrand, overlapFraction); - ba->Tag(); - delete ba; - return 0; - } - else { - ShowHelp(); - } -} - -void ShowHelp(void) { - - cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; - - cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; - - cerr << "Summary: Annotates a BAM file based on overlaps with multiple BED/GFF/VCF files" << endl; - cerr << "\t on the intervals in -i." << endl << endl; - - cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <BAM> -files FILE1 .. FILEn -labels LAB1 .. LABn" << endl << endl; - - cerr << "Options: " << endl; - - cerr << "\t-s\t" << "Require overlaps on the same strand. That is, only tag alignments that have the same" << endl; - cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; - - cerr << "\t-S\t" << "Require overlaps on the opposite strand. That is, only tag alignments that have the opposite" << endl; - cerr << "\t\tstrand as a feature in the annotation file(s)." << endl << endl; - - cerr << "\t-f\t" << "Minimum overlap required as a fraction of the alignment." << endl; - cerr << "\t\t- Default is 1E-9 (i.e., 1bp)." << endl; - cerr << "\t\t- FLOAT (e.g. 0.50)" << endl << endl; - - cerr << "\t-tag\t" << "Dictate what the tag should be. Default is YB." << endl; - cerr << "\t\t- STRING (two characters, e.g., YK)" << endl << endl; - - cerr << "\t-names\t" << "Use the name field from the annotation files to populate tags." << endl; - cerr << "\t\tBy default, the -labels values are used." << endl << endl; - - cerr << "\t-scores\t" << "A list of 1-based columns for each annotation file" << endl; - cerr << "\t\tin which a color can be found." << endl << endl; - - - exit(1); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile --- a/BEDTools-Version-2.14.3/src/unionBedGraphs/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,49 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedGraphFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= unionBedGraphs.cpp unionBedGraphsMain.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedGraphFile.o genomeFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= unionBedGraphs - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedGraphFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/genomeFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h --- a/BEDTools-Version-2.14.3/src/unionBedGraphs/intervalItem.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,62 +0,0 @@ -/***************************************************************************** - intervalItem.h - - (c) 2010 - Assaf Gordon - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef INTERVALITEM_H -#define INTERVALITEM_H - -#include <string> -#include <queue> - -enum COORDINATE_TYPE { - START, - END -}; - -/* - An interval item in the priority queue. - - An IntervalItem can mark either a START position or an END position. - */ -class IntervalItem -{ -private: - IntervalItem(); - -public: - int source_index; // which source BedGraph file this came from - COORDINATE_TYPE coord_type; // is this the start or the end position? - CHRPOS coord; - std::string depth; - - IntervalItem(int _index, COORDINATE_TYPE _type, CHRPOS _coord, std::string _depth) : - source_index(_index), - coord_type(_type), - coord(_coord), - depth(_depth) - {} - - IntervalItem(const IntervalItem &other) : - source_index(other.source_index), - coord_type(other.coord_type), - coord(other.coord), - depth(other.depth) - {} - - bool operator< ( const IntervalItem& other ) const - { - return this->coord > other.coord; - } -}; - -// our priority queue -typedef std::priority_queue<IntervalItem> INTERVALS_PRIORITY_QUEUE; - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp --- a/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,255 +0,0 @@ -/***************************************************************************** - unionBedGraphs.cpp - - (c) 2010 - Assaf Gordon, CSHL - - Aaron Quinlan, UVA - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include <cassert> -#include <cstring> -#include <cstdlib> -#include <iostream> -#include <algorithm> - -#include "bedGraphFile.h" -#include "unionBedGraphs.h" - -using namespace std; - - -UnionBedGraphs::UnionBedGraphs(std::ostream& _output, - const vector<string>& _filenames, - const vector<string>& _titles, - bool _print_empty_regions, - const std::string& _genome_size_filename, - const std::string& _no_coverage_value ) : - filenames(_filenames), - titles(_titles), - output(_output), - current_non_zero_inputs(0), - print_empty_regions(_print_empty_regions), - genome_sizes(NULL), - no_coverage_value(_no_coverage_value) -{ - if (print_empty_regions) { - assert(!_genome_size_filename.empty()); - - genome_sizes = new GenomeFile(_genome_size_filename); - } -} - - -UnionBedGraphs::~UnionBedGraphs() { - CloseBedgraphFiles(); - if (genome_sizes) { - delete genome_sizes; - genome_sizes = NULL ; - } -} - - -void UnionBedGraphs::Union() { - OpenBedgraphFiles(); - - // Add the first interval from each file - for(size_t i=0;i<bedgraph_files.size();++i) - LoadNextBedgraphItem(i); - - // Chromosome loop - once per chromosome - do { - // Find the first chromosome to use - current_chrom = DetermineNextChrom(); - - // Populate the queue with initial values from all files - // (if they belong to the correct chromosome) - for(size_t i=0;i<bedgraph_files.size();++i) - AddInterval(i); - - CHRPOS current_start = ConsumeNextCoordinate(); - - // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage - if (print_empty_regions && current_start > 0) - PrintEmptyCoverage(0,current_start); - - // Intervals loop - until all intervals (of current chromosome) from all files are used. - do { - CHRPOS current_end = queue.top().coord; - PrintCoverage(current_start, current_end); - current_start = ConsumeNextCoordinate(); - } while (!queue.empty()); - - // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome - // print a dummy empty coverage - if (print_empty_regions) { - CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom); - if (current_start < chrom_size) - PrintEmptyCoverage(current_start, chrom_size); - } - - } while (!AllFilesDone()); -} - - -CHRPOS UnionBedGraphs::ConsumeNextCoordinate() { - assert(!queue.empty()); - - CHRPOS new_position = queue.top().coord; - do { - IntervalItem item = queue.top(); - UpdateInformation(item); - queue.pop(); - } while (!queue.empty() && queue.top().coord == new_position); - - return new_position; -} - - -void UnionBedGraphs::UpdateInformation(const IntervalItem &item) { - // Update the depth coverage for this file - - // Which coordinate is it - start or end? - switch (item.coord_type) - { - case START: - current_depth[item.source_index] = item.depth; - current_non_zero_inputs++; - break; - case END: - //Read the next interval from this file - AddInterval(item.source_index); - current_depth[item.source_index] = no_coverage_value; - current_non_zero_inputs--; - break; - default: - assert(0); - } -} - - -void UnionBedGraphs::PrintHeader() { - output << "chrom\tstart\tend" ; - for (size_t i=0;i<titles.size();++i) - output << "\t" <<titles[i]; - output << endl; -} - - -void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) { - if ( current_non_zero_inputs == 0 && ! print_empty_regions ) - return ; - - output << current_chrom << "\t" - << start << "\t" - << end; - - for (size_t i=0;i<current_depth.size();++i) - output << "\t" << current_depth[i] ; - - output << endl; -} - - -void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) { - output << current_chrom << "\t" - << start << "\t" - << end; - - for (size_t i=0;i<current_depth.size();++i) - output << "\t" << no_coverage_value ; - - output << endl; -} - - -void UnionBedGraphs::LoadNextBedgraphItem(int index) { - assert(static_cast<unsigned int>(index) < bedgraph_files.size()); - - current_bedgraph_item[index].chrom=""; - - BedGraphFile *file = bedgraph_files[index]; - BEDGRAPH_STR bg; - int lineNum = 0; - BedGraphLineStatus status; - - while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) { - if (status != BEDGRAPH_VALID) - continue; - - current_bedgraph_item[index] = bg ; - break; - } -} - - -bool UnionBedGraphs::AllFilesDone() { - for (size_t i=0;i<current_bedgraph_item.size();++i) - if (!current_bedgraph_item[i].chrom.empty()) - return false; - return true; -} - - -string UnionBedGraphs::DetermineNextChrom() { - string next_chrom; - for (size_t i=0;i<current_bedgraph_item.size();++i) { - if (current_bedgraph_item[i].chrom.empty()) - continue; - - if (next_chrom.empty()) - next_chrom = current_bedgraph_item[i].chrom; - else - if (current_bedgraph_item[i].chrom < next_chrom) - next_chrom = current_bedgraph_item[i].chrom ; - } - return next_chrom; -} - - -void UnionBedGraphs::AddInterval(int index) { - assert(static_cast<unsigned int>(index) < bedgraph_files.size()); - - //This file has no more intervals - if (current_bedgraph_item[index].chrom.empty()) - return ; - - //If the next interval belongs to a different chrom, don't add it - if (current_bedgraph_item[index].chrom!=current_chrom) - return ; - - const BEDGRAPH_STR &bg(current_bedgraph_item[index]); - - IntervalItem start_item(index, START, bg.start, bg.depth); - IntervalItem end_item(index, END, bg.end, bg.depth); - - queue.push(start_item); - queue.push(end_item); - - LoadNextBedgraphItem(index); -} - - -void UnionBedGraphs::OpenBedgraphFiles() { - for (size_t i=0;i<filenames.size();++i) { - BedGraphFile *file = new BedGraphFile(filenames[i]); - file->Open(); - bedgraph_files.push_back(file); - - current_depth.push_back(no_coverage_value); - } - current_bedgraph_item.resize(filenames.size()); -} - - -void UnionBedGraphs::CloseBedgraphFiles() { - for (size_t i=0;i<bedgraph_files.size();++i) { - BedGraphFile *file = bedgraph_files[i]; - delete file; - bedgraph_files[i] = NULL ; - } - bedgraph_files.clear(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h --- a/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,123 +0,0 @@ -/***************************************************************************** - unionBedGraphs.h - - (c) 2010 - Assaf Gordon, CSHL - - Aaron Quinlan, UVA - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef UNIONBEDGRAPHS_H -#define UNIONBEDGRAPHS_H - -#include <vector> -#include <string> -#include "bedGraphFile.h" -#include "genomeFile.h" -#include "intervalItem.h" - -class UnionBedGraphs -{ -private: - typedef BEDGRAPH_STR BEDGRAPH_TYPE; - - vector<string> filenames; - vector<string> titles; - - vector<BedGraphFile*> bedgraph_files; - vector<BEDGRAPH_TYPE::DEPTH_TYPE> current_depth; - vector<BEDGRAPH_TYPE> current_bedgraph_item; - - std::ostream &output; - - INTERVALS_PRIORITY_QUEUE queue; - std::string current_chrom; - int current_non_zero_inputs; - bool print_empty_regions; - - GenomeFile* genome_sizes; - - std::string no_coverage_value; - -public: - UnionBedGraphs(std::ostream& _output, - const vector<string>& _filenames, - const vector<string>& _titles, - bool _print_empty_regions, - const std::string& _genomeFileName, - const std::string& _no_coverage_value); - - virtual ~UnionBedGraphs(); - - // Combines all bedgraph files - void Union(); - - // Print the header line: chrom/start/end + name of each bedgraph file. - void PrintHeader(); - - -private: - - // Open all BedGraph files, initialize "current_XXX" vectors - void OpenBedgraphFiles(); - - // Close the BedGraph files. - void CloseBedgraphFiles(); - - /* - Add an interval from BedGraph file 'index' into the queue. - will only be added if it belongs to the current chromosome. - - If the interval was added (=consumed), the next interval will be read from the file - using 'LoadNextBedgraphItem' - */ - void AddInterval(int index); - - /* - Loads the next interval from BedGraph file 'index'. - Stores it in 'current_bedgraph_item' vector. - */ - void LoadNextBedgraphItem(int index); - - /* - Scans the 'current_bedgraph_item' vector, - find the 'first' chromosome to use (different BedGraph files can start with different chromosomes). - */ - std::string DetermineNextChrom(); - - /* - Returns 'true' if ALL intervals from ALL BedGraph files were used - */ - bool AllFilesDone(); - - /* - Extract the next coordinate from the queue, and updates the current coverage information. - If multiple interval share the same coordinate values, all of them are handled. - If an END coordinate is consumed, the next interval (from the corresponding file) is read. - */ - CHRPOS ConsumeNextCoordinate(); - - /* - Updates the coverage information based on the given item. - Item can be a START coordinate or an END coordiante. - */ - void UpdateInformation(const IntervalItem &item); - - /* - prints chrom/start/end and the current depth coverage values of all the files. - */ - void PrintCoverage(CHRPOS start, CHRPOS end); - - /* - prints chrom/start/end and the ZERO depth coverage values of all the files. - */ - void PrintEmptyCoverage(CHRPOS start, CHRPOS end); - - void DebugPrintQueue(); -}; - - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp --- a/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphsMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,294 +0,0 @@\n-/*****************************************************************************\n- unionBedGraphsMain.cpp\n-\n- (c) 2010 - Assaf Gordon, CSHL\n- - Aaron Quinlan, UVA\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include <climits>\n-#include <cstring>\n-#include <cstdlib>\n-#include <vector>\n-#include <string>\n-#include <iostream>\n-#include <getopt.h>\n-#include <libgen.h> //for basename()\n-#include "version.h"\n-\n-#include "genomeFile.h"\n-#include "unionBedGraphs.h"\n-\n-using namespace std;\n-\n-// define our program name\n-#define PROGRAM_NAME "unionBedGraphs"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-//STLized version of basename()\n-// (because POSIX basename() modifies the input string pointer)\n-// Additionally: removes any extension the basename might have.\n-std::string stl_basename(const std::string& path);\n-\n-// function declarations\n-void ShowHelp(void);\n-void ShowExamples(void);\n-\n-\n-int main(int argc, char* argv[])\n-{\n- bool haveFiles = false;\n- bool haveTitles = false;\n- bool haveGenome = false;\n- bool haveFiller = true;\n- bool printHeader = false;\n- bool printEmptyRegions = false;\n- bool showHelp = false;\n- string genomeFile;\n- string basePath;\n- string noCoverageValue = "0";\n- vector<string> inputFiles;\n- vector<string> inputTitles;\n-\n- //Parse command line options\n- if(argc <= 1)\n- ShowHelp();\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp == true) {\n- ShowHelp();\n- exit(1);\n- }\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-i", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFiles = true;\n- i = i+1;\n- string file = argv[i];\n- while (file[0] != \'-\' && i < argc) {\n- inputFiles.push_back(file);\n- i++;\n- if (i < argc)\n- file = argv[i];\n- }\n- i--;\n- }\n- }\n- else if(PARAMETER_CHECK("-names", 6, parameterLength)) {\n- if ((i+1) < argc) {\n- haveTitles = true;\n- i = i+1;\n- string title = argv[i];\n- while (title[0] != \'-\' && i < argc) {\n- inputTitles.push_back(title);\n- i++;\n- if (i < argc)\n- title = argv[i];\n- }\n- i--;\n- }\n- }\n- else if(PARAMETER_CHECK("-g", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveGenome = true;\n- genomeFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-filler", 7, parameterLength)) {\n- if ((i+1) < argc) {\n- haveFiller = true;\n- noCoverageValue = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-header", 7, parameterLength)) {\n- printHeader = true;\n- }\n- else if(PARAMETER_CHECK("-empty", 6, parameterLength)) {\n- printEmptyRegions = true;\n- }\n- else if(PARAMETER_CHECK("-examples", 9, parameterLength)) {\n'..b'-\n- cerr << "\\t-names\\t\\t" << "A list of names (one / file) to describe each file in -i." << endl;\n- cerr << "\\t\\t\\tThese names will be printed in the header line." << endl << endl;\n-\n- cerr << "\\t-g\\t\\t" << "Use genome file to calculate empty regions." << endl;\n- cerr << "\\t\\t\\t- STRING." << endl << endl;\n-\n- cerr << "\\t-empty\\t\\t" << "Report empty regions (i.e., start/end intervals w/o" << endl;\n- cerr << "\\t\\t\\tvalues in all files)." << endl;\n- cerr << "\\t\\t\\t- Requires the \'-g FILE\' parameter.\\n" << endl;\n-\n- cerr << "\\t-filler TEXT\\t" << "Use TEXT when representing intervals having no value." << endl;\n- cerr << "\\t\\t\\t- Default is \'0\', but you can use \'N/A\' or any other text." << endl << endl;\n-\n- cerr << "\\t-examples\\t" << "Show detailed usage examples." << endl << endl;\n-}\n-\n-\n-\n-void ShowExamples()\n-{\n- cerr << "Example usage:\\n\\n" \\\n-"== Input files: ==\\n" \\\n-"\\n" \\\n-" $ cat 1.bg\\n" \\\n-" chr1 1000 1500 10\\n" \\\n-" chr1 2000 2100 20\\n" \\\n-"\\n" \\\n-" $ cat 2.bg\\n" \\\n-" chr1 900 1600 60\\n" \\\n-" chr1 1700 2050 50\\n" \\\n-"\\n" \\\n-" $ cat 3.bg\\n" \\\n-" chr1 1980 2070 80\\n" \\\n-" chr1 2090 2100 20\\n" \\\n-"\\n" \\\n-" $ cat sizes.txt\\n" \\\n-" chr1 5000\\n" \\\n-"\\n" \\\n-"== Union/combine the files: ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -i 1.bg 2.bg 3.bg\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine the files, with a header line (titles are the file names): ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg\\n" \\\n-" chrom start end 1 2 3\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine the files, with a header line and custom names: ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -i 1.bg 2.bg 3.bg -names WT-1 WT-2 KO-1\\n" \\\n-" chrom start end WT-1 WT-2 KO-1\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-"\\n" \\\n-"== Union/combine, showing empty regions (note, requires -g): ==\\n" \\\n-"\\n" \\\n-" $ unionBedGraphs -header -empty -g sizes.TXT -i 1.bg 2.bg 3.bg\\n" \\\n-" chrom start end 1 2 3\\n" \\\n-" chr1 0 900 0 0 0\\n" \\\n-" chr1 900 1000 0 60 0\\n" \\\n-" chr1 1000 1500 10 60 0\\n" \\\n-" chr1 1500 1600 0 60 0\\n" \\\n-" chr1 1600 1700 0 0 0\\n" \\\n-" chr1 1700 1980 0 50 0\\n" \\\n-" chr1 1980 2000 0 50 80\\n" \\\n-" chr1 2000 2050 20 50 80\\n" \\\n-" chr1 2050 2070 20 0 80\\n" \\\n-" chr1 2070 2090 20 0 0\\n" \\\n-" chr1 2090 2100 20 0 20\\n" \\\n-" chr1 2100 5000 0 0 0\\n" \\\n-"\\n" \\\n-;\n-}\n-\n-std::string stl_basename(const std::string& path)\n-{\n- string result;\n-\n- char* path_dup = strdup(path.c_str());\n- char* basename_part = basename(path_dup);\n- result = basename_part;\n- free(path_dup);\n-\n- size_t pos = result.find_last_of(\'.\');\n- if (pos != string::npos )\n- result = result.substr(0,pos);\n-\n- return result;\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,73 +0,0 @@ -/***************************************************************************** - bamAncillary.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "BamAncillary.h" -using namespace std; - -// 10 15 20 25 30 4000 -// acccctttggacct---ataggga.................aaaa -// acccc---ggaccttttataggga.................aaaa -// 5M 3D 6M 2I 7M 20N 4M - -namespace BamTools { - void getBamBlocks(const BamAlignment &bam, const RefVector &refs, - vector<BED> &blocks, bool breakOnDeletionOps) { - - CHRPOS currPosition = bam.Position; - CHRPOS blockStart = bam.Position; - string chrom = refs.at(bam.RefID).RefName; - string name = bam.Name; - string strand = "+"; - string score = ToString(bam.MapQuality); - char prevOp = '\0'; - if (bam.IsReverseStrand()) strand = "-"; - bool blocksFound = false; - - vector<CigarOp>::const_iterator cigItr = bam.CigarData.begin(); - vector<CigarOp>::const_iterator cigEnd = bam.CigarData.end(); - for ( ; cigItr != cigEnd; ++cigItr ) { - if (cigItr->Type == 'M') { - currPosition += cigItr->Length; - // we only want to create a new block if the current M op - // was preceded by an N op or a D op (and we are breaking on D ops) - if ((prevOp == 'D' && breakOnDeletionOps == true) || (prevOp == 'N')) { - blocks.push_back( BED(chrom, blockStart, currPosition, name, score, strand) ); - blockStart = currPosition; - } - } - else if (cigItr->Type == 'D') { - if (breakOnDeletionOps == false) - currPosition += cigItr->Length; - else { - currPosition += cigItr->Length; - blockStart = currPosition; - } - } - else if (cigItr->Type == 'N') { - currPosition += cigItr->Length; - blockStart = currPosition; - } - else if (cigItr->Type == 'S' || cigItr->Type == 'H' || cigItr->Type == 'P' || cigItr->Type == 'I') { - // do nothing - } - else { - cerr << "Input error: invalid CIGAR type (" << cigItr->Type - << ") for: " << bam.Name << endl; - exit(1); - } - prevOp = cigItr->Type; - } - // if there were no splits, we just create a block representing the contiguous alignment. - if (blocksFound == false) { - blocks.push_back( BED(chrom, bam.Position, currPosition, name, score, strand) ); - } - } -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/BamAncillary.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,19 +0,0 @@ -/***************************************************************************** - bamAncillary.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedFile.h" -#include "lineFileUtilities.h" -#include "api/BamAlignment.h" - -namespace BamTools { - void getBamBlocks(const BamAlignment &bam, const RefVector &refs, - vector<BED> &blocks, bool includeDeletions = true); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile --- a/BEDTools-Version-2.14.3/src/utils/BamTools-Ancillary/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,26 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../ - -INCLUDES = -I$(UTILITIES_DIR)/BamTools/include -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= BamAncillary.cpp -OBJECTS= $(SOURCES:.cpp=.o) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -all: $(BUILT_OBJECTS) - -.PHONY: all - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE --- a/BEDTools-Version-2.14.3/src/utils/BamTools/LICENSE Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,22 +0,0 @@ -The MIT License - -Copyright (c) 2009-2010 Derek Barnett, Erik Garrison, Gabor Marth, Michael Stromberg - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/Makefile --- a/BEDTools-Version-2.14.3/src/utils/BamTools/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,61 +0,0 @@ -# ------------------- -# define our includes -# ------------------- -OBJ_DIR = ../../../obj/ -INCLUDES = -Isrc/ -Iinclude/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- - -SOURCES= src/api/BamAlignment.cpp \ - src/api/BamMultiReader.cpp \ - src/api/BamReader.cpp \ - src/api/BamWriter.cpp \ - src/api/SamHeader.cpp \ - src/api/SamProgram.cpp \ - src/api/SamProgramChain.cpp \ - src/api/SamReadGroup.cpp \ - src/api/SamReadGroupDictionary.cpp \ - src/api/SamSequence.cpp \ - src/api/SamSequenceDictionary.cpp \ - src/api/internal/BamHeader_p.cpp \ - src/api/internal/BamIndexFactory_p.cpp \ - src/api/internal/BamMultiReader_p.cpp \ - src/api/internal/BamRandomAccessController_p.cpp \ - src/api/internal/BamReader_p.cpp \ - src/api/internal/BamStandardIndex_p.cpp \ - src/api/internal/BamToolsIndex_p.cpp \ - src/api/internal/BamWriter_p.cpp \ - src/api/internal/BgzfStream_p.cpp \ - src/api/internal/SamFormatParser_p.cpp \ - src/api/internal/SamFormatPrinter_p.cpp \ - src/api/internal/SamHeaderValidator_p.cpp - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -OBJECTS= $(SOURCES:.cpp=.o) -LIBRARY=libbamtools.a - -all: $(LIBRARY) - -.PHONY: all - -$(LIBRARY): $(OBJECTS) - [ -d lib ] || mkdir -p lib - [ -d include ] || mkdir -p include - [ -d include/api ] || mkdir -p include/api - [ -d include/shared ] || mkdir -p include/shared - - @cp src/api/*.h include/api - @cp src/shared/*.h include/shared - - - @echo " * linking $(LIBRARY)" - ar cr lib/$@ $^ - -$(OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c $(*D)/$(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -o $(*D)/$(*F).o - \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2433 +0,0 @@\n-// ***************************************************************************\n-// BamAlignment.cpp (c) 2009 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 22 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides the BamAlignment data structure\n-// ***************************************************************************\n-\n-#include <api/BamAlignment.h>\n-#include <api/BamConstants.h>\n-using namespace BamTools;\n-\n-#include <cctype>\n-#include <cstdio>\n-#include <cstdlib>\n-#include <cstring>\n-#include <exception>\n-#include <iostream>\n-#include <map>\n-#include <utility>\n-using namespace std;\n-\n-/*! \\class BamTools::BamAlignment\n- \\brief The main BAM alignment data structure.\n-\n- Provides methods to query/modify BAM alignment data fields.\n-*/\n-/*! \\var BamAlignment::Name\n- \\brief read name\n-*/\n-/*! \\var BamAlignment::Length\n- \\brief length of query sequence\n-*/\n-/*! \\var BamAlignment::QueryBases\n- \\brief \'original\' sequence (as reported from sequencing machine)\n-*/\n-/*! \\var BamAlignment::AlignedBases\n- \\brief \'aligned\' sequence (includes any indels, padding, clipping)\n-*/\n-/*! \\var BamAlignment::Qualities\n- \\brief FASTQ qualities (ASCII characters, not numeric values)\n-*/\n-/*! \\var BamAlignment::TagData\n- \\brief tag data (use the provided methods to query/modify)\n-*/\n-/*! \\var BamAlignment::RefID\n- \\brief ID number for reference sequence\n-*/\n-/*! \\var BamAlignment::Position\n- \\brief position (0-based) where alignment starts\n-*/\n-/*! \\var BamAlignment::Bin\n- \\brief BAM (standard) index bin number for this alignment\n-*/\n-/*! \\var BamAlignment::MapQuality\n- \\brief mapping quality score\n-*/\n-/*! \\var BamAlignment::AlignmentFlag\n- \\brief alignment bit-flag (use the provided methods to query/modify)\n-*/\n-/*! \\var BamAlignment::CigarData\n- \\brief CIGAR operations for this alignment\n-*/\n-/*! \\var BamAlignment::MateRefID\n- \\brief ID number for reference sequence where alignment\'s mate was aligned\n-*/\n-/*! \\var BamAlignment::MatePosition\n- \\brief position (0-based) where alignment\'s mate starts\n-*/\n-/*! \\var BamAlignment::InsertSize\n- \\brief mate-pair insert size\n-*/\n-/*! \\var BamAlignment::Filename\n- \\brief name of BAM file which this alignment comes from\n-*/\n-\n-/*! \\fn BamAlignment::BamAlignment(void)\n- \\brief constructor\n-*/\n-BamAlignment::BamAlignment(void)\n- : RefID(-1)\n- , Position(-1)\n- , MateRefID(-1)\n- , MatePosition(-1)\n- , InsertSize(0)\n-{ }\n-\n-/*! \\fn BamAlignment::BamAlignment(const BamAlignment& other)\n- \\brief copy constructor\n-*/\n-BamAlignment::BamAlignment(const BamAlignment& other)\n- : Name(other.Name)\n- , Length(other.Length)\n- , QueryBases(other.QueryBases)\n- , AlignedBases(other.AlignedBases)\n- , Qualities(other.Qualities)\n- , TagData(other.TagData)\n- , RefID(other.RefID)\n- , Position(other.Position)\n- , Bin(other.Bin)\n- , MapQuality(other.MapQuality)\n- , AlignmentFlag(other.AlignmentFlag)\n- , CigarData(other.CigarData)\n- , MateRefID(other.MateRefID)\n- , MatePosition(other.MatePosition)\n- , InsertSize(other.InsertSize)\n- , Filename(other.Filename)\n- , SupportData(other.SupportData)\n-{ }\n-\n-/*! \\fn BamAlignment::~BamAlignment(void)\n- \\brief destructor\n-*/\n-BamAlignment::~BamAlignment(void) { }\n-\n-/*! \\fn bool BamAlignment::AddTag(const std::string& tag, const std::string& type, const std::string& value)\n- \\brief Adds a field with string data to the BAM tags.\n-\n- Does NOT modify an existing tag - use \\link BamAlignment::EditTag() \\endlink instead.\n-\n- \\param tag 2-character tag name\n- \\param type 1-character tag type (must be "Z" or "H")\n- \\param value string data to store\n-\n- \\return \\c true if the \\b new tag was added successfully\n- \\sa \\samSpecURL for'..b'ped(bool ok)\n- \\brief Complement of using SetIsMapped().\n- \\deprecated For sake of symmetry with the query methods\n- \\sa IsMapped(), SetIsMapped()\n-*/\n-void BamAlignment::SetIsUnmapped(bool ok) {\n- SetIsMapped(!ok);\n-}\n-\n-/*! \\fn bool BamAlignment::SkipToNextTag(const char storageType, char*& pTagData, unsigned int& numBytesParsed)\n- \\internal\n-\n- Moves to next available tag in tag data string\n-\n- \\param storageType BAM tag type-code that determines how far to move cursor\n- \\param pTagData pointer to current position (cursor) in tag string\n- \\param numBytesParsed report of how many bytes were parsed (cumulatively)\n-\n- \\return \\c if storageType was a recognized BAM tag type\n- \\post \\a pTagData will point to the byte where the next tag data begins.\n- \\a numBytesParsed will correspond to the cursor\'s position in the full TagData string.\n-*/\n-bool BamAlignment::SkipToNextTag(const char storageType,\n- char*& pTagData,\n- unsigned int& numBytesParsed) const\n-{\n- switch (storageType) {\n-\n- case (Constants::BAM_TAG_TYPE_ASCII) :\n- case (Constants::BAM_TAG_TYPE_INT8) :\n- case (Constants::BAM_TAG_TYPE_UINT8) :\n- ++numBytesParsed;\n- ++pTagData;\n- break;\n-\n- case (Constants::BAM_TAG_TYPE_INT16) :\n- case (Constants::BAM_TAG_TYPE_UINT16) :\n- numBytesParsed += sizeof(uint16_t);\n- pTagData += sizeof(uint16_t);\n- break;\n-\n- case (Constants::BAM_TAG_TYPE_FLOAT) :\n- case (Constants::BAM_TAG_TYPE_INT32) :\n- case (Constants::BAM_TAG_TYPE_UINT32) :\n- numBytesParsed += sizeof(uint32_t);\n- pTagData += sizeof(uint32_t);\n- break;\n-\n- case (Constants::BAM_TAG_TYPE_STRING) :\n- case (Constants::BAM_TAG_TYPE_HEX) :\n- while( *pTagData ) {\n- ++numBytesParsed;\n- ++pTagData;\n- }\n- // increment for null-terminator\n- ++numBytesParsed;\n- ++pTagData;\n- break;\n-\n- case (Constants::BAM_TAG_TYPE_ARRAY) :\n-\n- {\n- // read array type\n- const char arrayType = *pTagData;\n- ++numBytesParsed;\n- ++pTagData;\n-\n- // read number of elements\n- int32_t numElements;\n- memcpy(&numElements, pTagData, sizeof(uint32_t)); // already endian-swapped if necessary\n- numBytesParsed += sizeof(uint32_t);\n- pTagData += sizeof(uint32_t);\n-\n- // calculate number of bytes to skip\n- int bytesToSkip = 0;\n- switch (arrayType) {\n- case (Constants::BAM_TAG_TYPE_INT8) :\n- case (Constants::BAM_TAG_TYPE_UINT8) :\n- bytesToSkip = numElements;\n- break;\n- case (Constants::BAM_TAG_TYPE_INT16) :\n- case (Constants::BAM_TAG_TYPE_UINT16) :\n- bytesToSkip = numElements*sizeof(uint16_t);\n- break;\n- case (Constants::BAM_TAG_TYPE_FLOAT) :\n- case (Constants::BAM_TAG_TYPE_INT32) :\n- case (Constants::BAM_TAG_TYPE_UINT32) :\n- bytesToSkip = numElements*sizeof(uint32_t);\n- break;\n- default:\n- cerr << "BamAlignment ERROR: unknown binary array type encountered: "\n- << arrayType << endl;\n- return false;\n- }\n-\n- // skip binary array contents\n- numBytesParsed += bytesToSkip;\n- pTagData += bytesToSkip;\n- break;\n- }\n-\n- default:\n- cerr << "BamAlignment ERROR: unknown tag type encountered"\n- << storageType << endl;\n- return false;\n- }\n-\n- // return success\n- return true;\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAlignment.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,207 +0,0 @@\n-// ***************************************************************************\n-// BamAlignment.h (c) 2009 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 22 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides the BamAlignment data structure\n-// ***************************************************************************\n-\n-#ifndef BAMALIGNMENT_H\n-#define BAMALIGNMENT_H\n-\n-#include <api/api_global.h>\n-#include <api/BamAux.h>\n-#include <string>\n-#include <vector>\n-\n-namespace BamTools {\n-\n-// forward declaration of BamAlignment\'s friend classes\n-namespace Internal {\n- class BamReaderPrivate;\n- class BamWriterPrivate;\n-} // namespace Internal\n-\n-// BamAlignment data structure\n-struct API_EXPORT BamAlignment {\n-\n- // constructors & destructor\n- public:\n- BamAlignment(void);\n- BamAlignment(const BamAlignment& other);\n- ~BamAlignment(void);\n-\n- // queries against alignment flags\n- public: \n- bool IsDuplicate(void) const; // returns true if this read is a PCR duplicate\n- bool IsFailedQC(void) const; // returns true if this read failed quality control\n- bool IsFirstMate(void) const; // returns true if alignment is first mate on read\n- bool IsMapped(void) const; // returns true if alignment is mapped\n- bool IsMateMapped(void) const; // returns true if alignment\'s mate is mapped\n- bool IsMateReverseStrand(void) const; // returns true if alignment\'s mate mapped to reverse strand\n- bool IsPaired(void) const; // returns true if alignment part of paired-end read\n- bool IsPrimaryAlignment(void) const; // returns true if reported position is primary alignment\n- bool IsProperPair(void) const; // returns true if alignment is part of read that satisfied paired-end resolution\n- bool IsReverseStrand(void) const; // returns true if alignment mapped to reverse strand\n- bool IsSecondMate(void) const; // returns true if alignment is second mate on read\n-\n- // manipulate alignment flags\n- public: \n- void SetIsDuplicate(bool ok); // sets value of "PCR duplicate" flag\n- void SetIsFailedQC(bool ok); // sets value of "failed quality control" flag\n- void SetIsFirstMate(bool ok); // sets value of "alignment is first mate" flag\n- void SetIsMapped(bool ok); // sets value of "alignment is mapped" flag\n- void SetIsMateMapped(bool ok); // sets value of "alignment\'s mate is mapped" flag\n- void SetIsMateReverseStrand(bool ok); // sets value of "alignment\'s mate mapped to reverse strand" flag\n- void SetIsPaired(bool ok); // sets value of "alignment part of paired-end read" flag\n- void SetIsPrimaryAlignment(bool ok); // sets value of "position is primary alignment" flag\n- void SetIsProperPair(bool ok); // sets value of "alignment is part of read that satisfied paired-end resolution" flag\n- void SetIsReverseStrand(bool ok); // sets value of "alignment mapped to reverse strand" flag\n- void SetIsSecondMate(bool ok); // sets value of "alignment is second mate on read" flag\n-\n- // legacy methods (consider deprecated, but still available)\n- void SetIsMateUnmapped(bool ok); // complement of using SetIsMateMapped()\n- void SetIsSecondaryAlignment(bool ok); // complement of using SetIsPrimaryAlignment()\n- void SetIsUnmapped(bool ok); // complement of using SetIsMapped()\n-\n- // tag data access methods\n- public:\n-\n- // -------------------------------------------------------------------------------------\n- '..b', std::vector<uint32_t>& destination) const;\n- bool GetTag(const std::string& tag, std::vector<int32_t>& destination) const;\n- bool GetTag(const std::string& tag, std::vector<float>& destination) const;\n-\n- // retrieves the BAM tag-type character for a tag\n- bool GetTagType(const std::string& tag, char& type) const;\n-\n- // legacy methods (consider deprecated, but still available)\n- bool GetEditDistance(uint32_t& editDistance) const; // retrieves value of "NM" tag\n- bool GetReadGroup(std::string& readGroup) const; // retrieves value of "RG" tag\n- \n- // returns true if alignment has a record for this tag name\n- bool HasTag(const std::string& tag) const;\n-\n- // removes a tag\n- bool RemoveTag(const std::string& tag);\n-\n- // additional methods\n- public:\n- // populates alignment string fields\n- bool BuildCharData(void);\n- // calculates alignment end position\n- int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; \n-\n- // public data fields\n- public:\n- std::string Name; // read name\n- int32_t Length; // length of query sequence\n- std::string QueryBases; // \'original\' sequence (as reported from sequencing machine)\n- std::string AlignedBases; // \'aligned\' sequence (includes any indels, padding, clipping)\n- std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)\n- std::string TagData; // tag data (use provided methods to query/modify)\n- int32_t RefID; // ID number for reference sequence\n- int32_t Position; // position (0-based) where alignment starts\n- uint16_t Bin; // BAM (standard) index bin number for this alignment\n- uint16_t MapQuality; // mapping quality score\n- uint32_t AlignmentFlag; // alignment bit-flag (use provided methods to query/modify)\n- std::vector<CigarOp> CigarData; // CIGAR operations for this alignment\n- int32_t MateRefID; // ID number for reference sequence where alignment\'s mate was aligned\n- int32_t MatePosition; // position (0-based) where alignment\'s mate starts\n- int32_t InsertSize; // mate-pair insert size\n- std::string Filename; // name of BAM file which this alignment comes from\n-\n- //! \\cond\n- // internal utility methods\n- private:\n- bool FindTag(const std::string& tag,\n- char*& pTagData,\n- const unsigned int& tagDataLength,\n- unsigned int& numBytesParsed) const;\n- bool IsValidSize(const std::string& tag,\n- const std::string& type) const;\n- bool SkipToNextTag(const char storageType,\n- char*& pTagData,\n- unsigned int& numBytesParsed) const;\n-\n- // internal data\n- private:\n-\n- struct BamAlignmentSupportData {\n- \n- // data members\n- std::string AllCharData;\n- uint32_t BlockLength;\n- uint32_t NumCigarOperations;\n- uint32_t QueryNameLength;\n- uint32_t QuerySequenceLength;\n- bool HasCoreOnly;\n- \n- // constructor\n- BamAlignmentSupportData(void)\n- : BlockLength(0)\n- , NumCigarOperations(0)\n- , QueryNameLength(0)\n- , QuerySequenceLength(0)\n- , HasCoreOnly(false)\n- { }\n- };\n- BamAlignmentSupportData SupportData;\n- friend class Internal::BamReaderPrivate;\n- friend class Internal::BamWriterPrivate;\n- //! \\endcond\n-};\n-\n-typedef std::vector<BamAlignment> BamAlignmentVector;\n-\n-} // namespace BamTools\n-\n-#endif // BAMALIGNMENT_H\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamAux.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,457 +0,0 @@\n-// ***************************************************************************\r\n-// BamAux.h (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\r\n-// Marth Lab, Department of Biology, Boston College\r\n-// All rights reserved.\r\n-// ---------------------------------------------------------------------------\r\n-// Last modified: 4 March 2011 (DB)\r\n-// ---------------------------------------------------------------------------\r\n-// Provides data structures & utility methods that are used throughout the API.\r\n-// ***************************************************************************\r\n-\r\n-#ifndef BAMAUX_H\r\n-#define BAMAUX_H\r\n-\r\n-#include <api/api_global.h>\r\n-#include <fstream> \r\n-#include <iostream>\r\n-#include <string>\r\n-#include <vector>\r\n-\r\n-/*! \\file BamAux.h\r\n-\r\n- Provides data structures & utility methods that are used throughout the API.\r\n-*/\r\n-/*! \\namespace BamTools\r\n- \\brief Contains all BamTools classes & methods.\r\n-\r\n- The BamTools API contained in this namespace contains classes and methods\r\n- for reading, writing, and manipulating BAM alignment files.\r\n-*/\r\n-namespace BamTools {\r\n-\r\n-// ----------------------------------------------------------------\r\n-// CigarOp\r\n-\r\n-/*! \\struct BamTools::CigarOp\r\n- \\brief Represents a CIGAR alignment operation.\r\n-\r\n- \\sa http://samtools.sourceforge.net/SAM-1.3.pdf for more details on using CIGAR operations.\r\n-*/\r\n-struct API_EXPORT CigarOp {\r\n- \r\n- char Type; //!< CIGAR operation type (MIDNSHP)\r\n- uint32_t Length; //!< CIGAR operation length (number of bases)\r\n- \r\n- //! constructor\r\n- CigarOp(const char type = \'\\0\', \r\n- const uint32_t& length = 0)\r\n- : Type(type)\r\n- , Length(length) \r\n- { }\r\n-};\r\n-\r\n-// ----------------------------------------------------------------\r\n-// RefData\r\n-\r\n-/*! \\struct BamTools::RefData\r\n- \\brief Represents a reference sequence entry\r\n-*/\r\n-struct API_EXPORT RefData {\r\n- \r\n- std::string RefName; //!< name of reference sequence\r\n- int32_t RefLength; //!< length of reference sequence\r\n- \r\n- //! constructor\r\n- RefData(const std::string& name = "",\r\n- const int32_t& length = 0)\r\n- : RefName(name)\r\n- , RefLength(length)\r\n- { }\r\n-};\r\n-\r\n-//! convenience typedef for vector of RefData entries\r\n-typedef std::vector<RefData> RefVector;\r\n-\r\n-// ----------------------------------------------------------------\r\n-// BamRegion\r\n-\r\n-/*! \\struct BamTools::BamRegion\r\n- \\brief Represents a sequential genomic region\r\n-\r\n- Allowed to span multiple (sequential) references.\r\n-*/\r\n-struct API_EXPORT BamRegion {\r\n- \r\n- int LeftRefID; //!< reference ID for region\'s left boundary\r\n- int LeftPosition; //!< position for region\'s left boundary\r\n- int RightRefID; //!< reference ID for region\'s right boundary\r\n- int RightPosition; //!< position for region\'s right boundary\r\n- \r\n- //! constructor\r\n- BamRegion(const int& leftID = -1, \r\n- const int& leftPos = -1,\r\n- const int& rightID = -1,\r\n- const int& rightPos = -1)\r\n- : LeftRefID(leftID)\r\n- , LeftPosition(leftPos)\r\n- , RightRefID(rightID)\r\n- , RightPosition(rightPos)\r\n- { }\r\n- \r\n- //! copy constructor\r\n- BamRegion(const BamRegion& other)\r\n- : LeftRefID(other.LeftRefID)\r\n- , LeftPosition(other.LeftPosition)\r\n- , RightRefID(other.RightRefID)\r\n- , RightPosition(other.RightPosition)\r\n- { }\r\n- \r\n- //! Clears region boundaries\r\n- void clear(void) {\r\n- LeftRefID = -1; LeftPosition = -1;\r\n- RightRefID = -1; RightPosition = -1;\r\n- }\r\n-\r\n- //! Returns true if region has a left boundary\r\n- bool isLeftBoundSpecified(void) const {\r\n- return ( LeftRefID >= 0 && LeftPosition >= 0 );\r\n- }\r\n-\r\n- //! Returns true if region boundaries are not defined\r\n- bool isNull(void) const {\r\n- return ( !isLeftBoundSpecified()'..b'oat) value read from the buffer\r\n-*/\r\n-API_EXPORT inline float UnpackFloat(char* buffer) {\r\n- return UnpackFloat( (const char*)buffer );\r\n-}\r\n-\r\n-/*! \\fn signed int UnpackSignedInt(const char* buffer)\r\n- \\brief reads a signed integer value from byte buffer\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (signed int) value read from the buffer\r\n-*/\r\n-API_EXPORT inline signed int UnpackSignedInt(const char* buffer) {\r\n- union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;\r\n- un.value = 0;\r\n- un.valueBuffer[0] = buffer[0];\r\n- un.valueBuffer[1] = buffer[1];\r\n- un.valueBuffer[2] = buffer[2];\r\n- un.valueBuffer[3] = buffer[3];\r\n- return un.value;\r\n-}\r\n-\r\n-/*! \\fn signed int UnpackSignedInt(char* buffer)\r\n- \\brief reads a signed integer value from byte buffer\r\n-\r\n- This is an overloaded function.\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (signed int) value read from the buffer\r\n-*/\r\n-API_EXPORT inline signed int UnpackSignedInt(char* buffer) {\r\n- return UnpackSignedInt( (const char*) buffer );\r\n-}\r\n-\r\n-/*! \\fn signed short UnpackSignedShort(const char* buffer)\r\n- \\brief reads a signed short integer value from byte buffer\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (signed short) value read from the buffer\r\n-*/\r\n-API_EXPORT inline signed short UnpackSignedShort(const char* buffer) {\r\n- union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;\r\n- un.value = 0;\r\n- un.valueBuffer[0] = buffer[0];\r\n- un.valueBuffer[1] = buffer[1];\r\n- return un.value;\r\n-}\r\n-\r\n-/*! \\fn signed short UnpackSignedShort(char* buffer)\r\n- \\brief reads a signed short integer value from byte buffer\r\n-\r\n- This is an overloaded function.\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (signed short) value read from the buffer\r\n-*/\r\n-API_EXPORT inline signed short UnpackSignedShort(char* buffer) {\r\n- return UnpackSignedShort( (const char*)buffer );\r\n-}\r\n-\r\n-/*! \\fn unsigned int UnpackUnsignedInt(const char* buffer)\r\n- \\brief reads an unsigned integer value from byte buffer\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (unsigned int) value read from the buffer\r\n-*/\r\n-API_EXPORT inline unsigned int UnpackUnsignedInt(const char* buffer) {\r\n- union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;\r\n- un.value = 0;\r\n- un.valueBuffer[0] = buffer[0];\r\n- un.valueBuffer[1] = buffer[1];\r\n- un.valueBuffer[2] = buffer[2];\r\n- un.valueBuffer[3] = buffer[3];\r\n- return un.value;\r\n-}\r\n-\r\n-/*! \\fn unsigned int UnpackUnsignedInt(char* buffer)\r\n- \\brief reads an unsigned integer value from byte buffer\r\n-\r\n- This is an overloaded function.\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (unsigned int) value read from the buffer\r\n-*/\r\n-API_EXPORT inline unsigned int UnpackUnsignedInt(char* buffer) {\r\n- return UnpackUnsignedInt( (const char*)buffer );\r\n-}\r\n-\r\n-/*! \\fn unsigned short UnpackUnsignedShort(const char* buffer)\r\n- \\brief reads an unsigned short integer value from byte buffer\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (unsigned short) value read from the buffer\r\n-*/\r\n-API_EXPORT inline unsigned short UnpackUnsignedShort(const char* buffer) {\r\n- union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;\r\n- un.value = 0;\r\n- un.valueBuffer[0] = buffer[0];\r\n- un.valueBuffer[1] = buffer[1];\r\n- return un.value;\r\n-}\r\n-\r\n-/*! \\fn unsigned short UnpackUnsignedShort(char* buffer)\r\n- \\brief reads an unsigned short integer value from byte buffer\r\n-\r\n- This is an overloaded function.\r\n-\r\n- \\param buffer source byte buffer\r\n- \\return the (unsigned short) value read from the buffer\r\n-*/\r\n-API_EXPORT inline unsigned short UnpackUnsignedShort(char* buffer) {\r\n- return UnpackUnsignedShort( (const char*)buffer );\r\n-}\r\n-\r\n-} // namespace BamTools\r\n-\r\n-#endif // BAMAUX_H\r\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamConstants.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,128 +0,0 @@ -// *************************************************************************** -// BamConstants.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides basic constants for handling BAM files. -// *************************************************************************** - -#ifndef BAM_CONSTANTS_H -#define BAM_CONSTANTS_H - -#include <string> - -/*! \namespace BamTools::Constants - \brief Provides basic constants for handling BAM files. -*/ - -namespace BamTools { -namespace Constants { - -const int BAM_SIZEOF_INT = 4; - -// header magic number -const char* const BAM_HEADER_MAGIC = "BAM\1"; -const unsigned int BAM_HEADER_MAGIC_LENGTH = 4; - -// BAM alignment core size -const int BAM_CORE_SIZE = 32; -const int BAM_CORE_BUFFER_SIZE = 8; - -// BAM alignment flags -const int BAM_ALIGNMENT_PAIRED = 0x0001; -const int BAM_ALIGNMENT_PROPER_PAIR = 0x0002; -const int BAM_ALIGNMENT_UNMAPPED = 0x0004; -const int BAM_ALIGNMENT_MATE_UNMAPPED = 0x0008; -const int BAM_ALIGNMENT_REVERSE_STRAND = 0x0010; -const int BAM_ALIGNMENT_MATE_REVERSE_STRAND = 0x0020; -const int BAM_ALIGNMENT_READ_1 = 0x0040; -const int BAM_ALIGNMENT_READ_2 = 0x0080; -const int BAM_ALIGNMENT_SECONDARY = 0x0100; -const int BAM_ALIGNMENT_QC_FAILED = 0x0200; -const int BAM_ALIGNMENT_DUPLICATE = 0x0400; - -// CIGAR constants -const char* const BAM_CIGAR_LOOKUP = "MIDNSHP=X"; -const int BAM_CIGAR_MATCH = 0; -const int BAM_CIGAR_INS = 1; -const int BAM_CIGAR_DEL = 2; -const int BAM_CIGAR_REFSKIP = 3; -const int BAM_CIGAR_SOFTCLIP = 4; -const int BAM_CIGAR_HARDCLIP = 5; -const int BAM_CIGAR_PAD = 6; -const int BAM_CIGAR_SEQMATCH = 7; -const int BAM_CIGAR_MISMATCH = 8; - -const char BAM_CIGAR_MATCH_CHAR = 'M'; -const char BAM_CIGAR_INS_CHAR = 'I'; -const char BAM_CIGAR_DEL_CHAR = 'D'; -const char BAM_CIGAR_REFSKIP_CHAR = 'N'; -const char BAM_CIGAR_SOFTCLIP_CHAR = 'S'; -const char BAM_CIGAR_HARDCLIP_CHAR = 'H'; -const char BAM_CIGAR_PAD_CHAR = 'P'; -const char BAM_CIGAR_SEQMATCH_CHAR = '='; -const char BAM_CIGAR_MISMATCH_CHAR = 'X'; - -const int BAM_CIGAR_SHIFT = 4; -const int BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1); - -// BAM tag types -const char BAM_TAG_TYPE_ASCII = 'A'; -const char BAM_TAG_TYPE_UINT8 = 'c'; -const char BAM_TAG_TYPE_INT8 = 'C'; -const char BAM_TAG_TYPE_UINT16 = 's'; -const char BAM_TAG_TYPE_INT16 = 'S'; -const char BAM_TAG_TYPE_UINT32 = 'i'; -const char BAM_TAG_TYPE_INT32 = 'I'; -const char BAM_TAG_TYPE_FLOAT = 'f'; -const char BAM_TAG_TYPE_STRING = 'Z'; -const char BAM_TAG_TYPE_HEX = 'H'; -const char BAM_TAG_TYPE_ARRAY = 'B'; - -const size_t BAM_TAG_TAGSIZE = 2; -const size_t BAM_TAG_TYPESIZE = 1; -const int BAM_TAG_ARRAYBASE_SIZE = 8; - -// DNA bases -const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"; -const unsigned char BAM_BASECODE_EQUAL = 0; -const unsigned char BAM_BASECODE_A = 1; -const unsigned char BAM_BASECODE_C = 2; -const unsigned char BAM_BASECODE_G = 4; -const unsigned char BAM_BASECODE_T = 8; -const unsigned char BAM_BASECODE_N = 15; - -const char BAM_DNA_EQUAL = '='; -const char BAM_DNA_A = 'A'; -const char BAM_DNA_C = 'C'; -const char BAM_DNA_G = 'G'; -const char BAM_DNA_T = 'T'; -const char BAM_DNA_N = 'N'; -const char BAM_DNA_DEL = '-'; -const char BAM_DNA_PAD = '*'; - -// zlib constants -const int GZIP_ID1 = 31; -const int GZIP_ID2 = 139; -const int CM_DEFLATE = 8; -const int FLG_FEXTRA = 4; -const int OS_UNKNOWN = 255; -const int BGZF_XLEN = 6; -const int BGZF_ID1 = 66; -const int BGZF_ID2 = 67; -const int BGZF_LEN = 2; -const int GZIP_WINDOW_BITS = -15; -const int Z_DEFAULT_MEM_LEVEL = 8; - -// BZGF constants -const int BGZF_BLOCK_HEADER_LENGTH = 18; -const int BGZF_BLOCK_FOOTER_LENGTH = 8; -const int BGZF_MAX_BLOCK_SIZE = 65536; -const int BGZF_DEFAULT_BLOCK_SIZE = 65536; - -} // namespace Constants -} // namespace BamTools - -#endif // BAM_CONSTANTS_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamIndex.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,80 +0,0 @@ -// *************************************************************************** -// BamIndex.h (c) 2009 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides basic BAM index interface -// *************************************************************************** - -#ifndef BAM_INDEX_H -#define BAM_INDEX_H - -#include <api/api_global.h> -#include <api/BamAux.h> -#include <string> - -namespace BamTools { - -namespace Internal { - class BamReaderPrivate; -} // namespace Internal - -/*! \class BamTools::BamIndex - \brief Provides methods for generating & loading BAM index files. - - This class straddles the line between public API and internal - implementation detail. Most client code should never have to use this - class directly. - - It is exposed to the public API to allow advanced users to implement - their own custom indexing schemes. - - More documentation on methods & enums coming soon. -*/ - -class API_EXPORT BamIndex { - - // enums - public: - // specify index-caching behavior - enum IndexCacheMode { FullIndexCaching = 0 // store entire index file contents in memory - , LimitedIndexCaching // store only index data for current reference - , NoIndexCaching // do not store any index data between jumps - }; - - // list of supported BamIndex types - enum IndexType { BAMTOOLS = 0 - , STANDARD - }; - - // ctor & dtor - public: - BamIndex(Internal::BamReaderPrivate* reader) : m_reader(reader) { } - virtual ~BamIndex(void) { } - - // index interface - public: - // builds index from associated BAM file & writes out to index file - virtual bool Create(void) =0; // creates index file from BAM file - // returns whether reference has alignments or no - virtual bool HasAlignments(const int& referenceID) const =0; - // attempts to use index data to jump to @region, returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0; - // loads existing data from file into memory - virtual bool Load(const std::string& filename) =0; - // change the index caching behavior - virtual void SetCacheMode(const BamIndex::IndexCacheMode& mode) =0; - - // data members - protected: - Internal::BamReaderPrivate* m_reader; // copy, not ownedprivate: -}; - -} // namespace BamTools - -#endif // BAM_INDEX_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,396 +0,0 @@\n-// ***************************************************************************\n-// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 15 March 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Convenience class for reading multiple BAM files.\n-//\n-// This functionality allows applications to work on very large sets of files\n-// without requiring intermediate merge, sort, and index steps for each file\n-// subset. It also improves the performance of our merge system as it\n-// precludes the need to sort merged files.\n-// ***************************************************************************\n-\n-#include <api/BamMultiReader.h>\n-#include <api/internal/BamMultiReader_p.h>\n-using namespace BamTools;\n-\n-#include <string>\n-#include <vector>\n-using namespace std;\n-\n-/*! \\class BamTools::BamReader\n- \\brief Convenience class for reading multiple BAM files.\n-*/\n-\n-/*! \\fn BamMultiReader::BamMultiReader(void)\n- \\brief constructor\n-*/\n-BamMultiReader::BamMultiReader(void)\n- : d(new Internal::BamMultiReaderPrivate)\n-{ }\n-\n-/*! \\fn BamMultiReader::~BamMultiReader(void)\n- \\brief destructor\n-*/\n-BamMultiReader::~BamMultiReader(void) {\n- delete d;\n- d = 0;\n-}\n-\n-/*! \\fn void BamMultiReader::Close(void)\n- \\brief Closes all open BAM files.\n-\n- Also clears out all header and reference data.\n-\n- \\sa CloseFile(), IsOpen(), Open(), BamReader::Close()\n-*/\n-void BamMultiReader::Close(void) {\n- d->Close();\n-}\n-\n-/*! \\fn void BamMultiReader::CloseFile(const std::string& filename)\n- \\brief Closes requested BAM file.\n-\n- Leaves any other file(s) open, along with header and reference data.\n-\n- \\sa Close(), IsOpen(), Open(), BamReader::Close()\n-*/\n-void BamMultiReader::CloseFile(const std::string& filename) {\n- d->CloseFile(filename);\n-}\n-\n-/*! \\fn bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type)\n- \\brief Creates index files for the current BAM files.\n-\n- \\param type file format to create, see BamIndex::IndexType for available formats\n- \\return \\c true if index files created OK\n- \\sa LocateIndexes(), OpenIndexes(), BamReader::CreateIndex()\n-*/\n-bool BamMultiReader::CreateIndexes(const BamIndex::IndexType& type) {\n- return d->CreateIndexes(type);\n-}\n-\n-/*! \\fn const std::vector<std::string> BamMultiReader::Filenames(void) const\n- \\brief Returns list of filenames for all open BAM files.\n-\n- Retrieved filenames will contain whatever was passed via Open().\n- If you need full directory paths here, be sure to include them\n- when you open the BAM files.\n-\n- \\returns names of open BAM files. If no files are open, returns an empty vector.\n- \\sa IsOpen(), BamReader::GetFilename()\n-*/\n-const std::vector<std::string> BamMultiReader::Filenames(void) const {\n- return d->Filenames();\n-}\n-\n-/*! \\fn SamHeader BamMultiReader::GetHeader(void) const\n- \\brief Returns unified SAM-format header for all files\n-\n- N.B. - Modifying the retrieved text does NOT affect the current\n- BAM files. Thesse file have been opened in a read-only mode. However,\n- your modified header text can be used in conjunction with BamWriter\n- to generate a new BAM file with the appropriate header information.\n-\n- \\returns header data wrapped in SamHeader object\n- \\sa GetHeaderText(), BamReader::GetHeader()\n-*/\n-SamHeader BamMultiReader::GetHeader(void) const {\n- return d->GetHeader();\n-}\n-\n-/*! \\fn std::string BamMultiReader::GetHeaderText(void) const\n- \\brief Returns unified SAM-format header text for all files\n-\n- N.B. - Modifying the retrieved text does NOT affect the current\n- BAM files. Thesse file have been opened in a read-only mode. However,\n- your modified header text can be used in conjunction with BamWriter\n- to gener'..b'mMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames)\n- \\brief Opens index files for current BAM files.\n-\n- N.B. - Currently assumes that index filenames match the order (and number) of\n- BAM files passed to Open().\n-\n- \\param indexFilenames list of BAM index file names\n- \\returns \\c true if BAM index file was opened & data loaded successfully\n- \\sa LocateIndex(), Open(), SetIndex(), BamReader::OpenIndex()\n-*/\n-bool BamMultiReader::OpenIndexes(const std::vector<std::string>& indexFilenames) {\n- return d->OpenIndexes(indexFilenames);\n-}\n-\n-/*! \\fn void BamMultiReader::PrintFilenames(void) const\n- \\brief Convenience method for printing filenames to stdout.\n- \\deprecated Doesn\'t really belong as an API function. Clients should\n- determine how the data is reported.\n- \\sa Filenames(), BamReader::GetFilename()\n-*/\n-void BamMultiReader::PrintFilenames(void) const {\n- d->PrintFilenames();\n-}\n-\n-/*! \\fn bool BamMultiReader::Rewind(void)\n- \\brief Returns the internal file pointers to the beginning of alignment records.\n-\n- Useful for performing multiple sequential passes through BAM files.\n- Calling this function clears any prior region that may have been set.\n-\n- \\returns \\c true if rewind operation was successful\n- \\sa Jump(), SetRegion(), BamReader::Rewind()\n-*/\n-bool BamMultiReader::Rewind(void) {\n- return d->Rewind();\n-}\n-\n-/*! \\fn void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n- \\brief Changes the caching behavior of the index data.\n-\n- Default mode is BamIndex::LimitedIndexCaching.\n-\n- \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n- description of the available cache modes\n- \\sa HasIndex(), BamReader::SetIndexCacheMode()\n-*/\n-void BamMultiReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n- d->SetIndexCacheMode(mode);\n-}\n-\n-/*! \\fn bool BamMultiReader::SetRegion(const BamRegion& region)\n- \\brief Sets a target region of interest\n-\n- Equivalent to calling BamReader::SetRegion() on all open BAM files.\n-\n- \\param region desired region-of-interest to activate\n- \\returns \\c true if ALL readers set the region successfully\n- \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n-*/\n-bool BamMultiReader::SetRegion(const BamRegion& region) {\n- return d->SetRegion(region);\n-}\n-\n-/*! \\fn bool BamMultiReader::SetRegion(const int& leftRefID,\n- const int& leftPosition,\n- const int& rightRefID,\n- const int& rightPosition)\n- \\brief Sets a target region of interest\n-\n- This is an overloaded function.\n-\n- Equivalent to calling BamReader::SetRegion() on all open BAM files.\n-\n- \\param leftRefID referenceID of region\'s left boundary\n- \\param leftPosition position of region\'s left boundary\n- \\param rightRefID reference ID of region\'s right boundary\n- \\param rightPosition position of region\'s right boundary\n-\n- \\returns \\c true if ALL readers set the region successfully\n- \\sa HasIndexes(), Jump(), BamReader::SetRegion()\n-*/\n-bool BamMultiReader::SetRegion(const int& leftRefID,\n- const int& leftPosition,\n- const int& rightRefID,\n- const int& rightPosition)\n-{\n- BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition);\n- return d->SetRegion(region);\n-}\n-\n-/*! \\fn void BamMultiReader::SetSortOrder(const SortOrder& order)\n- \\brief Sets the expected sorting order for reading across multiple BAM files.\n-\n- Default is BamMultiReader::SortedByPosition.\n-\n- The SortOrder determines how the reader determines which alignment is "next"\n- from among its open readers.\n-\n- \\param order expected sort order\n-*/\n-void BamMultiReader::SetSortOrder(const SortOrder& order) {\n- d->SetSortOrder(order);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamMultiReader.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,127 +0,0 @@ -// *************************************************************************** -// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 15 March 2011 (DB) -// --------------------------------------------------------------------------- -// Convenience class for reading multiple BAM files. -// *************************************************************************** - -#ifndef BAMMULTIREADER_H -#define BAMMULTIREADER_H - -#include <api/api_global.h> -#include <api/BamReader.h> -#include <map> -#include <sstream> -#include <string> -#include <utility> - -namespace BamTools { - -namespace Internal { - class BamMultiReaderPrivate; -} // namespace Internal - -class API_EXPORT BamMultiReader { - - public: - enum SortOrder { SortedByPosition = 0 - , SortedByReadName - , Unsorted - }; - - // constructor / destructor - public: - BamMultiReader(void); - ~BamMultiReader(void); - - // public interface - public: - - // ---------------------- - // BAM file operations - // ---------------------- - - // closes all open BAM files - void Close(void); - // close only the requested BAM file - void CloseFile(const std::string& filename); - // returns list of filenames for all open BAM files - const std::vector<std::string> Filenames(void) const; - // returns true if multireader has any open BAM files - bool HasOpenReaders(void) const; - // performs random-access jump within current BAM files - bool Jump(int refID, int position = 0); - // opens BAM files - bool Open(const std::vector<std::string>& filenames); - // opens a single BAM file, adding to any other current BAM files - bool OpenFile(const std::string& filename); - // returns file pointers to beginning of alignments - bool Rewind(void); - // sets the target region of interest - bool SetRegion(const BamRegion& region); - // sets the target region of interest - bool SetRegion(const int& leftRefID, - const int& leftPosition, - const int& rightRefID, - const int& rightPosition); - - // ---------------------- - // access alignment data - // ---------------------- - - // retrieves next available alignment - bool GetNextAlignment(BamAlignment& alignment); - // retrieves next available alignmnet (without populating the alignment's string data fields) - bool GetNextAlignmentCore(BamAlignment& alignment); - - // sets the expected sorting order for reading across multiple BAM files - void SetSortOrder(const SortOrder& order); - - // ---------------------- - // access auxiliary data - // ---------------------- - - // returns unified SAM header for all files - SamHeader GetHeader(void) const; - // returns unified SAM header text for all files - std::string GetHeaderText(void) const; - // returns number of reference sequences - int GetReferenceCount(void) const; - // returns all reference sequence entries. - const BamTools::RefVector GetReferenceData(void) const; - // returns the ID of the reference with this name. - int GetReferenceID(const std::string& refName) const; - - // ---------------------- - // BAM index operations - // ---------------------- - - // creates index files for current BAM files - bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD); - // returns true if all BAM files have index data available - bool HasIndexes(void) const; - // looks for index files that match current BAM files - bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); - // opens index files for current BAM files. - bool OpenIndexes(const std::vector<std::string>& indexFilenames); - // changes the caching behavior of the index data - void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); - - // deprecated methods - public: - // returns \c true if all BAM files have index data available. - bool IsIndexLoaded(void) const; - // convenience method for printing filenames to stdout - void PrintFilenames(void) const; - - // private implementation - private: - Internal::BamMultiReaderPrivate* d; -}; - -} // namespace BamTools - -#endif // BAMMULTIREADER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b"@@ -1,370 +0,0 @@\n-// ***************************************************************************\n-// BamReader.cpp (c) 2009 Derek Barnett, Michael Str\xef\xbf\xbdmberg\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 4 March 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides read access to BAM files.\n-// ***************************************************************************\n-\n-#include <api/BamReader.h>\n-#include <api/internal/BamReader_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <algorithm>\n-#include <iostream>\n-#include <iterator>\n-#include <string>\n-#include <vector>\n-using namespace std;\n-\n-/*! \\class BamTools::BamReader\n- \\brief Provides read access to BAM files.\n-*/\n-\n-/*! \\fn BamReader::BamReader(void)\n- \\brief constructor\n-*/\n-BamReader::BamReader(void)\n- : d(new BamReaderPrivate(this))\n-{ }\n-\n-/*! \\fn BamReader::~BamReader(void)\n- \\brief destructor\n-*/\n-BamReader::~BamReader(void) {\n- delete d;\n- d = 0;\n-}\n-\n-/*! \\fn void BamReader::Close(void)\n- \\brief Closes the current BAM file.\n-\n- Also clears out all header and reference data.\n-\n- \\sa IsOpen(), Open()\n-*/\n-void BamReader::Close(void) {\n- d->Close();\n-}\n-\n-/*! \\fn bool BamReader::CreateIndex(const BamIndex::IndexType& type)\n- \\brief Creates an index file for current BAM file.\n-\n- \\param type file format to create, see BamIndex::IndexType for available formats\n- \\return \\c true if index created OK\n- \\sa LocateIndex(), OpenIndex()\n-*/\n-bool BamReader::CreateIndex(const BamIndex::IndexType& type) {\n- return d->CreateIndex(type);\n-}\n-\n-/*! \\fn const std::string BamReader::GetFilename(void) const\n- \\brief Returns name of current BAM file.\n-\n- Retrieved filename will contain whatever was passed via Open().\n- If you need full directory paths here, be sure to include them\n- when you open the BAM file.\n-\n- \\returns name of open BAM file. If no file is open, returns an empty string.\n- \\sa IsOpen()\n-*/\n-const std::string BamReader::GetFilename(void) const {\n- return d->Filename();\n-}\n-\n-/*! \\fn SamHeader BamReader::GetHeader(void) const\n- \\brief Returns SAM header data.\n-\n- Header data is wrapped in a SamHeader object that can be conveniently queried & modified.\n-\n- N.B. - Modifying the retrieved SamHeader object does NOT affect the\n- current BAM file. This file has been opened in a read-only mode.\n- However, your modified SamHeader object can be used in conjunction with\n- BamWriter to generate a new BAM file with the appropriate header information.\n-\n- \\returns header data object\n- \\sa GetHeaderText()\n-*/\n-SamHeader BamReader::GetHeader(void) const {\n- return d->GetSamHeader();\n-}\n-\n-/*! \\fn std::string BamReader::GetHeaderText(void) const\n- \\brief Returns SAM header data, as SAM-formatted text.\n-\n- N.B. - Modifying the retrieved text does NOT affect the current\n- BAM file. This file has been opened in a read-only mode. However,\n- your modified header text can be used in conjunction with BamWriter\n- to generate a new BAM file with the appropriate header information.\n-\n- \\returns SAM-formatted header text\n- \\sa GetHeader()\n-*/\n-std::string BamReader::GetHeaderText(void) const {\n- return d->GetHeaderText();\n-}\n-\n-/*! \\fn bool BamReader::GetNextAlignment(BamAlignment& alignment)\n- \\brief Retrieves next available alignment.\n-\n- Attempts to read the next alignment record from BAM file, and checks to see\n- if it overlaps the current region. If no region is currently set, then the\n- next alignment available is always considered valid.\n-\n- If a region has been set, via Jump() or SetRegion(), an alignment is only\n- considered valid if it overlaps the region. If the actual 'next' alignment record\n- in the BAM file does not overlap this r"..b"ng& indexFilename)\n- \\brief Opens a BAM index file.\n-\n- \\param indexFilename name of BAM index file\n-\n- \\returns \\c true if BAM index file was opened & data loaded successfully\n- \\sa LocateIndex(), Open(), SetIndex()\n-*/\n-bool BamReader::OpenIndex(const std::string& indexFilename) {\n- return d->OpenIndex(indexFilename);\n-}\n-\n-/*! \\fn bool BamReader::Rewind(void)\n- \\brief Returns the internal file pointer to the first alignment record.\n-\n- Useful for performing multiple sequential passes through a BAM file.\n- Calling this function clears any prior region that may have been set.\n-\n- N.B. - Note that this function sets the file pointer to first alignment record\n- in the BAM file, NOT the beginning of the file.\n-\n- \\returns \\c true if rewind operation was successful\n- \\sa Jump(), SetRegion()\n-*/\n-bool BamReader::Rewind(void) {\n- return d->Rewind();\n-}\n-\n-/*! \\fn void BamReader::SetIndex(BamIndex* index)\n- \\brief Sets a custom BamIndex on this reader.\n-\n- Only necessary for custom BamIndex subclasses. Most clients should\n- never have to use this function.\n-\n- Example:\n- \\code\n- BamReader reader;\n- reader.SetIndex(new MyCustomBamIndex);\n- \\endcode\n-\n- N.B. - BamReader takes ownership of \\a index - i.e. BamReader will\n- take care of deleting the pointer when the reader is destructed,\n- when the current BAM file is closed, or when a new index is requested.\n-\n- \\param index custom BamIndex subclass created by client\n- \\sa CreateIndex(), LocateIndex(), OpenIndex()\n-*/\n-void BamReader::SetIndex(BamIndex* index) {\n- d->SetIndex(index);\n-}\n-\n-/*! \\fn void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode)\n- \\brief Changes the caching behavior of the index data.\n-\n- Default mode is BamIndex::LimitedIndexCaching.\n-\n- \\param mode desired cache mode for index, see BamIndex::IndexCacheMode for\n- description of the available cache modes\n- \\sa HasIndex()\n-*/\n-void BamReader::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n- d->SetIndexCacheMode(mode);\n-}\n-\n-/*! \\fn bool BamReader::SetRegion(const BamRegion& region)\n- \\brief Sets a target region of interest\n-\n- Requires that index data be available. Attempts a random-access\n- jump in the BAM file, near \\a region left boundary position.\n-\n- Subsequent calls to GetNextAlignment() or GetNextAlignmentCore()\n- will only return \\c true when alignments can be found that overlap\n- this \\a region.\n-\n- A \\a region with no right boundary is considered open-ended, meaning\n- that all alignments that lie downstream of the left boundary are\n- considered valid, continuing to the end of the BAM file.\n-\n- \\param region desired region-of-interest to activate\n- \\returns \\c true if reader was able to jump successfully to the region's left boundary\n- \\sa HasIndex(), Jump()\n-*/\n-bool BamReader::SetRegion(const BamRegion& region) {\n- return d->SetRegion(region);\n-}\n-\n-/*! \\fn bool BamReader::SetRegion(const int& leftRefID,\n- const int& leftPosition,\n- const int& rightRefID,\n- const int& rightPosition)\n- \\brief Sets a target region of interest.\n-\n- This is an overloaded function.\n-\n- \\param leftRefID referenceID of region's left boundary\n- \\param leftPosition position of region's left boundary\n- \\param rightRefID reference ID of region's right boundary\n- \\param rightPosition position of region's right boundary\n-\n- \\returns \\c true if reader was able to jump successfully to the region's left boundary\n- \\sa HasIndex(), Jump()\n-*/\n-bool BamReader::SetRegion(const int& leftRefID,\n- const int& leftBound,\n- const int& rightRefID,\n- const int& rightBound)\n-{\n- return d->SetRegion( BamRegion(leftRefID, leftBound, rightRefID, rightBound) );\n-}\n" |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamReader.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,118 +0,0 @@ -// *************************************************************************** -// BamReader.h (c) 2009 Derek Barnett, Michael Str�mberg -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 4 March 2011 (DB) -// --------------------------------------------------------------------------- -// Provides read access to BAM files. -// *************************************************************************** - -#ifndef BAMREADER_H -#define BAMREADER_H - -#include <api/api_global.h> -#include <api/BamAlignment.h> -#include <api/BamIndex.h> -#include <api/SamHeader.h> -#include <string> - -namespace BamTools { - -namespace Internal { - class BamReaderPrivate; -} // namespace Internal - -class API_EXPORT BamReader { - - // constructor / destructor - public: - BamReader(void); - ~BamReader(void); - - // public interface - public: - - // ---------------------- - // BAM file operations - // ---------------------- - - // closes the current BAM file - void Close(void); - // returns filename of current BAM file - const std::string GetFilename(void) const; - // returns true if a BAM file is open for reading - bool IsOpen(void) const; - // performs random-access jump within BAM file - bool Jump(int refID, int position = 0); - // opens a BAM file - bool Open(const std::string& filename); - // returns internal file pointer to beginning of alignment data - bool Rewind(void); - // sets the target region of interest - bool SetRegion(const BamRegion& region); - // sets the target region of interest - bool SetRegion(const int& leftRefID, - const int& leftPosition, - const int& rightRefID, - const int& rightPosition); - - // ---------------------- - // access alignment data - // ---------------------- - - // retrieves next available alignment - bool GetNextAlignment(BamAlignment& alignment); - // retrieves next available alignmnet (without populating the alignment's string data fields) - bool GetNextAlignmentCore(BamAlignment& alignment); - - // ---------------------- - // access header data - // ---------------------- - - // returns SAM header data - SamHeader GetHeader(void) const; - // returns SAM header data, as SAM-formatted text - std::string GetHeaderText(void) const; - - // ---------------------- - // access reference data - // ---------------------- - - // returns the number of reference sequences - int GetReferenceCount(void) const; - // returns all reference sequence entries - const RefVector& GetReferenceData(void) const; - // returns the ID of the reference with this name - int GetReferenceID(const std::string& refName) const; - - // ---------------------- - // BAM index operations - // ---------------------- - - // creates an index file for current BAM file, using the requested index type - bool CreateIndex(const BamIndex::IndexType& type = BamIndex::STANDARD); - // returns true if index data is available - bool HasIndex(void) const; - // looks in BAM file's directory for a matching index file - bool LocateIndex(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); - // opens a BAM index file - bool OpenIndex(const std::string& indexFilename); - // sets a custom BamIndex on this reader - void SetIndex(BamIndex* index); - // changes the caching behavior of the index data - void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); - - // deprecated methods - public: - // returns true if index data is available - bool IsIndexLoaded(void) const; - - // private implementation - private: - Internal::BamReaderPrivate* d; -}; - -} // namespace BamTools - -#endif // BAMREADER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,143 +0,0 @@ -// *************************************************************************** -// BamWriter.cpp (c) 2009 Michael Str�mberg, Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 4 March 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for producing BAM files -// *************************************************************************** - -#include <api/BamAlignment.h> -#include <api/BamWriter.h> -#include <api/SamHeader.h> -#include <api/internal/BamWriter_p.h> -using namespace BamTools; -using namespace BamTools::Internal; - -#include <iostream> -using namespace std; - -/*! \class BamTools::BamWriter - \brief Provides write access for generating BAM files. -*/ -/*! \enum BamTools::BamWriter::CompressionMode - \brief This enum describes the compression behaviors for output BAM files. -*/ -/*! \var BamWriter::CompressionMode BamWriter::Compressed - \brief Use normal BAM compression -*/ -/*! \var BamWriter::CompressionMode BamWriter::Uncompressed - \brief Disable BAM compression - - Useful in situations where the BAM data is streamed (e.g. piping). - It would be wasteful to compress, and then immediately decompress - the data. -*/ - -/*! \fn BamWriter::BamWriter(void) - \brief constructor -*/ -BamWriter::BamWriter(void) - : d(new BamWriterPrivate) -{ } - -/*! \fn BamWriter::~BamWriter(void) - \brief destructor -*/ -BamWriter::~BamWriter(void) { - delete d; - d = 0; -} - -/*! \fn BamWriter::Close(void) - \brief Closes the current BAM file. - \sa Open() -*/ -void BamWriter::Close(void) { - d->Close(); -} - -/*! \fn bool BamWriter::IsOpen(void) const - \brief Returns \c true if BAM file is open for writing. - \sa Open() -*/ -bool BamWriter::IsOpen(void) const { - return d->IsOpen(); -} - -/*! \fn bool BamWriter::Open(const std::string& filename, - const std::string& samHeaderText, - const RefVector& referenceSequences) - \brief Opens a BAM file for writing. - - Will overwrite the BAM file if it already exists. - - \param filename name of output BAM file - \param samHeaderText header data, as SAM-formatted string - \param referenceSequences list of reference entries - - \return \c true if opened successfully - \sa Close(), IsOpen(), BamReader::GetHeaderText(), BamReader::GetReferenceData() -*/ -bool BamWriter::Open(const std::string& filename, - const std::string& samHeaderText, - const RefVector& referenceSequences) -{ - return d->Open(filename, samHeaderText, referenceSequences); -} - -/*! \fn bool BamWriter::Open(const std::string& filename, - const SamHeader& samHeader, - const RefVector& referenceSequences) - \brief Opens a BAM file for writing. - - This is an overloaded function. - - Will overwrite the BAM file if it already exists. - - \param filename name of output BAM file - \param samHeader header data, wrapped in SamHeader object - \param referenceSequences list of reference entries - - \return \c true if opened successfully - \sa Close(), IsOpen(), BamReader::GetHeader(), BamReader::GetReferenceData() -*/ -bool BamWriter::Open(const std::string& filename, - const SamHeader& samHeader, - const RefVector& referenceSequences) -{ - return d->Open(filename, samHeader.ToString(), referenceSequences); -} - -/*! \fn void BamWriter::SaveAlignment(const BamAlignment& alignment) - \brief Saves an alignment to the BAM file. - - \param alignment BamAlignment record to save - \sa BamReader::GetNextAlignment(), BamReader::GetNextAlignmentCore() -*/ -void BamWriter::SaveAlignment(const BamAlignment& alignment) { - d->SaveAlignment(alignment); -} - -/*! \fn void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) - \brief Sets the output compression mode. - - Default mode is BamWriter::Compressed. - - N.B. - Changing the compression mode is disabled on open files (i.e. the request will be ignored). - Be sure to call this function before opening the BAM file. - - \code - BamWriter writer; - writer.SetCompressionMode(BamWriter::Uncompressed); - writer.Open( ... ); - // ... - \endcode - - \param compressionMode desired output compression behavior - \sa IsOpen(), Open() -*/ -void BamWriter::SetCompressionMode(const CompressionMode& compressionMode) { - d->SetWriteCompressed( compressionMode == BamWriter::Compressed ); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/BamWriter.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,64 +0,0 @@ -// *************************************************************************** -// BamWriter.h (c) 2009 Michael Str�mberg, Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 4 March 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for producing BAM files -// *************************************************************************** - -#ifndef BAMWRITER_H -#define BAMWRITER_H - -#include <api/api_global.h> -#include <api/BamAux.h> -#include <string> - -namespace BamTools { - -class BamAlignment; -class SamHeader; - -namespace Internal { - class BamWriterPrivate; -} // namespace Internal - -class API_EXPORT BamWriter { - - public: enum CompressionMode { Compressed = 0 - , Uncompressed - }; - - // ctor & dtor - public: - BamWriter(void); - ~BamWriter(void); - - // public interface - public: - // closes the current BAM file - void Close(void); - // returns true if BAM file is open for writing - bool IsOpen(void) const; - // opens a BAM file for writing - bool Open(const std::string& filename, - const std::string& samHeaderText, - const RefVector& referenceSequences); - // opens a BAM file for writing - bool Open(const std::string& filename, - const SamHeader& samHeader, - const RefVector& referenceSequences); - // saves the alignment to the alignment archive - void SaveAlignment(const BamAlignment& alignment); - // sets the output compression mode - void SetCompressionMode(const CompressionMode& compressionMode); - - // private implementation - private: - Internal::BamWriterPrivate* d; -}; - -} // namespace BamTools - -#endif // BAMWRITER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/CMakeLists.txt Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,78 +0,0 @@ -# ========================== -# BamTools CMakeLists.txt -# (c) 2010 Derek Barnett -# -# src/api/ -# ========================== - -# list include paths -include_directories( ${BamTools_SOURCE_DIR}/src ) - -# add compiler definitions -add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library symbols) -add_definitions( -fPIC ) # (attempt to force PIC compiling on some archs) - -# list of all BamTools API source (.cpp) files -set( BamToolsAPISources - BamAlignment.cpp - BamMultiReader.cpp - BamReader.cpp - BamWriter.cpp - SamHeader.cpp - SamProgram.cpp - SamProgramChain.cpp - SamReadGroup.cpp - SamReadGroupDictionary.cpp - SamSequence.cpp - SamSequenceDictionary.cpp - internal/BamHeader_p.cpp - internal/BamIndexFactory_p.cpp - internal/BamMultiReader_p.cpp - internal/BamRandomAccessController_p.cpp - internal/BamReader_p.cpp - internal/BamStandardIndex_p.cpp - internal/BamToolsIndex_p.cpp - internal/BamWriter_p.cpp - internal/BgzfStream_p.cpp - internal/SamFormatParser_p.cpp - internal/SamFormatPrinter_p.cpp - internal/SamHeaderValidator_p.cpp -) - -# create main BamTools API shared library -add_library( BamTools SHARED ${BamToolsAPISources} ) -set_target_properties( BamTools PROPERTIES SOVERSION "1.0.2" ) -set_target_properties( BamTools PROPERTIES OUTPUT_NAME "bamtools" ) - -# create main BamTools API static library -add_library( BamTools-static STATIC ${BamToolsAPISources} ) -set_target_properties( BamTools-static PROPERTIES OUTPUT_NAME "bamtools" ) -set_target_properties( BamTools-static PROPERTIES PREFIX "lib" ) - -# link libraries with zlib automatically -target_link_libraries( BamTools z ) -target_link_libraries( BamTools-static z ) - -# set library install destinations -install( TARGETS BamTools LIBRARY DESTINATION "lib/bamtools") -install( TARGETS BamTools-static ARCHIVE DESTINATION "lib/bamtools") - -# export API headers -include(../ExportHeader.cmake) -set(ApiIncludeDir "api") -ExportHeader(APIHeaders api_global.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamAlignment.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamAux.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamConstants.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamIndex.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamMultiReader.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamReader.h ${ApiIncludeDir}) -ExportHeader(APIHeaders BamWriter.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamConstants.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamHeader.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamProgram.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamProgramChain.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamReadGroup.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamReadGroupDictionary.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamSequence.h ${ApiIncludeDir}) -ExportHeader(APIHeaders SamSequenceDictionary.h ${ApiIncludeDir}) |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamConstants.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,96 +0,0 @@ -// *************************************************************************** -// SamConstants.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides constants for SAM header -// *************************************************************************** - -#ifndef SAM_CONSTANTS_H -#define SAM_CONSTANTS_H - -#include <api/api_global.h> -#include <string> - -namespace BamTools { -namespace Constants { - -// basic char constants used in SAM format -const char SAM_COLON = ':'; -const char SAM_EQUAL = '='; -const char SAM_PERIOD = '.'; -const char SAM_STAR = '*'; -const char SAM_TAB = '\t'; -const std::string SAM_DIGITS = "0123456789"; - -// HD entries -const std::string SAM_HD_BEGIN_TOKEN = "@HD"; -const std::string SAM_HD_VERSION_TAG = "VN"; -const std::string SAM_HD_SORTORDER_TAG = "SO"; -const std::string SAM_HD_GROUPORDER_TAG = "GO"; - -// SQ entries -const std::string SAM_SQ_BEGIN_TOKEN = "@SQ"; -const std::string SAM_SQ_ASSEMBLYID_TAG = "AS"; -const std::string SAM_SQ_CHECKSUM_TAG = "M5"; -const std::string SAM_SQ_LENGTH_TAG = "LN"; -const std::string SAM_SQ_NAME_TAG = "SN"; -const std::string SAM_SQ_SPECIES_TAG = "SP"; -const std::string SAM_SQ_URI_TAG = "UR"; - -// RG entries -const std::string SAM_RG_BEGIN_TOKEN = "@RG"; -const std::string SAM_RG_DESCRIPTION_TAG = "DS"; -const std::string SAM_RG_FLOWORDER_TAG = "FO"; -const std::string SAM_RG_ID_TAG = "ID"; -const std::string SAM_RG_KEYSEQUENCE_TAG = "KS"; -const std::string SAM_RG_LIBRARY_TAG = "LB"; -const std::string SAM_RG_PLATFORMUNIT_TAG = "PU"; -const std::string SAM_RG_PREDICTEDINSERTSIZE_TAG = "PI"; -const std::string SAM_RG_PRODUCTIONDATE_TAG = "DT"; -const std::string SAM_RG_PROGRAM_TAG = "PG"; -const std::string SAM_RG_SAMPLE_TAG = "SM"; -const std::string SAM_RG_SEQCENTER_TAG = "CN"; -const std::string SAM_RG_SEQTECHNOLOGY_TAG = "PL"; - -// PG entries -const std::string SAM_PG_BEGIN_TOKEN = "@PG"; -const std::string SAM_PG_COMMANDLINE_TAG = "CL"; -const std::string SAM_PG_ID_TAG = "ID"; -const std::string SAM_PG_NAME_TAG = "PN"; -const std::string SAM_PG_PREVIOUSPROGRAM_TAG = "PP"; -const std::string SAM_PG_VERSION_TAG = "VN"; - -// CO entries -const std::string SAM_CO_BEGIN_TOKEN = "@CO"; - -// HD:SO values -const std::string SAM_HD_SORTORDER_COORDINATE = "coordinate"; -const std::string SAM_HD_SORTORDER_QUERYNAME = "queryname"; -const std::string SAM_HD_SORTORDER_UNKNOWN = "unknown"; -const std::string SAM_HD_SORTORDER_UNSORTED = "unsorted"; - -// HD:GO values -const std::string SAM_HD_GROUPORDER_NONE = "none"; -const std::string SAM_HD_GROUPORDER_QUERY = "query"; -const std::string SAM_HD_GROUPORDER_REFERENCE = "reference"; - -// SQ:LN values -const unsigned int SAM_SQ_LENGTH_MIN = 1; -const unsigned int SAM_SQ_LENGTH_MAX = 536870911; // 2^29 - 1 - -// RG:PL values -const std::string SAM_RG_SEQTECHNOLOGY_CAPILLARY = "CAPILLARY"; -const std::string SAM_RG_SEQTECHNOLOGY_HELICOS = "HELICOS"; -const std::string SAM_RG_SEQTECHNOLOGY_ILLUMINA = "ILLUMINA"; -const std::string SAM_RG_SEQTECHNOLOGY_IONTORRENT = "IONTORRENT"; -const std::string SAM_RG_SEQTECHNOLOGY_LS454 = "LS454"; -const std::string SAM_RG_SEQTECHNOLOGY_PACBIO = "PACBIO"; -const std::string SAM_RG_SEQTECHNOLOGY_SOLID = "SOLID"; - -} // namespace Constants -} // namespace BamTools - -#endif // SAM_CONSTANTS_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,185 +0,0 @@ -// *************************************************************************** -// SamHeader.cpp (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header data fields. -// *************************************************************************** - -#include <api/SamConstants.h> -#include <api/SamHeader.h> -#include <api/internal/SamFormatParser_p.h> -#include <api/internal/SamFormatPrinter_p.h> -#include <api/internal/SamHeaderValidator_p.h> -using namespace BamTools; -using namespace BamTools::Internal; -using namespace std; - -/*! \struct BamTools::SamHeader - \brief Represents the SAM-formatted text header that is part of the BAM file header. - - Provides direct read/write access to the SAM header data fields. - - \sa \samSpecURL -*/ -/*! \var SamHeader::Version - \brief corresponds to \@HD VN:\<Version\> - - Required for valid SAM header, if @HD record is present. -*/ -/*! \var SamHeader::SortOrder - \brief corresponds to \@HD SO:\<SortOrder\> -*/ -/*! \var SamHeader::GroupOrder - \brief corresponds to \@HD GO:\<GroupOrder\> -*/ -/*! \var SamHeader::Sequences - \brief corresponds to \@SQ entries - \sa SamSequence, SamSequenceDictionary -*/ -/*! \var SamHeader::ReadGroups - \brief corresponds to \@RG entries - \sa SamReadGroup, SamReadGroupDictionary -*/ -/*! \var SamHeader::ProgramName - \brief corresponds to \@PG ID:\<ProgramName\> -*/ -/*! \var SamHeader::ProgramVersion - \brief corresponds to \@PG VN:\<ProgramVersion\> -*/ -/*! \var SamHeader::ProgramCommandLine - \brief corresponds to \@PG CL:\<ProgramCommandLine\> -*/ -/*! \var SamHeader::Comments - \brief corresponds to \@CO entries -*/ - -/*! \fn SamHeader::SamHeader(const std::string& headerText = "") - \brief constructor -*/ -SamHeader::SamHeader(const std::string& headerText) - : Version("") - , SortOrder(Constants::SAM_HD_SORTORDER_UNKNOWN) - , GroupOrder("") -{ - SamFormatParser parser(*this); - parser.Parse(headerText); -} - -/*! \fn SamHeader::SamHeader(const SamHeader& other) - \brief copy constructor -*/ -SamHeader::SamHeader(const SamHeader& other) - : Version(other.Version) - , SortOrder(other.SortOrder) - , GroupOrder(other.GroupOrder) - , Sequences(other.Sequences) - , ReadGroups(other.ReadGroups) - , Programs(other.Programs) -{ } - -/*! \fn SamHeader::~SamHeader(void) - \brief destructor -*/ -SamHeader::~SamHeader(void) { } - -/*! \fn void SamHeader::Clear(void) - \brief Clears all header contents. -*/ -void SamHeader::Clear(void) { - Version.clear(); - SortOrder.clear(); - GroupOrder.clear(); - Sequences.Clear(); - ReadGroups.Clear(); - Programs.Clear(); - Comments.clear(); -} - -/*! \fn bool SamHeader::HasVersion(void) const - \brief Returns \c true if header contains \@HD ID:\<Version\> -*/ -bool SamHeader::HasVersion(void) const { - return (!Version.empty()); -} - -/*! \fn bool SamHeader::HasSortOrder(void) const - \brief Returns \c true if header contains \@HD SO:\<SortOrder\> -*/ -bool SamHeader::HasSortOrder(void) const { - return (!SortOrder.empty()); -} - -/*! \fn bool SamHeader::HasGroupOrder(void) const - \brief Returns \c true if header contains \@HD GO:\<GroupOrder\> -*/ -bool SamHeader::HasGroupOrder(void) const { - return (!GroupOrder.empty()); -} - -/*! \fn bool SamHeader::HasSequences(void) const - \brief Returns \c true if header contains any \@SQ entries -*/ -bool SamHeader::HasSequences(void) const { - return (!Sequences.IsEmpty()); -} - -/*! \fn bool SamHeader::HasReadGroups(void) const - \brief Returns \c true if header contains any \@RG entries -*/ -bool SamHeader::HasReadGroups(void) const { - return (!ReadGroups.IsEmpty()); -} - -/*! \fn bool SamHeader::HasPrograms(void) const - \brief Returns \c true if header contains any \@PG entries -*/ -bool SamHeader::HasPrograms(void) const { - return (!Programs.IsEmpty()); -} - -/*! \fn bool SamHeader::HasComments(void) const - \brief Returns \c true if header contains any \@CO entries -*/ -bool SamHeader::HasComments(void) const { - return (!Comments.empty()); -} - -/*! \fn bool SamHeader::IsValid(bool verbose = false) const - \brief Checks header contents for required data and proper formatting. - \param verbose If set to true, validation errors & warnings will be printed to stderr. - Otherwise, output is suppressed and only validation check occurs. - \return \c true if SAM header is well-formed -*/ -bool SamHeader::IsValid(bool verbose) const { - SamHeaderValidator validator(*this); - return validator.Validate(verbose); -} - -/*! \fn void SamHeader::SetHeaderText(const std::string& headerText) - \brief Replaces header contents with \a headerText. - \param headerText SAM formatted-text that will be parsed into data fields -*/ -void SamHeader::SetHeaderText(const std::string& headerText) { - - // clear prior data - Clear(); - - // parse header text into data - SamFormatParser parser(*this); - parser.Parse(headerText); -} - -/*! \fn std::string SamHeader::ToString(void) const - \brief Converts data fields to SAM-formatted text. - - Applies any local modifications made since creating this object or calling SetHeaderText(). - - \return SAM-formatted header text -*/ -string SamHeader::ToString(void) const { - SamFormatPrinter printer(*this); - return printer.ToString(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamHeader.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,69 +0,0 @@ -// *************************************************************************** -// SamHeader.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header data fields. -// *************************************************************************** - -#ifndef SAM_HEADER_H -#define SAM_HEADER_H - -#include <api/api_global.h> -#include <api/SamProgramChain.h> -#include <api/SamReadGroupDictionary.h> -#include <api/SamSequenceDictionary.h> -#include <string> -#include <vector> - -namespace BamTools { - -struct API_EXPORT SamHeader { - - // ctor & dtor - SamHeader(const std::string& headerText = ""); - SamHeader(const SamHeader& other); - ~SamHeader(void); - - // query/modify entire SamHeader - void Clear(void); // clears all header contents - bool IsValid(bool verbose = false) const; // returns true if SAM header is well-formed - void SetHeaderText(const std::string& headerText); // replaces data fields with contents of SAM-formatted text - std::string ToString(void) const; // returns the printable, SAM-formatted header text - - // convenience query methods - bool HasVersion(void) const; // returns true if header contains format version entry - bool HasSortOrder(void) const; // returns true if header contains sort order entry - bool HasGroupOrder(void) const; // returns true if header contains group order entry - bool HasSequences(void) const; // returns true if header contains any sequence entries - bool HasReadGroups(void) const; // returns true if header contains any read group entries - bool HasPrograms(void) const; // returns true if header contains any program record entries - bool HasComments(void) const; // returns true if header contains comments - - // -------------- - // data members - // -------------- - - // header metadata (@HD line) - std::string Version; // VN:<Version> *Required for valid SAM header, if @HD record is present* - std::string SortOrder; // SO:<SortOrder> - std::string GroupOrder; // GO:<GroupOrder> - - // header sequences (@SQ entries) - SamSequenceDictionary Sequences; - - // header read groups (@RG entries) - SamReadGroupDictionary ReadGroups; - - // header program data (@PG entries) - SamProgramChain Programs; - - // header comments (@CO entries) - std::vector<std::string> Comments; -}; - -} // namespace BamTools - -#endif // SAM_HEADER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,140 +0,0 @@ -// *************************************************************************** -// SamProgram.cpp (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header program records. -// *************************************************************************** - -#include <api/SamProgram.h> -using namespace BamTools; -using namespace std; - -/*! \struct BamTools::SamProgram - \brief Represents a SAM program record. - - Provides direct read/write access to the SAM header program records. - - \sa \samSpecURL -*/ -/*! \var SamProgram::CommandLine - \brief corresponds to \@PG CL:\<CommandLine\> -*/ -/*! \var SamProgram::ID - \brief corresponds to \@PG ID:\<ID\> - - Required for valid SAM header. -*/ -/*! \var SamProgram::Name - \brief corresponds to \@PG PN:\<Name\> -*/ -/*! \var SamProgram::PreviousProgramID - \brief corresponds to \@PG PP:\<PreviousProgramID\> -*/ -/*! \var SamProgram::Version - \brief corresponds to \@PG VN:\<Version\> -*/ -/*! \var SamProgram::NextProgramID - \internal - Holds ID of the "next" program record in a SamProgramChain -*/ - -/*! \fn SamProgram::SamProgram(void) - \brief default constructor -*/ -SamProgram::SamProgram(void) - : CommandLine("") - , ID("") - , Name("") - , PreviousProgramID("") - , Version("") - , NextProgramID("") -{ } - -/*! \fn SamProgram::SamProgram(const std::string& id) - \brief constructs program record with \a id - - \param id desired program record ID -*/ -SamProgram::SamProgram(const std::string& id) - : CommandLine("") - , ID(id) - , Name("") - , PreviousProgramID("") - , Version("") - , NextProgramID("") -{ } - -/*! \fn SamProgram::SamProgram(const SamProgram& other) - \brief copy constructor -*/ -SamProgram::SamProgram(const SamProgram& other) - : CommandLine(other.CommandLine) - , ID(other.ID) - , Name(other.Name) - , PreviousProgramID(other.PreviousProgramID) - , Version(other.Version) - , NextProgramID(other.NextProgramID) -{ } - -/*! \fn SamProgram::~SamProgram(void) - \brief destructor -*/ -SamProgram::~SamProgram(void) { } - -/*! \fn void SamProgram::Clear(void) - \brief Clears all data fields. -*/ -void SamProgram::Clear(void) { - CommandLine.clear(); - ID.clear(); - Name.clear(); - PreviousProgramID.clear(); - Version.clear(); - NextProgramID.clear(); -} - -/*! \fn bool SamProgram::HasCommandLine(void) const - \brief Returns \c true if program record contains \@PG: CL:\<CommandLine\> -*/ -bool SamProgram::HasCommandLine(void) const { - return (!CommandLine.empty()); -} - -/*! \fn bool SamProgram::HasID(void) const - \brief Returns \c true if program record contains \@PG: ID:\<ID\> -*/ -bool SamProgram::HasID(void) const { - return (!ID.empty()); -} - -/*! \fn bool SamProgram::HasName(void) const - \brief Returns \c true if program record contains \@PG: PN:\<Name\> -*/ -bool SamProgram::HasName(void) const { - return (!Name.empty()); -} - -/*! \fn bool SamProgram::HasNextProgramID(void) const - \internal - \return true if program has a "next" record in a SamProgramChain -*/ -bool SamProgram::HasNextProgramID(void) const { - return (!NextProgramID.empty()); -} - -/*! \fn bool SamProgram::HasPreviousProgramID(void) const - \brief Returns \c true if program record contains \@PG: PP:\<PreviousProgramID\> -*/ -bool SamProgram::HasPreviousProgramID(void) const { - return (!PreviousProgramID.empty()); -} - -/*! \fn bool SamProgram::HasVersion(void) const - \brief Returns \c true if program record contains \@PG: VN:\<Version\> -*/ -bool SamProgram::HasVersion(void) const { - return (!Version.empty()); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgram.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,62 +0,0 @@ -// *************************************************************************** -// SamProgram.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM header program records. -// *************************************************************************** - -#ifndef SAM_PROGRAM_H -#define SAM_PROGRAM_H - -#include "api/api_global.h" -#include <string> - -namespace BamTools { - -class SamProgramChain; - -struct API_EXPORT SamProgram { - - // ctor & dtor - SamProgram(void); - SamProgram(const std::string& id); - SamProgram(const SamProgram& other); - ~SamProgram(void); - - // query/modify entire program record - void Clear(void); // clears all data fields - - // convenience query methods - bool HasCommandLine(void) const; // returns true if program record has a command line entry - bool HasID(void) const; // returns true if program record has an ID - bool HasName(void) const; // returns true if program record has a name - bool HasPreviousProgramID(void) const; // returns true if program record has a 'previous program ID' - bool HasVersion(void) const; // returns true if program record has a version - - // data members - std::string CommandLine; // CL:<CommandLine> - std::string ID; // ID:<ID> *Required for valid SAM header* - std::string Name; // PN:<Name> - std::string PreviousProgramID; // PP:<PreviousProgramID> - std::string Version; // VN:<Version> - - // internal (non-standard) methods & fields - private: - bool HasNextProgramID(void) const; - std::string NextProgramID; - friend class BamTools::SamProgramChain; -}; - -/*! \fn bool operator==(const SamProgram& lhs, const SamProgram& rhs) - \brief tests equality by comparing program IDs -*/ -API_EXPORT inline bool operator==(const SamProgram& lhs, const SamProgram& rhs) { - return lhs.ID == rhs.ID; -} - -} // namespace BamTools - -#endif // SAM_PROGRAM_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,352 +0,0 @@\n-// ***************************************************************************\n-// SamProgramChain.cpp (c) 2011 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 19 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides methods for operating on a SamProgram record "chain"\n-// ***************************************************************************\n-\n-#include <api/SamProgramChain.h>\n-using namespace BamTools;\n-\n-#include <algorithm>\n-#include <iostream>\n-#include <cstdlib>\n-using namespace std;\n-\n-/*! \\class BamTools::SamProgramChain\n- \\brief Sorted container "chain" of SamProgram records.\n-\n- Provides methods for operating on a collection of SamProgram records.\n-\n- N.B. - Underlying container is *NOT* ordered by linkage, but by order of\n- appearance in SamHeader and subsequent Add() calls. Using the current\n- iterators will not allow you to step through the header\'s program history.\n- Instead use First()/Last() to access oldest/newest records, respectively.\n-*/\n-\n-/*! \\fn SamProgramChain::SamProgramChain(void)\n- \\brief constructor\n-*/\n-SamProgramChain::SamProgramChain(void) { }\n-\n-/*! \\fn SamProgramChain::SamProgramChain(const SamProgramChain& other)\n- \\brief copy constructor\n-*/\n-SamProgramChain::SamProgramChain(const SamProgramChain& other)\n- : m_data(other.m_data)\n-{ }\n-\n-/*! \\fn SamProgramChain::~SamProgramChain(void)\n- \\brief destructor\n-*/\n-SamProgramChain::~SamProgramChain(void) { }\n-\n-/*! \\fn void SamProgramChain::Add(SamProgram& program)\n- \\brief Appends a program to program chain.\n-\n- Duplicate entries are silently discarded.\n-\n- N.B. - Underlying container is *NOT* ordered by linkage, but by order of\n- appearance in SamHeader and subsequent Add() calls. Using the current\n- iterators will not allow you to step through the header\'s program history.\n- Instead use First()/Last() to access oldest/newest records, respectively.\n-\n- \\param program entry to be appended\n-*/\n-void SamProgramChain::Add(SamProgram& program) {\n-\n- // ignore duplicated records\n- if ( Contains(program) )\n- return;\n-\n- // if other programs already in chain, try to find the "next" record\n- // tries to match another record\'s PPID with @program\'s ID\n- if ( !IsEmpty() )\n- program.NextProgramID = NextIdFor(program.ID);\n-\n- // store program record\n- m_data.push_back(program);\n-}\n-\n-/*! \\fn void SamProgramChain::Add(const std::vector<SamProgram>& programs)\n- \\brief Appends a batch of programs to the end of the chain.\n-\n- This is an overloaded function.\n-\n- \\param programs batch of program records to append\n- \\sa Add()\n-*/\n-void SamProgramChain::Add(std::vector<SamProgram>& programs) {\n- vector<SamProgram>::iterator pgIter = programs.begin();\n- vector<SamProgram>::iterator pgEnd = programs.end();\n- for ( ; pgIter != pgEnd; ++pgIter )\n- Add(*pgIter);\n-}\n-\n-/*! \\fn SamProgramIterator SamProgramChain::Begin(void)\n- \\return an STL iterator pointing to the first (oldest) program record\n- \\sa ConstBegin(), End(), First()\n-*/\n-SamProgramIterator SamProgramChain::Begin(void) {\n- return m_data.begin();\n-}\n-\n-/*! \\fn SamProgramConstIterator SamProgramChain::Begin(void) const\n- \\return an STL const_iterator pointing to the first (oldest) program record\n-\n- This is an overloaded function.\n-\n- \\sa ConstBegin(), End(), First()\n-*/\n-SamProgramConstIterator SamProgramChain::Begin(void) const {\n- return m_data.begin();\n-}\n-\n-/*! \\fn void SamProgramChain::Clear(void)\n- \\brief Clears all program records.\n-*/\n-void SamProgramChain::Clear(void) {\n- m_data.clear();\n-}\n-\n-/*! \\fn SamProgramConstIterator SamProgramChain::ConstBegin(void) const\n- \\return an STL const_iterator pointing to the first (oldest) program record\n- '..b'!= end; ++iter ) {\n- const SamProgram& current = (*iter);\n- if ( current.ID == programId )\n- break;\n- }\n- return distance( begin, iter );\n-}\n-\n-/*! \\fn bool SamProgramChain::IsEmpty(void) const\n- \\brief Returns \\c true if chain contains no records\n- \\sa Size()\n-*/\n-bool SamProgramChain::IsEmpty(void) const {\n- return m_data.empty();\n-}\n-\n-/*! \\fn SamProgram& SamProgramChain::Last(void)\n- \\brief Fetches last (newest) record in the chain.\n-\n- N.B. - This function will fail if the chain is empty. If this is possible,\n- check the result of IsEmpty() before calling this function.\n-\n- \\return a modifiable reference to the last (newest) program entry\n- \\sa End(), First()\n-*/\n-SamProgram& SamProgramChain::Last(void) {\n- // find first record in container that has no NextProgramID entry\n- SamProgramIterator iter = Begin();\n- SamProgramIterator end = End();\n- for ( ; iter != end; ++iter ) {\n- SamProgram& current = (*iter);\n- if ( !current.HasNextProgramID() )\n- return current;\n- }\n-\n- // otherwise error\n- cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n- exit(1);\n-}\n-\n-/*! \\fn const SamProgram& SamProgramChain::Last(void) const\n- \\brief Fetches last (newest) record in the chain.\n-\n- This is an overloaded function.\n-\n- N.B. - This function will fail if the chain is empty. If this is possible,\n- check the result of IsEmpty() before calling this function.\n-\n- \\return a read-only reference to the last (newest) program entry\n- \\sa End(), ConstEnd(), First()\n-*/\n-const SamProgram& SamProgramChain::Last(void) const {\n- // find first record in container that has no NextProgramID entry\n- SamProgramConstIterator iter = ConstBegin();\n- SamProgramConstIterator end = ConstEnd();\n- for ( ; iter != end; ++iter ) {\n- const SamProgram& current = (*iter);\n- if ( !current.HasNextProgramID() )\n- return current;\n- }\n-\n- // otherwise error\n- cerr << "SamProgramChain ERROR - could not determine last record" << endl;\n- exit(1);\n-}\n-\n-/*! \\fn const std::string SamProgramChain::NextIdFor(const std::string& programId) const\n- \\internal\n- \\return ID of program record, whose PreviousProgramID matches \\a programId.\n- Otherwise, returns empty string if none found.\n-*/\n-const std::string SamProgramChain::NextIdFor(const std::string& programId) const {\n-\n- // find first record in container whose PreviousProgramID matches @programId\n- SamProgramConstIterator iter = ConstBegin();\n- SamProgramConstIterator end = ConstEnd();\n- for ( ; iter != end; ++iter ) {\n- const SamProgram& current = (*iter);\n- if ( !current.HasPreviousProgramID() &&\n- current.PreviousProgramID == programId\n- )\n- {\n- return current.ID;\n- }\n- }\n-\n- // none found\n- return string();\n-}\n-\n-/*! \\fn int SamProgramChain::Size(void) const\n- \\brief Returns number of program records in the chain.\n- \\sa IsEmpty()\n-*/\n-int SamProgramChain::Size(void) const {\n- return m_data.size();\n-}\n-\n-/*! \\fn SamProgram& SamProgramChain::operator[](const std::string& programId)\n- \\brief Retrieves the modifiable SamProgram record that matches \\a programId.\n-\n- NOTE - If the chain contains no read group matching this ID, this function will\n- print an error and terminate.\n-\n- \\param programId ID of program record to retrieve\n- \\return a modifiable reference to the SamProgram associated with the ID\n-*/\n-SamProgram& SamProgramChain::operator[](const std::string& programId) {\n-\n- // look up program record matching this ID\n- int index = IndexOf(programId);\n-\n- // if record not found\n- if ( index == (int)m_data.size() ) {\n- cerr << "SamProgramChain ERROR - unknown programId: " << programId << endl;\n- exit(1);\n- }\n-\n- // otherwise return program record at index\n- return m_data.at(index);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamProgramChain.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,86 +0,0 @@ -// *************************************************************************** -// SamProgramChain.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides methods for operating on a SamProgram record "chain" -// *************************************************************************** - -#ifndef SAM_PROGRAMCHAIN_H -#define SAM_PROGRAMCHAIN_H - -#include <api/api_global.h> -#include <api/SamProgram.h> -#include <string> -#include <vector> - -namespace BamTools { - -// chain is *NOT* sorted in any order -// use First()/Last() to retrieve oldest/newest programs, respectively -typedef std::vector<SamProgram> SamProgramContainer; -typedef SamProgramContainer::iterator SamProgramIterator; -typedef SamProgramContainer::const_iterator SamProgramConstIterator; - -class API_EXPORT SamProgramChain { - - // ctor & dtor - public: - SamProgramChain(void); - SamProgramChain(const SamProgramChain& other); - ~SamProgramChain(void); - - // query/modify program data - public: - // appends a program record to the chain - void Add(SamProgram& program); - void Add(std::vector<SamProgram>& programs); - - // clears all read group entries - void Clear(void); - - // returns true if chain contains this program record (matches on ID) - bool Contains(const SamProgram& program) const; - bool Contains(const std::string& programId) const; - - // returns the first (oldest) program in the chain - SamProgram& First(void); - const SamProgram& First(void) const; - - // returns true if chain is empty - bool IsEmpty(void) const; - - // returns last (most recent) program in the chain - SamProgram& Last(void); - const SamProgram& Last(void) const; - - // returns number of program records in the chain - int Size(void) const; - - // retrieves a modifiable reference to the SamProgram object associated with this ID - SamProgram& operator[](const std::string& programId); - - // retrieve STL-compatible iterators - public: - SamProgramIterator Begin(void); // returns iterator to begin() - SamProgramConstIterator Begin(void) const; // returns const_iterator to begin() - SamProgramConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamProgramIterator End(void); // returns iterator to end() - SamProgramConstIterator End(void) const; // returns const_iterator to end() - SamProgramConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // internal methods - private: - int IndexOf(const std::string& programId) const; - const std::string NextIdFor(const std::string& programId) const; - - // data members - private: - SamProgramContainer m_data; -}; - -} // namespace BamTools - -#endif // SAM_PROGRAMCHAIN_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,222 +0,0 @@ -// *************************************************************************** -// SamReadGroup.cpp (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM read group data fields. -// *************************************************************************** - -#include <api/SamReadGroup.h> -using namespace BamTools; -using namespace std; - -/*! \struct BamTools::SamReadGroup - \brief Represents a SAM read group entry. - - Provides direct read/write access to the SAM read group data fields. - - \sa \samSpecURL -*/ -/*! \var SamReadGroup::Description - \brief corresponds to \@RG DS:\<Description\> -*/ -/*! \var SamReadGroup::FlowOrder - \brief corresponds to \@RG FO:\<FlowOrder\> -*/ -/*! \var SamReadGroup::ID - \brief corresponds to \@RG ID:\<ID\> - - Required for valid SAM header. -*/ -/*! \var SamReadGroup::KeySequence - \brief corresponds to \@RG KS:\<KeySequence\> -*/ -/*! \var SamReadGroup::Library - \brief corresponds to \@RG LB:\<Library\> -*/ -/*! \var SamReadGroup::PlatformUnit - \brief corresponds to \@RG PU:\<PlatformUnit\> -*/ -/*! \var SamReadGroup::PredictedInsertSize - \brief corresponds to \@RG PI:\<PredictedInsertSize\> -*/ -/*! \var SamReadGroup::ProductionDate - \brief corresponds to \@RG DT:\<ProductionDate\> -*/ -/*! \var SamReadGroup::Program - \brief corresponds to \@RG PG:\<Program\> -*/ -/*! \var SamReadGroup::Sample - \brief corresponds to \@RG SM:\<Sample\> -*/ -/*! \var SamReadGroup::SequencingCenter - \brief corresponds to \@RG CN:\<SequencingCenter\> -*/ -/*! \var SamReadGroup::SequencingTechnology - \brief corresponds to \@RG PL:\<SequencingTechnology\> -*/ - -/*! \fn SamReadGroup::SamReadGroup(void) - \brief default constructor -*/ -SamReadGroup::SamReadGroup(void) - : Description("") - , FlowOrder("") - , ID("") - , KeySequence("") - , Library("") - , PlatformUnit("") - , PredictedInsertSize("") - , ProductionDate("") - , Program("") - , Sample("") - , SequencingCenter("") - , SequencingTechnology("") -{ } - -/*! \fn SamReadGroup::SamReadGroup(const std::string& id) - \brief constructs read group with \a id - - \param id desired read group ID -*/ -SamReadGroup::SamReadGroup(const std::string& id) - : Description("") - , FlowOrder("") - , ID(id) - , KeySequence("") - , Library("") - , PlatformUnit("") - , PredictedInsertSize("") - , ProductionDate("") - , Program("") - , Sample("") - , SequencingCenter("") - , SequencingTechnology("") -{ } - -/*! \fn SamReadGroup::SamReadGroup(const SamReadGroup& other) - \brief copy constructor -*/ -SamReadGroup::SamReadGroup(const SamReadGroup& other) - : Description(other.Description) - , FlowOrder(other.FlowOrder) - , ID(other.ID) - , KeySequence(other.KeySequence) - , Library(other.Library) - , PlatformUnit(other.PlatformUnit) - , PredictedInsertSize(other.PredictedInsertSize) - , ProductionDate(other.ProductionDate) - , Program(other.Program) - , Sample(other.Sample) - , SequencingCenter(other.SequencingCenter) - , SequencingTechnology(other.SequencingTechnology) -{ } - -/*! \fn SamReadGroup::~SamReadGroup(void) - \brief destructor -*/ -SamReadGroup::~SamReadGroup(void) { } - -/*! \fn void SamReadGroup::Clear(void) - \brief Clears all data fields. -*/ -void SamReadGroup::Clear(void) { - Description.clear(); - FlowOrder.clear(); - ID.clear(); - KeySequence.clear(); - Library.clear(); - PlatformUnit.clear(); - PredictedInsertSize.clear(); - ProductionDate.clear(); - Program.clear(); - Sample.clear(); - SequencingCenter.clear(); - SequencingTechnology.clear(); -} - -/*! \fn bool SamReadGroup::HasDescription(void) const - \brief Returns \c true if read group contains \@RG DS:\<Description\> -*/ -bool SamReadGroup::HasDescription(void) const { - return (!Description.empty()); -} - -/*! \fn bool SamReadGroup::HasFlowOrder(void) const - \brief Returns \c true if read group contains \@RG FO:\<FlowOrder\> -*/ -bool SamReadGroup::HasFlowOrder(void) const { - return (!FlowOrder.empty()); -} - -/*! \fn bool SamReadGroup::HasID(void) const - \brief Returns \c true if read group contains \@RG: ID:\<ID\> -*/ -bool SamReadGroup::HasID(void) const { - return (!ID.empty()); -} - -/*! \fn bool SamReadGroup::HasKeySequence(void) const - \brief Returns \c true if read group contains \@RG KS:\<KeySequence\> -*/ -bool SamReadGroup::HasKeySequence(void) const { - return (!KeySequence.empty()); -} - -/*! \fn bool SamReadGroup::HasLibrary(void) const - \brief Returns \c true if read group contains \@RG LB:\<Library\> -*/ -bool SamReadGroup::HasLibrary(void) const { - return (!Library.empty()); -} - -/*! \fn bool SamReadGroup::HasPlatformUnit(void) const - \brief Returns \c true if read group contains \@RG PU:\<PlatformUnit\> -*/ -bool SamReadGroup::HasPlatformUnit(void) const { - return (!PlatformUnit.empty()); -} - -/*! \fn bool SamReadGroup::HasPredictedInsertSize(void) const - \brief Returns \c true if read group contains \@RG PI:\<PredictedInsertSize\> -*/ -bool SamReadGroup::HasPredictedInsertSize(void) const { - return (!PredictedInsertSize.empty()); -} - -/*! \fn bool SamReadGroup::HasProductionDate(void) const - \brief Returns \c true if read group contains \@RG DT:\<ProductionDate\> -*/ -bool SamReadGroup::HasProductionDate(void) const { - return (!ProductionDate.empty()); -} - -/*! \fn bool SamReadGroup::HasProgram(void) const - \brief Returns \c true if read group contains \@RG PG:\<Program\> -*/ -bool SamReadGroup::HasProgram(void) const { - return (!Program.empty()); -} - -/*! \fn bool SamReadGroup::HasSample(void) const - \brief Returns \c true if read group contains \@RG SM:\<Sample\> -*/ -bool SamReadGroup::HasSample(void) const { - return (!Sample.empty()); -} - -/*! \fn bool SamReadGroup::HasSequencingCenter(void) const - \brief Returns \c true if read group contains \@RG CN:\<SequencingCenter\> -*/ -bool SamReadGroup::HasSequencingCenter(void) const { - return (!SequencingCenter.empty()); -} - -/*! \fn bool SamReadGroup::HasSequencingTechnology(void) const - \brief Returns \c true if read group contains \@RG PL:\<SequencingTechnology\> -*/ -bool SamReadGroup::HasSequencingTechnology(void) const { - return (!SequencingTechnology.empty()); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroup.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,69 +0,0 @@ -// *************************************************************************** -// SamReadGroup.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM read group data fields. -// *************************************************************************** - -#ifndef SAM_READGROUP_H -#define SAM_READGROUP_H - -#include "api/api_global.h" -#include <string> - -namespace BamTools { - -struct API_EXPORT SamReadGroup { - - // ctor & dtor - SamReadGroup(void); - SamReadGroup(const std::string& id); - SamReadGroup(const SamReadGroup& other); - ~SamReadGroup(void); - - // query/modify entire read group - void Clear(void); // clears all data fields - - // convenience query methods - bool HasDescription(void) const; // returns true if read group has a description - bool HasFlowOrder(void) const; // returns true if read group has a flow order entry - bool HasID(void) const; // returns true if read group has a group ID - bool HasKeySequence(void) const; // returns true if read group has a key sequence - bool HasLibrary(void) const; // returns true if read group has a library name - bool HasPlatformUnit(void) const; // returns true if read group has a platform unit ID - bool HasPredictedInsertSize(void) const; // returns true if read group has a predicted insert size - bool HasProductionDate(void) const; // returns true if read group has a production date - bool HasProgram(void) const; // returns true if read group has a program entry - bool HasSample(void) const; // returns true if read group has a sample name - bool HasSequencingCenter(void) const; // returns true if read group has a sequencing center ID - bool HasSequencingTechnology(void) const; // returns true if read group has a sequencing technology ID - - - // data fields - std::string Description; // DS:<Description> - std::string FlowOrder; // FO:<FlowOrder> - std::string ID; // ID:<ID> *Required for valid SAM header* - std::string KeySequence; // KS:<KeySequence> - std::string Library; // LB:<Library> - std::string PlatformUnit; // PU:<PlatformUnit> - std::string PredictedInsertSize; // PI:<PredictedInsertSize> - std::string ProductionDate; // DT:<ProductionDate> - std::string Program; // PG:<Program> - std::string Sample; // SM:<Sample> - std::string SequencingCenter; // CN:<SequencingCenter> - std::string SequencingTechnology; // PL:<SequencingTechnology> -}; - -/*! \fn bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) - \brief tests equality by comparing read group IDs -*/ -API_EXPORT inline bool operator==(const SamReadGroup& lhs, const SamReadGroup& rhs) { - return lhs.ID == rhs.ID; -} - -} // namespace BamTools - -#endif // SAM_READGROUP_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,290 +0,0 @@\n-// ***************************************************************************\n-// SamReadGroupDictionary.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 18 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides methods for operating on a collection of SamReadGroup entries.\n-// ***************************************************************************\n-\n-#include <api/SamReadGroupDictionary.h>\n-using namespace BamTools;\n-\n-#include <algorithm>\n-#include <iostream>\n-using namespace std;\n-\n-/*! \\class BamTools::SamReadGroupDictionary\n- \\brief Container of SamReadGroup entries.\n-\n- Provides methods for operating on a collection of SamReadGroup entries.\n-*/\n-\n-/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(void)\n- \\brief constructor\n-*/\n-SamReadGroupDictionary::SamReadGroupDictionary(void) { }\n-\n-/*! \\fn SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n- \\brief copy constructor\n-*/\n-SamReadGroupDictionary::SamReadGroupDictionary(const SamReadGroupDictionary& other)\n- : m_data(other.m_data)\n-{ }\n-\n-/*! \\fn SamReadGroupDictionary::~SamReadGroupDictionary(void)\n- \\brief destructor\n-*/\n-SamReadGroupDictionary::~SamReadGroupDictionary(void) { }\n-\n-/*! \\fn void SamReadGroupDictionary::Add(const SamReadGroup& readGroup)\n- \\brief Adds a read group to the dictionary.\n-\n- Duplicate entries are silently discarded.\n-\n- \\param readGroup entry to be added\n-*/\n-void SamReadGroupDictionary::Add(const SamReadGroup& readGroup) {\n-\n- // TODO: report error on attempted duplicate?\n-\n- if ( IsEmpty() || !Contains(readGroup) )\n- m_data.push_back(readGroup);\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Add(const std::string& readGroupId)\n- \\brief Adds a read group to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param readGroupId ID of read group to be added\n- \\sa Add()\n-*/\n-void SamReadGroupDictionary::Add(const std::string& readGroupId) {\n- Add( SamReadGroup(readGroupId) );\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups)\n- \\brief Adds multiple read groups to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param readGroups entries to be added\n- \\sa Add()\n-*/\n-void SamReadGroupDictionary::Add(const std::vector<SamReadGroup>& readGroups) {\n- vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n- vector<SamReadGroup>::const_iterator rgEnd = readGroups.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Add(*rgIter);\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds)\n- \\brief Adds multiple read groups to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param readGroupIds IDs of read groups to be added\n- \\sa Add()\n-*/\n-void SamReadGroupDictionary::Add(const std::vector<std::string>& readGroupIds) {\n- vector<string>::const_iterator rgIter = readGroupIds.begin();\n- vector<string>::const_iterator rgEnd = readGroupIds.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Add(*rgIter);\n-}\n-\n-/*! \\fn SamReadGroupIterator SamReadGroupDictionary::Begin(void)\n- \\return an STL iterator pointing to the first read group\n- \\sa ConstBegin(), End()\n-*/\n-SamReadGroupIterator SamReadGroupDictionary::Begin(void) {\n- return m_data.begin();\n-}\n-\n-/*! \\fn SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const\n- \\return an STL const_iterator pointing to the first read group\n-\n- This is an overloaded function.\n-\n- \\sa ConstBegin(), End()\n-*/\n-SamReadGroupConstIterator SamReadGroupDictionary::Begin(void) const {\n- return m_data.begin();\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Clear(void)\n- \\brief Clears all read group entries.\n-*/\n-void SamReadGroupDicti'..b' return m_data.end();\n-}\n-\n-/*! \\fn int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const\n- \\internal\n- \\return index of read group if found. Otherwise, returns vector::size() (invalid index).\n-*/\n-int SamReadGroupDictionary::IndexOf(const std::string& readGroupId) const {\n- SamReadGroupConstIterator begin = ConstBegin();\n- SamReadGroupConstIterator iter = begin;\n- SamReadGroupConstIterator end = ConstEnd();\n- for ( ; iter != end; ++iter ) {\n- const SamReadGroup& current = (*iter);\n- if ( current.ID == readGroupId )\n- break;\n- }\n- return distance( begin, iter );\n-}\n-\n-/*! \\fn bool SamReadGroupDictionary::IsEmpty(void) const\n- \\brief Returns \\c true if dictionary contains no read groups\n- \\sa Size()\n-*/\n-bool SamReadGroupDictionary::IsEmpty(void) const {\n- return m_data.empty();\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup)\n- \\brief Removes read group from dictionary, if found (matching on ID).\n-\n- This is an overloaded function.\n-\n- \\param readGroup read group to remove (matches on ID)\n-*/\n-void SamReadGroupDictionary::Remove(const SamReadGroup& readGroup) {\n- Remove( readGroup.ID );\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Remove(const std::string& readGroupId)\n- \\brief Removes read group from dictionary, if found.\n- \\param readGroupId ID of read group to remove\n- \\sa Remove()\n-*/\n-void SamReadGroupDictionary::Remove(const std::string& readGroupId) {\n- if ( Contains(readGroupId) )\n- m_data.erase( m_data.begin() + IndexOf(readGroupId) );\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups)\n- \\brief Removes multiple read groups from dictionary (matching on ID).\n-\n- This is an overloaded function.\n-\n- \\param readGroups read groups to remove\n- \\sa Remove()\n-*/\n-void SamReadGroupDictionary::Remove(const std::vector<SamReadGroup>& readGroups) {\n- vector<SamReadGroup>::const_iterator rgIter = readGroups.begin();\n- vector<SamReadGroup>::const_iterator rgEnd = readGroups.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Remove(*rgIter);\n-}\n-\n-/*! \\fn void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds)\n- \\brief Removes multiple read groups from dictionary.\n-\n- This is an overloaded function.\n-\n- \\param readGroupIds IDs of the read groups to remove\n- \\sa Remove()\n-*/\n-void SamReadGroupDictionary::Remove(const std::vector<std::string>& readGroupIds) {\n- vector<string>::const_iterator rgIter = readGroupIds.begin();\n- vector<string>::const_iterator rgEnd = readGroupIds.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Remove(*rgIter);\n-}\n-\n-/*! \\fn int SamReadGroupDictionary::Size(void) const\n- \\brief Returns number of read groups in dictionary.\n- \\sa IsEmpty()\n-*/\n-int SamReadGroupDictionary::Size(void) const {\n- return m_data.size();\n-}\n-\n-/*! \\fn SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId)\n- \\brief Retrieves the modifiable SamReadGroup that matches \\a readGroupId.\n-\n- NOTE - If the dictionary contains no read group matching this ID, this function inserts\n- a new one with this ID, and returns a reference to it.\n-\n- If you want to avoid this insertion behavior, check the result of Contains() before\n- using this operator.\n-\n- \\param readGroupId ID of read group to retrieve\n- \\return a modifiable reference to the SamReadGroup associated with the ID\n-*/\n-SamReadGroup& SamReadGroupDictionary::operator[](const std::string& readGroupId) {\n-\n- // look up read group ID\n- int index = IndexOf(readGroupId);\n-\n- // if found, return read group at index\n- if ( index != (int)m_data.size() )\n- return m_data[index];\n-\n- // otherwise, append new read group and return reference\n- else {\n- SamReadGroup rg(readGroupId);\n- m_data.push_back(rg);\n- return m_data.back();\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamReadGroupDictionary.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,87 +0,0 @@ -// *************************************************************************** -// SamReadGroupDictionary.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides methods for operating on a collection of SamReadGroup entries. -// *************************************************************************** - -#ifndef SAM_READGROUP_DICTIONARY_H -#define SAM_READGROUP_DICTIONARY_H - -#include <api/api_global.h> -#include <api/SamReadGroup.h> -#include <string> -#include <vector> - -namespace BamTools { - -typedef std::vector<SamReadGroup> SamReadGroupContainer; -typedef SamReadGroupContainer::iterator SamReadGroupIterator; -typedef SamReadGroupContainer::const_iterator SamReadGroupConstIterator; - -class API_EXPORT SamReadGroupDictionary { - - // ctor & dtor - public: - SamReadGroupDictionary(void); - SamReadGroupDictionary(const SamReadGroupDictionary& other); - ~SamReadGroupDictionary(void); - - // query/modify read group data - public: - // adds a read group - void Add(const SamReadGroup& readGroup); - void Add(const std::string& readGroupId); - - // adds multiple read groups - void Add(const std::vector<SamReadGroup>& readGroups); - void Add(const std::vector<std::string>& readGroupIds); - - // clears all read group entries - void Clear(void); - - // returns true if dictionary contains this read group - bool Contains(const SamReadGroup& readGroup) const; - bool Contains(const std::string& readGroupId) const; - - // returns true if dictionary is empty - bool IsEmpty(void) const; - - // removes read group, if found - void Remove(const SamReadGroup& readGroup); - void Remove(const std::string& readGroupId); - - // removes multiple read groups - void Remove(const std::vector<SamReadGroup>& readGroups); - void Remove(const std::vector<std::string>& readGroupIds); - - // returns number of read groups in dictionary - int Size(void) const; - - // retrieves a modifiable reference to the SamReadGroup object associated with this ID - SamReadGroup& operator[](const std::string& readGroupId); - - // retrieve STL-compatible iterators - public: - SamReadGroupIterator Begin(void); // returns iterator to begin() - SamReadGroupConstIterator Begin(void) const; // returns const_iterator to begin() - SamReadGroupConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamReadGroupIterator End(void); // returns iterator to end() - SamReadGroupConstIterator End(void) const; // returns const_iterator to end() - SamReadGroupConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // internal methods - private: - int IndexOf(const std::string& readGroupId) const; - - // data members - private: - SamReadGroupContainer m_data; -}; - -} // namespace BamTools - -#endif // SAM_READGROUP_DICTIONARY_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,162 +0,0 @@ -// *************************************************************************** -// SamSequence.cpp (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM sequence data fields. -// *************************************************************************** - -#include <api/SamSequence.h> -#include <sstream> -using namespace BamTools; -using namespace std; - -/*! \struct BamTools::SamSequence - \brief Represents a SAM sequence entry. - - Provides direct read/write access to the SAM sequence data fields. - - \sa \samSpecURL -*/ -/*! \var SamSequence::AssemblyID - \brief corresponds to \@SQ AS:\<AssemblyID\> -*/ -/*! \var SamSequence::Checksum - \brief corresponds to \@SQ M5:\<Checksum\> -*/ -/*! \var SamSequence::Length - \brief corresponds to \@SQ LN:\<Length\> - - Required for valid SAM header. -*/ -/*! \var SamSequence::Name - \brief corresponds to \@SQ SN:\<Name\> - - Required for valid SAM header. -*/ -/*! \var SamSequence::Species - \brief corresponds to \@SQ SP:\<Species\> -*/ -/*! \var SamSequence::URI - \brief corresponds to \@SQ UR:\<URI\> -*/ - -/*! \fn SamSequence::SamSequence(void) - \brief default constructor -*/ -SamSequence::SamSequence(void) - : AssemblyID("") - , Checksum("") - , Length("") - , Name("") - , Species("") - , URI("") -{ } - -/*! \fn SamSequence::SamSequence(const std::string& name, const int& length) - \brief constructs sequence with \a name and \a length - - \param name desired sequence name - \param length desired sequence length (numeric value) -*/ -SamSequence::SamSequence(const std::string& name, - const int& length) - : AssemblyID("") - , Checksum("") - , Name(name) - , Species("") - , URI("") -{ - stringstream s(""); - s << length; - Length = s.str(); -} - -/*! \fn SamSequence::SamSequence(const std::string& name, const std::string& length) - \brief constructs sequence with \a name and \a length - - \param name desired sequence name - \param length desired sequence length (string value) -*/ -SamSequence::SamSequence(const std::string& name, - const std::string& length) - : AssemblyID("") - , Checksum("") - , Length(length) - , Name(name) - , Species("") - , URI("") -{ } - -/*! \fn SamSequence::SamSequence(const SamSequence& other) - \brief copy constructor -*/ -SamSequence::SamSequence(const SamSequence& other) - : AssemblyID(other.AssemblyID) - , Checksum(other.Checksum) - , Length(other.Length) - , Name(other.Name) - , Species(other.Species) - , URI(other.URI) -{ } - -/*! \fn SamSequence::~SamSequence(void) - \brief destructor -*/ -SamSequence::~SamSequence(void) { } - -/*! \fn void SamSequence::Clear(void) - \brief Clears all data fields. -*/ -void SamSequence::Clear(void) { - AssemblyID.clear(); - Checksum.clear(); - Length.clear(); - Name.clear(); - Species.clear(); - URI.clear(); -} - -/*! \fn bool SamSequence::HasAssemblyID(void) const - \brief Returns \c true if sequence contains \@SQ AS:\<AssemblyID\> -*/ -bool SamSequence::HasAssemblyID(void) const { - return (!AssemblyID.empty()); -} - -/*! \fn bool SamSequence::HasChecksum(void) const - \brief Returns \c true if sequence contains \@SQ M5:\<Checksum\> -*/ -bool SamSequence::HasChecksum(void) const { - return (!Checksum.empty()); -} - -/*! \fn bool SamSequence::HasLength(void) const - \brief Returns \c true if sequence contains \@SQ LN:\<Length\> -*/ -bool SamSequence::HasLength(void) const { - return (!Length.empty()); -} - -/*! \fn bool SamSequence::HasName(void) const - \brief Returns \c true if sequence contains \@SQ SN:\<Name\> -*/ -bool SamSequence::HasName(void) const { - return (!Name.empty()); -} - -/*! \fn bool SamSequence::HasSpecies(void) const - \brief Returns \c true if sequence contains \@SQ SP:\<Species\> -*/ -bool SamSequence::HasSpecies(void) const { - return (!Species.empty()); -} - -/*! \fn bool SamSequence::HasURI(void) const - \brief Returns \c true if sequence contains \@SQ UR:\<URI\> -*/ -bool SamSequence::HasURI(void) const { - return (!URI.empty()); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequence.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,61 +0,0 @@ -// *************************************************************************** -// SamSequence.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides direct read/write access to the SAM sequence data fields. -// *************************************************************************** - -#ifndef SAM_SEQUENCE_H -#define SAM_SEQUENCE_H - -#include <api/api_global.h> -#include <string> - -namespace BamTools { - -struct API_EXPORT SamSequence { - - // ctor & dtor - SamSequence(void); - SamSequence(const std::string& name, const int& length); - SamSequence(const std::string& name, const std::string& length); - SamSequence(const SamSequence& other); - ~SamSequence(void); - - // query/modify entire sequence - void Clear(void); // clears all contents - - // convenience query methods - bool HasAssemblyID(void) const; // returns true if sequence has an assembly ID - bool HasChecksum(void) const; // returns true if sequence has an MD5 checksum - bool HasLength(void) const; // returns true if sequence has a length - bool HasName(void) const; // returns true if sequence has a name - bool HasSpecies(void) const; // returns true if sequence has a species ID - bool HasURI(void) const; // returns true if sequence has a URI - - // data members - std::string AssemblyID; // AS:<AssemblyID> - std::string Checksum; // M5:<Checksum> - std::string Length; // LN:<Length> *Required for valid SAM header* - std::string Name; // SN:<Name> *Required for valid SAM header* - std::string Species; // SP:<Species> - std::string URI; // UR:<URI> -}; - -/*! \fn bool operator==(const SamSequence& lhs, const SamSequence& rhs) - \brief tests equality by comparing sequence names, lengths, & checksums (if available) -*/ -API_EXPORT inline bool operator==(const SamSequence& lhs, const SamSequence& rhs) { - if ( lhs.Name != rhs.Name ) return false; - if ( lhs.Length != rhs.Length ) return false; - if ( lhs.HasChecksum() && rhs.HasChecksum() ) - return (lhs.Checksum == rhs.Checksum); - else return true; -} - -} // namespace BamTools - -#endif // SAM_SEQUENCE_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,293 +0,0 @@\n-// ***************************************************************************\n-// SamSequenceDictionary.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 18 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides methods for operating on a collection of SamSequence entries.\n-// *************************************************************************\n-\n-#include <api/SamSequenceDictionary.h>\n-using namespace BamTools;\n-\n-#include <iostream>\n-using namespace std;\n-\n-/*! \\class BamTools::SamSequenceDictionary\n- \\brief Container of SamSequence entries.\n-\n- Provides methods for operating on a collection of SamSequence entries.\n-*/\n-\n-/*! \\fn SamSequenceDictionary::SamSequenceDictionary(void)\n- \\brief constructor\n-*/\n-SamSequenceDictionary::SamSequenceDictionary(void) { }\n-\n-/*! \\fn SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n- \\brief copy constructor\n-*/\n-SamSequenceDictionary::SamSequenceDictionary(const SamSequenceDictionary& other)\n- : m_data(other.m_data)\n-{ }\n-\n-/*! \\fn SamSequenceDictionary::~SamSequenceDictionary(void)\n- \\brief destructor\n-*/\n-SamSequenceDictionary::~SamSequenceDictionary(void) { }\n-\n-/*! \\fn void SamSequenceDictionary::Add(const SamSequence& sequence)\n- \\brief Adds a sequence to the dictionary.\n-\n- Duplicate entries are silently discarded.\n-\n- \\param sequence entry to be added\n-*/\n-void SamSequenceDictionary::Add(const SamSequence& sequence) {\n-\n- // TODO: report error on attempted duplicate?\n-\n- if ( IsEmpty() || !Contains(sequence) )\n- m_data.push_back(sequence);\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Add(const std::string& name, const int& length)\n- \\brief Adds a sequence to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param name name of sequence entry to be added\n- \\param length length of sequence entry to be added\n- \\sa Add()\n-*/\n-void SamSequenceDictionary::Add(const std::string& name, const int& length) {\n- Add( SamSequence(name, length) );\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences)\n- \\brief Adds multiple sequences to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param sequences entries to be added\n- \\sa Add()\n-*/\n-void SamSequenceDictionary::Add(const std::vector<SamSequence>& sequences) {\n- vector<SamSequence>::const_iterator seqIter = sequences.begin();\n- vector<SamSequence>::const_iterator seqEnd = sequences.end();\n- for ( ; seqIter!= seqEnd; ++seqIter )\n- Add(*seqIter);\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap)\n- \\brief Adds multiple sequences to the dictionary.\n-\n- This is an overloaded function.\n-\n- \\param sequenceMap map of sequence entries (name => length) to be added\n- \\sa Add()\n-*/\n-void SamSequenceDictionary::Add(const std::map<std::string, int>& sequenceMap) {\n- map<string, int>::const_iterator seqIter = sequenceMap.begin();\n- map<string, int>::const_iterator seqEnd = sequenceMap.end();\n- for ( ; seqIter != seqEnd; ++seqIter ) {\n- const string& name = (*seqIter).first;\n- const int& length = (*seqIter).second;\n- Add( SamSequence(name, length) );\n- }\n-}\n-\n-/*! \\fn SamSequenceIterator SamSequenceDictionary::Begin(void)\n- \\return an STL iterator pointing to the first sequence\n- \\sa ConstBegin(), End()\n-*/\n-SamSequenceIterator SamSequenceDictionary::Begin(void) {\n- return m_data.begin();\n-}\n-\n-/*! \\fn SamSequenceConstIterator SamSequenceDictionary::Begin(void) const\n- \\return an STL const_iterator pointing to the first sequence\n-\n- This is an overloaded function.\n-\n- \\sa ConstBegin(), End()\n-*/\n-SamSequenceConstIterator SamSequenceDictionary::Begin(void) const {\n'..b'r SamSequenceDictionary::End(void) const {\n- return m_data.end();\n-}\n-\n-/*! \\fn int SamSequenceDictionary::IndexOf(const std::string& name) const\n- \\internal\n- \\return index of sequence if found (matching on name). Otherwise, returns vector::size() (invalid index).\n-*/\n-int SamSequenceDictionary::IndexOf(const std::string& name) const {\n- SamSequenceConstIterator begin = ConstBegin();\n- SamSequenceConstIterator iter = begin;\n- SamSequenceConstIterator end = ConstEnd();\n- for ( ; iter != end; ++iter ) {\n- const SamSequence& currentSeq = (*iter);\n- if ( currentSeq.Name == name )\n- break;\n- }\n- return distance( begin, iter );\n-}\n-\n-/*! \\fn bool SamSequenceDictionary::IsEmpty(void) const\n- \\brief Returns \\c true if dictionary contains no sequences\n- \\sa Size()\n-*/\n-bool SamSequenceDictionary::IsEmpty(void) const {\n- return m_data.empty();\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Remove(const SamSequence& sequence)\n- \\brief Removes sequence from dictionary, if found (matches on name).\n-\n- This is an overloaded function.\n-\n- \\param sequence SamSequence to remove (matching on name)\n-*/\n-void SamSequenceDictionary::Remove(const SamSequence& sequence) {\n- Remove( sequence.Name );\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Remove(const std::string& sequenceName)\n- \\brief Removes sequence from dictionary, if found.\n-\n- \\param sequenceName name of sequence to remove\n- \\sa Remove()\n-*/\n-void SamSequenceDictionary::Remove(const std::string& sequenceName) {\n- if ( Contains(sequenceName) )\n- m_data.erase( m_data.begin() + IndexOf(sequenceName) );\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences)\n- \\brief Removes multiple sequences from dictionary.\n-\n- This is an overloaded function.\n-\n- \\param sequences sequences to remove\n- \\sa Remove()\n-*/\n-void SamSequenceDictionary::Remove(const std::vector<SamSequence>& sequences) {\n- vector<SamSequence>::const_iterator rgIter = sequences.begin();\n- vector<SamSequence>::const_iterator rgEnd = sequences.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Remove(*rgIter);\n-}\n-\n-/*! \\fn void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames)\n- \\brief Removes multiple sequences from dictionary.\n-\n- This is an overloaded function.\n-\n- \\param sequenceNames names of the sequences to remove\n- \\sa Remove()\n-*/\n-void SamSequenceDictionary::Remove(const std::vector<std::string>& sequenceNames) {\n- vector<string>::const_iterator rgIter = sequenceNames.begin();\n- vector<string>::const_iterator rgEnd = sequenceNames.end();\n- for ( ; rgIter!= rgEnd; ++rgIter )\n- Remove(*rgIter);\n-}\n-\n-/*! \\fn int SamSequenceDictionary::Size(void) const\n- \\brief Returns number of sequences in dictionary.\n- \\sa IsEmpty()\n-*/\n-int SamSequenceDictionary::Size(void) const {\n- return m_data.size();\n-}\n-\n-/*! \\fn SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName)\n- \\brief Retrieves the modifiable SamSequence that matches \\a sequenceName.\n-\n- NOTE - If the dictionary contains no sequence matching this name, this function inserts\n- a new one with this name (length:0), and returns a reference to it.\n-\n- If you want to avoid this insertion behavior, check the result of Contains() before\n- using this operator.\n-\n- \\param sequenceName name of sequence to retrieve\n- \\return a modifiable reference to the SamSequence associated with the name\n-*/\n-SamSequence& SamSequenceDictionary::operator[](const std::string& sequenceName) {\n-\n- // look up sequence ID\n- int index = IndexOf(sequenceName);\n-\n- // if found, return sequence at index\n- if ( index != (int)m_data.size() )\n- return m_data[index];\n-\n- // otherwise, append new sequence and return reference\n- else {\n- m_data.push_back( SamSequence(sequenceName, 0) );\n- return m_data.back();\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/SamSequenceDictionary.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,89 +0,0 @@ -// *************************************************************************** -// SamSequenceDictionary.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 18 April 2011 -// --------------------------------------------------------------------------- -// Provides methods for operating on a collection of SamSequence entries. -// *************************************************************************** - -#ifndef SAM_SEQUENCE_DICTIONARY_H -#define SAM_SEQUENCE_DICTIONARY_H - -#include <api/api_global.h> -#include <api/SamSequence.h> -#include <string> -#include <map> -#include <vector> - -namespace BamTools { - -typedef std::vector<SamSequence> SamSequenceContainer; -typedef SamSequenceContainer::iterator SamSequenceIterator; -typedef SamSequenceContainer::const_iterator SamSequenceConstIterator; - -class API_EXPORT SamSequenceDictionary { - - // ctor & dtor - public: - SamSequenceDictionary(void); - SamSequenceDictionary(const SamSequenceDictionary& other); - ~SamSequenceDictionary(void); - - // query/modify sequence data - public: - // adds a sequence - void Add(const SamSequence& sequence); - void Add(const std::string& name, const int& length); - - // adds multiple sequences - void Add(const std::vector<SamSequence>& sequences); - void Add(const std::map<std::string, int>& sequenceMap); - - // clears all sequence entries - void Clear(void); - - // returns true if dictionary contains this sequence - bool Contains(const SamSequence& sequence) const; - bool Contains(const std::string& sequenceName) const; - - // returns true if dictionary is empty - bool IsEmpty(void) const; - - // removes sequence, if found - void Remove(const SamSequence& sequence); - void Remove(const std::string& sequenceName); - - // removes multiple sequences - void Remove(const std::vector<SamSequence>& sequences); - void Remove(const std::vector<std::string>& sequenceNames); - - // returns number of sequences in dictionary - int Size(void) const; - - // retrieves a modifiable reference to the SamSequence object associated with this name - SamSequence& operator[](const std::string& sequenceName); - - // retrieve STL-compatible iterators - public: - SamSequenceIterator Begin(void); // returns iterator to begin() - SamSequenceConstIterator Begin(void) const; // returns const_iterator to begin() - SamSequenceConstIterator ConstBegin(void) const; // returns const_iterator to begin() - SamSequenceIterator End(void); // returns iterator to end() - SamSequenceConstIterator End(void) const; // returns const_iterator to end() - SamSequenceConstIterator ConstEnd(void) const; // returns const_iterator to end() - - // internal methods - private: - int IndexOf(const std::string& name) const; - - // data members - private: - SamSequenceContainer m_data; -}; - -} // namespace BamTools - -#endif // SAM_SEQUENCE_DICTIONARY_H - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/api_global.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,22 +0,0 @@ -// *************************************************************************** -// api_global.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 November 2010 (DB) -// --------------------------------------------------------------------------- -// Provides macros for exporting & importing BamTools API library symbols -// *************************************************************************** - -#ifndef API_GLOBAL_H -#define API_GLOBAL_H - -#include "shared/bamtools_global.h" - -#ifdef BAMTOOLS_API_LIBRARY -# define API_EXPORT BAMTOOLS_LIBRARY_EXPORT -#else -# define API_EXPORT BAMTOOLS_LIBRARY_IMPORT -#endif - -#endif // API_GLOBAL_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,133 +0,0 @@ -// *************************************************************************** -// BamHeader_p.cpp (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 21 March 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for handling BAM headers. -// *************************************************************************** - -#include <api/BamAux.h> -#include <api/BamConstants.h> -#include <api/internal/BamHeader_p.h> -#include <api/internal/BgzfStream_p.h> -using namespace BamTools; -using namespace BamTools::Internal; - -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <iostream> -using namespace std; - -// ctor -BamHeader::BamHeader(void) { } - -// dtor -BamHeader::~BamHeader(void) { } - -// reads magic number from BGZF stream, returns true if valid -bool BamHeader::CheckMagicNumber(BgzfStream* stream) { - - // try to read magic number - char buffer[Constants::BAM_HEADER_MAGIC_LENGTH]; - if ( stream->Read(buffer, Constants::BAM_HEADER_MAGIC_LENGTH) != (int)Constants::BAM_HEADER_MAGIC_LENGTH ) { - fprintf(stderr, "BamHeader ERROR: could not read magic number\n"); - return false; - } - - // validate magic number - if ( strncmp(buffer, Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH) != 0 ) { - fprintf(stderr, "BamHeader ERROR: invalid magic number\n"); - return false; - } - - // all checks out - return true; -} - -// clear SamHeader data -void BamHeader::Clear(void) { - m_header.Clear(); -} - -// return true if SamHeader data is valid -bool BamHeader::IsValid(void) const { - return m_header.IsValid(); -} - -// load BAM header ('magic number' and SAM header text) from BGZF stream -// returns true if all OK -bool BamHeader::Load(BgzfStream* stream) { - - // cannot load if invalid stream - if ( stream == 0 ) - return false; - - // cannot load if magic number is invalid - if ( !CheckMagicNumber(stream) ) - return false; - - // cannot load header if cannot read header length - uint32_t length(0); - if ( !ReadHeaderLength(stream, length) ) - return false; - - // cannot load header if cannot read header text - if ( !ReadHeaderText(stream, length) ) - return false; - - // otherwise, everything OK - return true; -} - -// reads SAM header text length from BGZF stream, stores it in @length -// returns read success/fail status -bool BamHeader::ReadHeaderLength(BgzfStream* stream, uint32_t& length) { - - // attempt to read BAM header text length - char buffer[sizeof(uint32_t)]; - if ( stream->Read(buffer, sizeof(uint32_t)) != sizeof(uint32_t) ) { - fprintf(stderr, "BamHeader ERROR: could not read header length\n"); - return false; - } - - // convert char buffer to length, return success - length = BamTools::UnpackUnsignedInt(buffer); - if ( BamTools::SystemIsBigEndian() ) - BamTools::SwapEndian_32(length); - return true; -} - -// reads SAM header text from BGZF stream, stores in SamHeader object -// returns read success/fail status -bool BamHeader::ReadHeaderText(BgzfStream* stream, const uint32_t& length) { - - // set up destination buffer - char* headerText = (char*)calloc(length + 1, 1); - - // attempt to read header text - const unsigned bytesRead = stream->Read(headerText, length); - const bool readOk = ( bytesRead == length ); - if ( readOk ) - m_header.SetHeaderText( (string)((const char*)headerText) ); - else - fprintf(stderr, "BamHeader ERROR: could not read header text\n"); - - // clean up calloc-ed temp variable (on success or fail) - free(headerText); - - // return read success - return readOk; -} - -// returns *copy* of SamHeader data object -SamHeader BamHeader::ToSamHeader(void) const { - return m_header; -} - -// returns SAM-formatted string of header data -string BamHeader::ToString(void) const { - return m_header.ToString(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamHeader_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,72 +0,0 @@ -// *************************************************************************** -// BamHeader_p.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 26 January 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for handling BAM headers. -// *************************************************************************** - -#ifndef BAMHEADER_P_H -#define BAMHEADER_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/SamHeader.h> -#include <string> - -namespace BamTools { -namespace Internal { - -class BgzfStream; - -class BamHeader { - - // ctor & dtor - public: - BamHeader(void); - ~BamHeader(void); - - // BamHeader interface - public: - // clear SamHeader data - void Clear(void); - // return true if SamHeader data is valid - bool IsValid(void) const; - // load BAM header ('magic number' and SAM header text) from BGZF stream - // returns true if all OK - bool Load(BgzfStream* stream); - // returns (editable) copy of SamHeader data object - SamHeader ToSamHeader(void) const; - // returns SAM-formatted string of header data - std::string ToString(void) const; - - // internal methods - private: - // reads magic number from BGZF stream, returns true if valid - bool CheckMagicNumber(BgzfStream* stream); - // reads SAM header length from BGZF stream, stores it in @length - // returns read success/fail status - bool ReadHeaderLength(BgzfStream* stream, uint32_t& length); - // reads SAM header text from BGZF stream, stores in SamHeader object - // returns read success/fail status - bool ReadHeaderText(BgzfStream* stream, const uint32_t& length); - - // data members - private: - SamHeader m_header; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMHEADER_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,113 +0,0 @@ -// *************************************************************************** -// BamIndexFactory_p.cpp (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides interface for generating BamIndex implementations -// *************************************************************************** - -#include <api/BamAux.h> -#include <api/internal/BamIndexFactory_p.h> -#include <api/internal/BamStandardIndex_p.h> -#include <api/internal/BamToolsIndex_p.h> -using namespace BamTools; -using namespace BamTools::Internal; - -#include <cstdio> -using namespace std; - -// generates index filename from BAM filename (depending on requested type) -// if type is unknown, returns empty string -const string BamIndexFactory::CreateIndexFilename(const string& bamFilename, - const BamIndex::IndexType& type) -{ - switch ( type ) { - case ( BamIndex::STANDARD ) : return ( bamFilename + BamStandardIndex::Extension() ); - case ( BamIndex::BAMTOOLS ) : return ( bamFilename + BamToolsIndex::Extension() ); - default : - cerr << "BamIndexFactory ERROR: unknown index type" << type << endl; - return string(); - } -} - -// creates a new BamIndex object, depending on extension of @indexFilename -BamIndex* BamIndexFactory::CreateIndexFromFilename(const string& indexFilename, BamReaderPrivate* reader) { - - // if file doesn't exist, return null index - if ( !BamTools::FileExists(indexFilename) ) - return 0; - - // get file extension from index filename, including dot (".EXT") - // if can't get file extension, return null index - const string extension = FileExtension(indexFilename); - if ( extension.empty() ) - return 0; - - // create index based on extension - if ( extension == BamStandardIndex::Extension() ) return new BamStandardIndex(reader); - else if ( extension == BamToolsIndex::Extension() ) return new BamToolsIndex(reader); - else - return 0; -} - -// creates a new BamIndex, object of requested @type -BamIndex* BamIndexFactory::CreateIndexOfType(const BamIndex::IndexType& type, - BamReaderPrivate* reader) -{ - switch ( type ) { - case ( BamIndex::STANDARD ) : return new BamStandardIndex(reader); - case ( BamIndex::BAMTOOLS ) : return new BamToolsIndex(reader); - default : - cerr << "BamIndexFactory ERROR: unknown index type " << type << endl; - return 0; - } -} - -// retrieves file extension (including '.') -const string BamIndexFactory::FileExtension(const string& filename) { - - // if filename cannot contain valid path + extension, return empty string - if ( filename.empty() || filename.length() <= 4 ) - return string(); - - // look for last dot in filename - size_t lastDotPosition = filename.find_last_of('.'); - - // if none found, return empty string - if ( lastDotPosition == string::npos ) - return string(); - - // return substring from last dot position - return filename.substr(lastDotPosition); -} - -// returns name of existing index file that corresponds to @bamFilename -// will defer to @preferredType if possible, if not will attempt to load any supported type -// returns empty string if not found -const string BamIndexFactory::FindIndexFilename(const string& bamFilename, - const BamIndex::IndexType& preferredType) -{ - // try to find index of preferred type first - // return index filename if found - string indexFilename = CreateIndexFilename(bamFilename, preferredType); - if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) - return indexFilename; - - // couldn't find preferred type, try the other supported types - // return index filename if found - if ( preferredType != BamIndex::STANDARD ) { - indexFilename = CreateIndexFilename(bamFilename, BamIndex::STANDARD); - if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) - return indexFilename; - } - if ( preferredType != BamIndex::BAMTOOLS ) { - indexFilename = CreateIndexFilename(bamFilename, BamIndex::BAMTOOLS); - if ( !indexFilename.empty() && BamTools::FileExists(indexFilename) ) - return indexFilename; - } - - // otherwise couldn't find any index matching this filename - return string(); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamIndexFactory_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -// *************************************************************************** -// BamIndexFactory_p.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides interface for generating BamIndex implementations -// *************************************************************************** - -#ifndef BAMINDEX_FACTORY_P_H -#define BAMINDEX_FACTORY_P_H - -#include <api/BamIndex.h> -#include <string> - -namespace BamTools { -namespace Internal { - -class BamIndexFactory { - - // static interface methods - public: - // creates a new BamIndex object, depending on extension of @indexFilename - static BamIndex* CreateIndexFromFilename(const std::string& indexFilename, - BamReaderPrivate* reader); - // creates a new BamIndex object, of requested @type - static BamIndex* CreateIndexOfType(const BamIndex::IndexType& type, - BamReaderPrivate* reader); - // returns name of existing index file that corresponds to @bamFilename - // will defer to @preferredType if possible - // if @preferredType not found, will attempt to load any supported index type - // returns empty string if no index file (of any type) is found - static const std::string FindIndexFilename(const std::string& bamFilename, - const BamIndex::IndexType& preferredType); - - // internal methods - public: - // generates index filename from BAM filename (depending on requested type) - // if type is unknown, returns empty string - static const std::string CreateIndexFilename(const std::string& bamFilename, - const BamIndex::IndexType& type); - // retrieves file extension (including '.') - static const std::string FileExtension(const std::string& filename); -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMINDEX_FACTORY_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiMerger_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,295 +0,0 @@\n-// ***************************************************************************\n-// BamMultiMerger_p.h (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 18 March 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides merging functionality for BamMultiReader. At this point, supports\n-// sorting results by (refId, position) or by read name.\n-// ***************************************************************************\n-\n-#ifndef BAMMULTIMERGER_P_H\n-#define BAMMULTIMERGER_P_H\n-\n-// -------------\n-// W A R N I N G\n-// -------------\n-//\n-// This file is not part of the BamTools API. It exists purely as an\n-// implementation detail. This header file may change from version to version\n-// without notice, or even be removed.\n-//\n-// We mean it.\n-\n-#include <api/BamAlignment.h>\n-#include <api/BamReader.h>\n-#include <map>\n-#include <queue>\n-#include <string>\n-#include <utility>\n-\n-namespace BamTools {\n-namespace Internal {\n-\n-typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment;\n-\n-// generic MultiMerger interface\n-class IBamMultiMerger {\n-\n- public:\n- IBamMultiMerger(void) { }\n- virtual ~IBamMultiMerger(void) { }\n-\n- public:\n- virtual void Add(const ReaderAlignment& value) =0;\n- virtual void Clear(void) =0;\n- virtual const ReaderAlignment& First(void) const =0;\n- virtual bool IsEmpty(void) const =0;\n- virtual void Remove(BamReader* reader) =0;\n- virtual int Size(void) const =0;\n- virtual ReaderAlignment TakeFirst(void) =0;\n-};\n-\n-// IBamMultiMerger implementation - sorted on BamAlignment: (RefId, Position)\n-class PositionMultiMerger : public IBamMultiMerger {\n-\n- public:\n- PositionMultiMerger(void) : IBamMultiMerger() { }\n- ~PositionMultiMerger(void) { }\n-\n- public:\n- void Add(const ReaderAlignment& value);\n- void Clear(void);\n- const ReaderAlignment& First(void) const;\n- bool IsEmpty(void) const;\n- void Remove(BamReader* reader);\n- int Size(void) const;\n- ReaderAlignment TakeFirst(void);\n-\n- private:\n- typedef std::pair<int, int> KeyType;\n- typedef ReaderAlignment ValueType;\n- typedef std::pair<KeyType, ValueType> ElementType;\n-\n- typedef std::multimap<KeyType, ValueType> ContainerType;\n- typedef ContainerType::iterator DataIterator;\n- typedef ContainerType::const_iterator DataConstIterator;\n-\n- ContainerType m_data;\n-};\n-\n-// IBamMultiMerger implementation - sorted on BamAlignment: Name\n-class ReadNameMultiMerger : public IBamMultiMerger {\n-\n- public:\n- ReadNameMultiMerger(void) : IBamMultiMerger() { }\n- ~ReadNameMultiMerger(void) { }\n-\n- public:\n- void Add(const ReaderAlignment& value);\n- void Clear(void);\n- const ReaderAlignment& First(void) const;\n- bool IsEmpty(void) const;\n- void Remove(BamReader* reader);\n- int Size(void) const;\n- ReaderAlignment TakeFirst(void);\n-\n- private:\n- typedef std::string KeyType;\n- typedef ReaderAlignment ValueType;\n- typedef std::pair<KeyType, ValueType> ElementType;\n-\n- typedef std::multimap<KeyType, ValueType> ContainerType;\n- typedef ContainerType::iterator DataIterator;\n- typedef ContainerType::const_iterator DataConstIterator;\n-\n- ContainerType m_data;\n-};\n-\n-// IBamMultiMerger implementation - unsorted BAM file(s)\n-class UnsortedMultiMerger : public IBamMultiMerger {\n-\n- public:\n- UnsortedMultiMerger(void) : IBamMultiMerger() { }\n- ~UnsortedMultiMerger(void) { }\n-\n- public:\n- void Add(const ReaderAlignment& value);\n- void Clear(void);\n- con'..b't {\n- return m_data.empty();\n-}\n-\n-inline void PositionMultiMerger::Remove(BamReader* reader) {\n-\n- if ( reader == 0 ) return;\n- const std::string filenameToRemove = reader->GetFilename();\n-\n- // iterate over readers in cache\n- DataIterator dataIter = m_data.begin();\n- DataIterator dataEnd = m_data.end();\n- for ( ; dataIter != dataEnd; ++dataIter ) {\n- const ValueType& entry = (*dataIter).second;\n- const BamReader* entryReader = entry.first;\n- if ( entryReader == 0 ) continue;\n-\n- // remove iterator on match\n- if ( entryReader->GetFilename() == filenameToRemove ) {\n- m_data.erase(dataIter);\n- return;\n- }\n- }\n-}\n-\n-inline int PositionMultiMerger::Size(void) const {\n- return m_data.size();\n-}\n-\n-inline ReaderAlignment PositionMultiMerger::TakeFirst(void) {\n- DataIterator first = m_data.begin();\n- ReaderAlignment next = (*first).second;\n- m_data.erase(first);\n- return next;\n-}\n-\n-// ------------------------------------------\n-// ReadNameMultiMerger implementation\n-\n-inline void ReadNameMultiMerger::Add(const ReaderAlignment& value) {\n- const KeyType key(value.second->Name);\n- m_data.insert( ElementType(key, value) );\n-}\n-\n-inline void ReadNameMultiMerger::Clear(void) {\n- m_data.clear();\n-}\n-\n-inline const ReaderAlignment& ReadNameMultiMerger::First(void) const {\n- const ElementType& entry = (*m_data.begin());\n- return entry.second;\n-}\n-\n-inline bool ReadNameMultiMerger::IsEmpty(void) const {\n- return m_data.empty();\n-}\n-\n-inline void ReadNameMultiMerger::Remove(BamReader* reader) {\n-\n- if ( reader == 0 ) return;\n- const std::string filenameToRemove = reader->GetFilename();\n-\n- // iterate over readers in cache\n- DataIterator dataIter = m_data.begin();\n- DataIterator dataEnd = m_data.end();\n- for ( ; dataIter != dataEnd; ++dataIter ) {\n- const ValueType& entry = (*dataIter).second;\n- const BamReader* entryReader = entry.first;\n- if ( entryReader == 0 ) continue;\n-\n- // remove iterator on match\n- if ( entryReader->GetFilename() == filenameToRemove ) {\n- m_data.erase(dataIter);\n- return;\n- }\n- }\n-\n-}\n-\n-inline int ReadNameMultiMerger::Size(void) const {\n- return m_data.size();\n-}\n-\n-inline ReaderAlignment ReadNameMultiMerger::TakeFirst(void) {\n- DataIterator first = m_data.begin();\n- ReaderAlignment next = (*first).second;\n- m_data.erase(first);\n- return next;\n-}\n-\n-// ------------------------------------------\n-// UnsortedMultiMerger implementation\n-\n-inline void UnsortedMultiMerger::Add(const ReaderAlignment& value) {\n- m_data.push_back(value);\n-}\n-\n-inline void UnsortedMultiMerger::Clear(void) {\n- for (size_t i = 0; i < m_data.size(); ++i )\n- m_data.pop_back();\n-}\n-\n-inline const ReaderAlignment& UnsortedMultiMerger::First(void) const {\n- return m_data.front();\n-}\n-\n-inline bool UnsortedMultiMerger::IsEmpty(void) const {\n- return m_data.empty();\n-}\n-\n-inline void UnsortedMultiMerger::Remove(BamReader* reader) {\n-\n- if ( reader == 0 ) return;\n- const std::string filenameToRemove = reader->GetFilename();\n-\n- // iterate over readers in cache\n- DataIterator dataIter = m_data.begin();\n- DataIterator dataEnd = m_data.end();\n- for ( ; dataIter != dataEnd; ++dataIter ) {\n- const BamReader* entryReader = (*dataIter).first;\n- if ( entryReader == 0 ) continue;\n-\n- // remove iterator on match\n- if ( entryReader->GetFilename() == filenameToRemove ) {\n- m_data.erase(dataIter);\n- return;\n- }\n- }\n-}\n-\n-inline int UnsortedMultiMerger::Size(void) const {\n- return m_data.size();\n-}\n-\n-inline ReaderAlignment UnsortedMultiMerger::TakeFirst(void) {\n- ReaderAlignment first = m_data.front();\n- m_data.erase( m_data.begin() );\n- return first;\n-}\n-\n-} // namespace Internal\n-} // namespace BamTools\n-\n-#endif // BAMMULTIMERGER_P_H\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b"@@ -1,802 +0,0 @@\n-// ***************************************************************************\n-// BamMultiReader_p.cpp (c) 2010 Derek Barnett, Erik Garrison\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 5 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Functionality for simultaneously reading multiple BAM files\n-// *************************************************************************\n-\n-#include <api/BamAlignment.h>\n-#include <api/BamMultiReader.h>\n-#include <api/internal/BamMultiMerger_p.h>\n-#include <api/internal/BamMultiReader_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <algorithm>\n-#include <fstream>\n-#include <iostream>\n-#include <iterator>\n-#include <sstream>\n-using namespace std;\n-\n-// ctor\n-BamMultiReaderPrivate::BamMultiReaderPrivate(void)\n- : m_alignments(0)\n- , m_isCoreMode(false)\n- , m_sortOrder(BamMultiReader::SortedByPosition)\n-{ }\n-\n-// dtor\n-BamMultiReaderPrivate::~BamMultiReaderPrivate(void) {\n-\n- // close all open BAM readers\n- Close();\n-\n- // clean up alignment cache\n- delete m_alignments;\n- m_alignments = 0;\n-}\n-\n-// close all BAM files\n-void BamMultiReaderPrivate::Close(void) {\n- CloseFiles( Filenames() );\n-}\n-\n-// close requested BAM file\n-void BamMultiReaderPrivate::CloseFile(const string& filename) { \n- vector<string> filenames(1, filename);\n- CloseFiles(filenames);\n-}\n-\n-// close requested BAM files\n-void BamMultiReaderPrivate::CloseFiles(const vector<string>& filenames) {\n-\n- // iterate over filenames\n- vector<string>::const_iterator filesIter = filenames.begin();\n- vector<string>::const_iterator filesEnd = filenames.end();\n- for ( ; filesIter != filesEnd; ++filesIter ) {\n- const string& filename = (*filesIter);\n- if ( filename.empty() ) continue;\n-\n- // iterate over readers\n- vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n- vector<ReaderAlignment>::iterator readerEnd = m_readers.end();\n- for ( ; readerIter != readerEnd; ++readerIter ) {\n- BamReader* reader = (*readerIter).first;\n- if ( reader == 0 ) continue;\n-\n- // if reader matches requested filename\n- if ( reader->GetFilename() == filename ) {\n-\n- // remove reader/alignment from alignment cache\n- m_alignments->Remove(reader);\n-\n- // close & delete reader\n- reader->Close();\n- delete reader;\n- reader = 0;\n-\n- // delete reader's alignment entry\n- BamAlignment* alignment = (*readerIter).second;\n- delete alignment;\n- alignment = 0;\n-\n- // remove reader from container\n- m_readers.erase(readerIter);\n-\n- // on match, just go on to next filename\n- // (no need to keep looking and iterator is invalid now anyway)\n- break;\n- }\n- }\n- }\n-\n- // make sure alignment cache is cleared if all readers are now closed\n- if ( m_readers.empty() && m_alignments != 0 )\n- m_alignments->Clear();\n-}\n-\n-// creates index files for BAM files that don't have them\n-bool BamMultiReaderPrivate::CreateIndexes(const BamIndex::IndexType& type) {\n-\n- bool result = true;\n-\n- // iterate over readers\n- vector<ReaderAlignment>::iterator readerIter = m_readers.begin();\n- vector<ReaderAlignment>::iterator readerEnd = m_readers.end();\n- for ( ; readerIter != readerEnd; ++readerIter ) {\n- BamReader* reader = (*readerIter).first;\n- if ( reader == 0 ) continue;\n-\n- // if reader doesn't have an index, create one\n- if ( !reader->HasIndex() )\n- result &= reader->CreateIndex(type);\n- }\n-\n- return result;\n-}\n-\n-I"..b'= (*readerIter).second;\n- if ( reader == 0 || alignment == 0 ) continue;\n-\n- // save next alignment from each reader in cache\n- SaveNextAlignment(reader, alignment);\n- }\n-}\n-\n-// ValidateReaders checks that all the readers point to BAM files representing\n-// alignments against the same set of reference sequences, and that the\n-// sequences are identically ordered. If these checks fail the operation of\n-// the multireader is undefined, so we force program exit.\n-void BamMultiReaderPrivate::ValidateReaders(void) const {\n-\n- // retrieve first reader data\n- const BamReader* firstReader = m_readers.front().first;\n- if ( firstReader == 0 ) return;\n- const RefVector firstReaderRefData = firstReader->GetReferenceData();\n- const int firstReaderRefCount = firstReader->GetReferenceCount();\n- const int firstReaderRefSize = firstReaderRefData.size();\n-\n- // iterate over all readers\n- vector<ReaderAlignment>::const_iterator readerIter = m_readers.begin();\n- vector<ReaderAlignment>::const_iterator readerEnd = m_readers.end();\n- for ( ; readerIter != readerEnd; ++readerIter ) {\n-\n- // get current reader data\n- BamReader* reader = (*readerIter).first;\n- if ( reader == 0 ) continue;\n- const RefVector currentReaderRefData = reader->GetReferenceData();\n- const int currentReaderRefCount = reader->GetReferenceCount();\n- const int currentReaderRefSize = currentReaderRefData.size();\n-\n- // init container iterators\n- RefVector::const_iterator firstRefIter = firstReaderRefData.begin();\n- RefVector::const_iterator firstRefEnd = firstReaderRefData.end();\n- RefVector::const_iterator currentRefIter = currentReaderRefData.begin();\n-\n- // compare reference counts from BamReader ( & container size, in case of BR error)\n- if ( (currentReaderRefCount != firstReaderRefCount) ||\n- (firstReaderRefSize != currentReaderRefSize) )\n- {\n- cerr << "BamMultiReader ERROR: mismatched number of references in " << reader->GetFilename()\n- << " expected " << firstReaderRefCount\n- << " reference sequences but only found " << currentReaderRefCount << endl;\n- exit(1);\n- }\n-\n- // this will be ok; we just checked above that we have identically-sized sets of references\n- // here we simply check if they are all, in fact, equal in content\n- while ( firstRefIter != firstRefEnd ) {\n- const RefData& firstRef = (*firstRefIter);\n- const RefData& currentRef = (*currentRefIter);\n-\n- // compare reference name & length\n- if ( (firstRef.RefName != currentRef.RefName) ||\n- (firstRef.RefLength != currentRef.RefLength) )\n- {\n- cerr << "BamMultiReader ERROR: mismatched references found in " << reader->GetFilename()\n- << " expected: " << endl;\n-\n- // print first reader\'s reference data\n- RefVector::const_iterator refIter = firstReaderRefData.begin();\n- RefVector::const_iterator refEnd = firstReaderRefData.end();\n- for ( ; refIter != refEnd; ++refIter ) {\n- const RefData& entry = (*refIter);\n- cerr << entry.RefName << " " << entry.RefLength << endl;\n- }\n-\n- cerr << "but found: " << endl;\n-\n- // print current reader\'s reference data\n- refIter = currentReaderRefData.begin();\n- refEnd = currentReaderRefData.end();\n- for ( ; refIter != refEnd; ++refIter ) {\n- const RefData& entry = (*refIter);\n- cerr << entry.RefName << " " << entry.RefLength << endl;\n- }\n-\n- exit(1);\n- }\n-\n- // update iterators\n- ++firstRefIter;\n- ++currentRefIter;\n- }\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamMultiReader_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,102 +0,0 @@ -// *************************************************************************** -// BamMultiReader_p.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 13 March 2011 (DB) -// --------------------------------------------------------------------------- -// Functionality for simultaneously reading multiple BAM files -// ************************************************************************* - -#ifndef BAMMULTIREADER_P_H -#define BAMMULTIREADER_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/SamHeader.h> -#include <api/BamMultiReader.h> -#include <string> -#include <vector> - -namespace BamTools { -namespace Internal { - -class IBamMultiMerger; - -class BamMultiReaderPrivate { - - // constructor / destructor - public: - BamMultiReaderPrivate(void); - ~BamMultiReaderPrivate(void); - - // public interface - public: - - // file operations - void Close(void); - void CloseFile(const std::string& filename); - void CloseFiles(const std::vector<std::string>& filenames); - const std::vector<std::string> Filenames(void) const; - bool Jump(int refID, int position = 0); - bool Open(const std::vector<std::string>& filenames); - bool OpenFile(const std::string& filename); - void PrintFilenames(void) const; - bool Rewind(void); - bool SetRegion(const BamRegion& region); - - // access alignment data - bool GetNextAlignment(BamAlignment& al); - bool GetNextAlignmentCore(BamAlignment& al); - bool HasOpenReaders(void); - void SetSortOrder(const BamMultiReader::SortOrder& order); - - // access auxiliary data - SamHeader GetHeader(void) const; - std::string GetHeaderText(void) const; - int GetReferenceCount(void) const; - const BamTools::RefVector GetReferenceData(void) const; - int GetReferenceID(const std::string& refName) const; - - // BAM index operations - bool CreateIndexes(const BamIndex::IndexType& type = BamIndex::STANDARD); - bool HasIndexes(void) const; - bool LocateIndexes(const BamIndex::IndexType& preferredType = BamIndex::STANDARD); - bool OpenIndexes(const std::vector<std::string>& indexFilenames); - void SetIndexCacheMode(const BamIndex::IndexCacheMode mode); - - // 'internal' methods - public: - IBamMultiMerger* CreateMergerForCurrentSortOrder(void) const; - const std::string ExtractReadGroup(const std::string& headerLine) const; - bool HasAlignmentData(void) const; - bool LoadNextAlignment(BamAlignment& al); - BamTools::BamReader* OpenReader(const std::string& filename); - bool RewindReaders(void); - void SaveNextAlignment(BamTools::BamReader* reader, BamTools::BamAlignment* alignment); - const std::vector<std::string> SplitHeaderText(const std::string& headerText) const; - void UpdateAlignmentCache(void); - void ValidateReaders(void) const; - - // data members - public: - typedef std::pair<BamReader*, BamAlignment*> ReaderAlignment; - std::vector<ReaderAlignment> m_readers; - - IBamMultiMerger* m_alignments; - bool m_isCoreMode; - BamMultiReader::SortOrder m_sortOrder; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMMULTIREADER_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,273 +0,0 @@\n-// ***************************************************************************\n-// BamRandomAccessController_p.cpp (c) 2011 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 5 April 2011(DB)\n-// ---------------------------------------------------------------------------\n-// Manages random access operations in a BAM file\n-// **************************************************************************\n-\n-#include <api/BamIndex.h>\n-#include <api/internal/BamRandomAccessController_p.h>\n-#include <api/internal/BamReader_p.h>\n-#include <api/internal/BamIndexFactory_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <iostream>\n-using namespace std;\n-\n-BamRandomAccessController::BamRandomAccessController(void)\n- : m_index(0)\n- , m_indexCacheMode(BamIndex::LimitedIndexCaching)\n- , m_hasAlignmentsInRegion(true)\n-{ }\n-\n-BamRandomAccessController::~BamRandomAccessController(void) {\n- Close();\n-}\n-\n-void BamRandomAccessController::AdjustRegion(const int& referenceCount) {\n-\n- // skip if no index available\n- if ( m_index == 0 )\n- return;\n-\n- // see if any references in region have alignments\n- m_hasAlignmentsInRegion = false;\n- int currentId = m_region.LeftRefID;\n- const int rightBoundRefId = ( m_region.isRightBoundSpecified() ? m_region.RightRefID : referenceCount - 1 );\n- while ( currentId <= rightBoundRefId ) {\n- m_hasAlignmentsInRegion = m_index->HasAlignments(currentId);\n- if ( m_hasAlignmentsInRegion ) break;\n- ++currentId;\n- }\n-\n- // if no data found on any reference in region\n- if ( !m_hasAlignmentsInRegion )\n- return;\n-\n- // if left bound of desired region had no data, use first reference that had data\n- // otherwise, leave requested region as-is\n- if ( currentId != m_region.LeftRefID ) {\n- m_region.LeftRefID = currentId;\n- m_region.LeftPosition = 0;\n- }\n-}\n-\n-// returns alignments\' "RegionState": { Before|Overlaps|After } current region\n-BamRandomAccessController::RegionState\n-BamRandomAccessController::AlignmentState(const BamAlignment& alignment) const {\n-\n- // if region has no left bound at all\n- if ( !m_region.isLeftBoundSpecified() )\n- return OverlapsRegion;\n-\n- // handle unmapped reads - return AFTER region to halt processing\n- if ( alignment.RefID == -1 )\n- return AfterRegion;\n-\n- // if alignment is on any reference before left bound reference\n- if ( alignment.RefID < m_region.LeftRefID )\n- return BeforeRegion;\n-\n- // if alignment is on left bound reference\n- else if ( alignment.RefID == m_region.LeftRefID ) {\n-\n- // if alignment starts at or after left bound position\n- if ( alignment.Position >= m_region.LeftPosition) {\n-\n- if ( m_region.isRightBoundSpecified() && // right bound is specified AND\n- m_region.LeftRefID == m_region.RightRefID && // left & right bounds on same reference AND\n- alignment.Position > m_region.RightPosition ) // alignment starts after right bound position\n- return AfterRegion;\n-\n- // otherwise, alignment overlaps region\n- else return OverlapsRegion;\n- }\n-\n- // alignment starts before left bound position\n- else {\n-\n- // if alignment overlaps left bound position\n- if ( alignment.GetEndPosition() >= m_region.LeftPosition )\n- return OverlapsRegion;\n- else\n- return BeforeRegion;\n- }\n- }\n-\n- // otherwise alignment is on a reference after left bound reference\n- else {\n-\n- // if region has a right bound\n- if ( m_region.isRightBoundSpecified() ) {\n-\n- // alignment is on any reference between boundaries\n- if ( alignment.RefID < m_region.Ri'..b' "BamRandomAccessController ERROR: could not create index for BAM file: "\n- << reader->Filename() << endl;\n- return false;\n- }\n-\n- // save new index\n- SetIndex(newIndex);\n-\n- // set new index\'s cache mode & return success\n- newIndex->SetCacheMode(m_indexCacheMode);\n- return true;\n-}\n-\n-bool BamRandomAccessController::HasIndex(void) const {\n- return ( m_index != 0 );\n-}\n-\n-bool BamRandomAccessController::HasRegion(void) const {\n- return ( !m_region.isNull() );\n-}\n-\n-bool BamRandomAccessController::IndexHasAlignmentsForReference(const int& refId) {\n- return m_index->HasAlignments(refId);\n-}\n-\n-bool BamRandomAccessController::LocateIndex(BamReaderPrivate* reader,\n- const BamIndex::IndexType& preferredType)\n-{\n- // look up index filename, deferring to preferredType if possible\n- const string& indexFilename = BamIndexFactory::FindIndexFilename(reader->Filename(), preferredType);\n-\n- // if no index file found (of any type)\n- if ( indexFilename.empty() ) {\n- cerr << "BamRandomAccessController WARNING: "\n- << "could not find index file for BAM: "\n- << reader->Filename() << endl;\n- return false;\n- }\n-\n- // otherwise open & use index file that was found\n- return OpenIndex(indexFilename, reader);\n-}\n-\n-bool BamRandomAccessController::OpenIndex(const string& indexFilename, BamReaderPrivate* reader) {\n-\n- // attempt create new index of type based on filename\n- BamIndex* index = BamIndexFactory::CreateIndexFromFilename(indexFilename, reader);\n- if ( index == 0 ) {\n- cerr << "BamRandomAccessController ERROR: could not create index for file: " << indexFilename << endl;\n- return false;\n- }\n-\n- // set cache mode\n- index->SetCacheMode(m_indexCacheMode);\n-\n- // attempt to load data from index file\n- if ( !index->Load(indexFilename) ) {\n- cerr << "BamRandomAccessController ERROR: could not load index data from file: " << indexFilename << endl;\n- return false;\n- }\n-\n- // save new index & return success\n- SetIndex(index);\n- return true;\n-}\n-\n-bool BamRandomAccessController::RegionHasAlignments(void) const {\n- return m_hasAlignmentsInRegion;\n-}\n-\n-void BamRandomAccessController::SetIndex(BamIndex* index) {\n- if ( m_index )\n- ClearIndex();\n- m_index = index;\n-}\n-\n-void BamRandomAccessController::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n- m_indexCacheMode = mode;\n- if ( m_index )\n- m_index->SetCacheMode(mode);\n-}\n-\n-bool BamRandomAccessController::SetRegion(BamReaderPrivate* reader,\n- const BamRegion& region,\n- const int& referenceCount)\n-{\n- // store region\n- m_region = region;\n-\n- // cannot jump when no index is available\n- if ( !HasIndex() )\n- return false;\n-\n- // adjust region as necessary to reflect where data actually begins\n- AdjustRegion(referenceCount);\n-\n- // if no data present, return true\n- // * Not an error, but future attempts to access alignments in this region will not return data\n- // Returning true is useful in a BamMultiReader setting where some BAM files may\n- // lack alignments in regions where other BAMs do have data.\n- if ( !m_hasAlignmentsInRegion )\n- return true;\n-\n- // return success/failure of jump to specified region,\n- //\n- // * Index::Jump() is allowed to modify the m_hasAlignmentsInRegion flag\n- // This covers \'corner case\' where a region is requested that lies beyond the last\n- // alignment on a reference. If this occurs, any subsequent calls to GetNextAlignment[Core]\n- // will not return data. BamMultiReader will still be able to successfully pull alignments\n- // from a region from multiple files even if one or more have no data.\n- return m_index->Jump(m_region, &m_hasAlignmentsInRegion);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamRandomAccessController_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,94 +0,0 @@ -// *************************************************************************** -// BamRandomAccessController_p.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 24 February 2011(DB) -// --------------------------------------------------------------------------- -// Manages random access operations in a BAM file -// *************************************************************************** - -#ifndef BAMRACONTROLLER_P_H -#define BAMRACONTROLLER_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/BamAux.h> -#include <api/BamIndex.h> - -namespace BamTools { - -class BamAlignment; - -namespace Internal { - -class BamReaderPrivate; - -class BamRandomAccessController { - - // enums - public: enum RegionState { BeforeRegion = 0 - , OverlapsRegion - , AfterRegion - }; - - // ctor & dtor - public: - BamRandomAccessController(void); - ~BamRandomAccessController(void); - - // general interface - public: - void Close(void); - - // index operations - public: - // - void ClearIndex(void); - bool CreateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& type); - bool HasIndex(void) const; - bool IndexHasAlignmentsForReference(const int& refId); - bool LocateIndex(BamReaderPrivate* reader, const BamIndex::IndexType& preferredType); - bool OpenIndex(const std::string& indexFilename, BamReaderPrivate* reader); - void SetIndex(BamIndex* index); - void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); - - // region operations - public: - void ClearRegion(void); - bool HasRegion(void) const; - RegionState AlignmentState(const BamAlignment& alignment) const; - bool RegionHasAlignments(void) const; - bool SetRegion(BamReaderPrivate* reader, - const BamRegion& region, - const int& referenceCount); - - // 'internal' methods - public: - // adjusts requested region if necessary (depending on where data actually begins) - void AdjustRegion(const int& referenceCount); - - // data members - private: - - // index data - BamIndex* m_index; // owns index, not a copy - responsible for deleting - BamIndex::IndexCacheMode m_indexCacheMode; - - // region data - BamRegion m_region; - bool m_hasAlignmentsInRegion; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMRACONTROLLER_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,381 +0,0 @@\n-// ***************************************************************************\n-// BamReader_p.cpp (c) 2009 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 10 May 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides the basic functionality for reading BAM files\n-// ***************************************************************************\n-\n-#include <api/BamConstants.h>\n-#include <api/BamReader.h>\n-#include <api/internal/BamHeader_p.h>\n-#include <api/internal/BamRandomAccessController_p.h>\n-#include <api/internal/BamReader_p.h>\n-#include <api/internal/BamStandardIndex_p.h>\n-#include <api/internal/BamToolsIndex_p.h>\n-#include <api/internal/BgzfStream_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <algorithm>\n-#include <iostream>\n-#include <iterator>\n-#include <vector>\n-using namespace std;\n-\n-// constructor\n-BamReaderPrivate::BamReaderPrivate(BamReader* parent)\n- : m_alignmentsBeginOffset(0)\n- , m_parent(parent)\n-{\n- m_isBigEndian = BamTools::SystemIsBigEndian();\n-}\n-\n-// destructor\n-BamReaderPrivate::~BamReaderPrivate(void) {\n- Close();\n-}\n-\n-// closes the BAM file\n-void BamReaderPrivate::Close(void) {\n-\n- // clear header & reference data\n- m_references.clear();\n- m_header.Clear();\n-\n- // close internal\n- m_randomAccessController.Close();\n- m_stream.Close();\n-\n- // clear filename\n- m_filename.clear();\n-}\n-\n-// creates an index file of requested type on current BAM file\n-bool BamReaderPrivate::CreateIndex(const BamIndex::IndexType& type) {\n- if ( !IsOpen() ) return false;\n- return m_randomAccessController.CreateIndex(this, type);\n-}\n-\n-// return path & filename of current BAM file\n-const string BamReaderPrivate::Filename(void) const {\n- return m_filename;\n-}\n-\n-// return header data as std::string\n-string BamReaderPrivate::GetHeaderText(void) const {\n- return m_header.ToString();\n-}\n-\n-// return header data as SamHeader object\n-SamHeader BamReaderPrivate::GetSamHeader(void) const {\n- return m_header.ToSamHeader();\n-}\n-\n-// get next alignment (with character data fully parsed)\n-bool BamReaderPrivate::GetNextAlignment(BamAlignment& alignment) {\n-\n- // if valid alignment found\n- if ( GetNextAlignmentCore(alignment) ) {\n-\n- // store alignment\'s "source" filename\n- alignment.Filename = m_filename;\n-\n- // return success/failure of parsing char data\n- return alignment.BuildCharData();\n- }\n-\n- // no valid alignment found\n- return false;\n-}\n-\n-// retrieves next available alignment core data (returns success/fail)\n-// ** DOES NOT populate any character data fields (read name, bases, qualities, tag data, filename)\n-// these can be accessed, if necessary, from the supportData\n-// useful for operations requiring ONLY positional or other alignment-related information\n-bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& alignment) {\n-\n- // skip if region is set but has no alignments\n- if ( m_randomAccessController.HasRegion() &&\n- !m_randomAccessController.RegionHasAlignments() )\n- {\n- return false;\n- }\n-\n- // if can\'t read next alignment\n- if ( !LoadNextAlignment(alignment) )\n- return false;\n-\n- // check alignment\'s region-overlap state\n- BamRandomAccessController::RegionState state = m_randomAccessController.AlignmentState(alignment);\n-\n- // if alignment starts after region, no need to keep reading\n- if ( state == BamRandomAccessController::AfterRegion )\n- return false;\n-\n- // read until overlap is found\n- while ( state != BamRandomAccessController::OverlapsRegion ) {\n-\n- // if can\'t read next alignment\n- if ( !LoadNextAlignment(alignment) )\n- return false;\n-\n- // check alignment\'s region-overlap sta'..b'n readCharDataOK;\n-}\n-\n-// loads reference data from BAM file\n-bool BamReaderPrivate::LoadReferenceData(void) {\n-\n- // get number of reference sequences\n- char buffer[sizeof(uint32_t)];\n- m_stream.Read(buffer, sizeof(uint32_t));\n- uint32_t numberRefSeqs = BamTools::UnpackUnsignedInt(buffer);\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(numberRefSeqs);\n- m_references.reserve((int)numberRefSeqs);\n-\n- // iterate over all references in header\n- for ( unsigned int i = 0; i != numberRefSeqs; ++i ) {\n-\n- // get length of reference name\n- m_stream.Read(buffer, sizeof(uint32_t));\n- uint32_t refNameLength = BamTools::UnpackUnsignedInt(buffer);\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(refNameLength);\n- char* refName = (char*)calloc(refNameLength, 1);\n-\n- // get reference name and reference sequence length\n- m_stream.Read(refName, refNameLength);\n- m_stream.Read(buffer, sizeof(int32_t));\n- int32_t refLength = BamTools::UnpackSignedInt(buffer);\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(refLength);\n-\n- // store data for reference\n- RefData aReference;\n- aReference.RefName = (string)((const char*)refName);\n- aReference.RefLength = refLength;\n- m_references.push_back(aReference);\n-\n- // clean up calloc-ed temp variable\n- free(refName);\n- }\n-\n- // return success\n- return true;\n-}\n-\n-bool BamReaderPrivate::LocateIndex(const BamIndex::IndexType& preferredType) {\n- return m_randomAccessController.LocateIndex(this, preferredType);\n-}\n-\n-// opens BAM file (and index)\n-bool BamReaderPrivate::Open(const string& filename) {\n-\n- // close current BAM file if open\n- if ( m_stream.IsOpen )\n- Close();\n-\n- // attempt to open BgzfStream for reading\n- if ( !m_stream.Open(filename, "rb") ) {\n- cerr << "BamReader ERROR: Could not open BGZF stream for " << filename << endl;\n- return false;\n- }\n-\n- // attempt to load header data\n- if ( !LoadHeaderData() ) {\n- cerr << "BamReader ERROR: Could not load header data for " << filename << endl;\n- Close();\n- return false;\n- }\n-\n- // attempt to load reference data\n- if ( !LoadReferenceData() ) {\n- cerr << "BamReader ERROR: Could not load reference data for " << filename << endl;\n- Close();\n- return false;\n- }\n-\n- // if all OK, store filename & offset of first alignment\n- m_filename = filename;\n- m_alignmentsBeginOffset = m_stream.Tell();\n-\n- // return success\n- return true;\n-}\n-\n-bool BamReaderPrivate::OpenIndex(const std::string& indexFilename) {\n- return m_randomAccessController.OpenIndex(indexFilename, this);\n-}\n-\n-// returns BAM file pointer to beginning of alignment data\n-bool BamReaderPrivate::Rewind(void) {\n-\n- // attempt rewind to first alignment\n- if ( !m_stream.Seek(m_alignmentsBeginOffset) )\n- return false;\n-\n- // verify that we can read first alignment\n- BamAlignment al;\n- if ( !LoadNextAlignment(al) )\n- return false;\n-\n- // reset region\n- m_randomAccessController.ClearRegion();\n-\n- // rewind back to beginning of first alignment\n- // return success/fail of seek\n- return m_stream.Seek(m_alignmentsBeginOffset);\n-}\n-\n-bool BamReaderPrivate::Seek(const int64_t& position) {\n- return m_stream.Seek(position);\n-}\n-\n-void BamReaderPrivate::SetIndex(BamIndex* index) {\n- m_randomAccessController.SetIndex(index);\n-}\n-\n-// change the index caching behavior\n-void BamReaderPrivate::SetIndexCacheMode(const BamIndex::IndexCacheMode& mode) {\n- m_randomAccessController.SetIndexCacheMode(mode);\n-}\n-\n-// sets current region & attempts to jump to it\n-// returns success/failure\n-bool BamReaderPrivate::SetRegion(const BamRegion& region) {\n- return m_randomAccessController.SetRegion(this, region, m_references.size());\n-}\n-\n-int64_t BamReaderPrivate::Tell(void) const {\n- return m_stream.Tell();\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamReader_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,113 +0,0 @@ -// *************************************************************************** -// BamReader_p.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for reading BAM files -// *************************************************************************** - -#ifndef BAMREADER_P_H -#define BAMREADER_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/BamAlignment.h> -#include <api/BamIndex.h> -#include <api/BamReader.h> -#include <api/SamHeader.h> -#include <api/internal/BamHeader_p.h> -#include <api/internal/BamRandomAccessController_p.h> -#include <api/internal/BgzfStream_p.h> -#include <string> - -namespace BamTools { -namespace Internal { - -class BamReaderPrivate { - - // ctor & dtor - public: - BamReaderPrivate(BamReader* parent); - ~BamReaderPrivate(void); - - // BamReader interface - public: - - // file operations - void Close(void); - const std::string Filename(void) const; - bool IsOpen(void) const; - bool Open(const std::string& filename); - bool Rewind(void); - bool SetRegion(const BamRegion& region); - - // access alignment data - bool GetNextAlignment(BamAlignment& alignment); - bool GetNextAlignmentCore(BamAlignment& alignment); - - // access auxiliary data - std::string GetHeaderText(void) const; - SamHeader GetSamHeader(void) const; - int GetReferenceCount(void) const; - const RefVector& GetReferenceData(void) const; - int GetReferenceID(const std::string& refName) const; - - // index operations - bool CreateIndex(const BamIndex::IndexType& type); - bool HasIndex(void) const; - bool LocateIndex(const BamIndex::IndexType& preferredType); - bool OpenIndex(const std::string& indexFilename); - void SetIndex(BamIndex* index); - void SetIndexCacheMode(const BamIndex::IndexCacheMode& mode); - - // internal methods, but available as a BamReaderPrivate 'interface' - // - // these methods should only be used by BamTools::Internal classes - // (currently only used by the BamIndex subclasses) - public: - // retrieves header text from BAM file - bool LoadHeaderData(void); - // retrieves BAM alignment under file pointer - // (does no overlap checking or character data parsing) - bool LoadNextAlignment(BamAlignment& alignment); - // builds reference data structure from BAM file - bool LoadReferenceData(void); - // seek reader to file position - bool Seek(const int64_t& position); - // return reader's file position - int64_t Tell(void) const; - - // data members - public: - - // general BAM file data - int64_t m_alignmentsBeginOffset; - std::string m_filename; - RefVector m_references; - - // system data - bool m_isBigEndian; - - // parent BamReader - BamReader* m_parent; - - // BamReaderPrivate components - BamHeader m_header; - BamRandomAccessController m_randomAccessController; - BgzfStream m_stream; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMREADER_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,974 +0,0 @@\n-// ***************************************************************************\n-// BamStandardIndex.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 16 June 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides index operations for the standardized BAM index format (".bai")\n-// ***************************************************************************\n-\n-#include <api/BamAlignment.h>\n-#include <api/internal/BamReader_p.h>\n-#include <api/internal/BamStandardIndex_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <cstdio>\n-#include <cstdlib>\n-#include <cstring>\n-#include <algorithm>\n-#include <iostream>\n-using namespace std;\n-\n-// static BamStandardIndex constants\n-const int BamStandardIndex::MAX_BIN = 37450; // =(8^6-1)/7+1\n-const int BamStandardIndex::BAM_LIDX_SHIFT = 14;\n-const string BamStandardIndex::BAI_EXTENSION = ".bai";\n-const char* const BamStandardIndex::BAI_MAGIC = "BAI\\1";\n-const int BamStandardIndex::SIZEOF_ALIGNMENTCHUNK = sizeof(uint64_t)*2;\n-const int BamStandardIndex::SIZEOF_BINCORE = sizeof(uint32_t) + sizeof(int32_t);\n-const int BamStandardIndex::SIZEOF_LINEAROFFSET = sizeof(uint64_t);\n-\n-// ctor\n-BamStandardIndex::BamStandardIndex(Internal::BamReaderPrivate* reader)\n- : BamIndex(reader)\n- , m_indexStream(0)\n- , m_cacheMode(BamIndex::LimitedIndexCaching)\n- , m_buffer(0)\n- , m_bufferLength(0)\n-{\n- m_isBigEndian = BamTools::SystemIsBigEndian();\n-}\n-\n-// dtor\n-BamStandardIndex::~BamStandardIndex(void) {\n- CloseFile();\n-}\n-\n-bool BamStandardIndex::AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end) {\n-\n- // retrieve references from reader\n- const RefVector& references = m_reader->GetReferenceData();\n-\n- // make sure left-bound position is valid\n- if ( region.LeftPosition > references.at(region.LeftRefID).RefLength )\n- return false;\n-\n- // set region \'begin\'\n- begin = (unsigned int)region.LeftPosition;\n-\n- // if right bound specified AND left&right bounds are on same reference\n- // OK to use right bound position as region \'end\'\n- if ( region.isRightBoundSpecified() && ( region.LeftRefID == region.RightRefID ) )\n- end = (unsigned int)region.RightPosition;\n-\n- // otherwise, set region \'end\' to last reference base\n- else end = (unsigned int)references.at(region.LeftRefID).RefLength - 1;\n-\n- // return success\n- return true;\n-}\n-\n-void BamStandardIndex::CalculateCandidateBins(const uint32_t& begin,\n- const uint32_t& end,\n- set<uint16_t>& candidateBins)\n-{\n- // initialize list, bin \'0\' is always a valid bin\n- candidateBins.insert(0);\n-\n- // get rest of bins that contain this region\n- unsigned int k;\n- for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { candidateBins.insert(k); }\n- for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { candidateBins.insert(k); }\n- for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { candidateBins.insert(k); }\n- for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { candidateBins.insert(k); }\n- for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { candidateBins.insert(k); }\n-}\n-\n-bool BamStandardIndex::CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n- const uint64_t& minOffset,\n- set<uint16_t>& candidateBins,\n- vector<int64_t>& offsets)\n-{\n- // attempt seek to first bin\n- if ( !Seek(refSummary.FirstBinFilePosition, SEEK_SET) )\n- return false;\n-\n- // iterate over reference bins\n- uint32_t bi'..b"ctor& chunks) {\n-\n- // make sure chunks are merged (simplified) before writing & saving summary\n- MergeAlignmentChunks(chunks);\n-\n- size_t elementsWritten = 0;\n-\n- // write chunks\n- int32_t chunkCount = chunks.size();\n- if ( m_isBigEndian ) SwapEndian_32(chunkCount);\n- elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);\n-\n- // iterate over chunks\n- bool chunksOk = true;\n- BaiAlignmentChunkVector::const_iterator chunkIter = chunks.begin();\n- BaiAlignmentChunkVector::const_iterator chunkEnd = chunks.end();\n- for ( ; chunkIter != chunkEnd; ++chunkIter )\n- chunksOk &= WriteAlignmentChunk( (*chunkIter) );\n-\n- // return success/failure of write\n- return ( (elementsWritten == 1) && chunksOk );\n-}\n-\n-bool BamStandardIndex::WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks) {\n-\n- size_t elementsWritten = 0;\n-\n- // write BAM bin ID\n- uint32_t binKey = binId;\n- if ( m_isBigEndian ) SwapEndian_32(binKey);\n- elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);\n-\n- // write bin's alignment chunks\n- bool chunksOk = WriteAlignmentChunks(chunks);\n-\n- // return success/failure of write\n- return ( (elementsWritten == 1) && chunksOk );\n-}\n-\n-bool BamStandardIndex::WriteBins(const int& refId, BaiBinMap& bins) {\n-\n- size_t elementsWritten = 0;\n-\n- // write number of bins\n- int32_t binCount = bins.size();\n- if ( m_isBigEndian ) SwapEndian_32(binCount);\n- elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);\n-\n- // save summary for reference's bins\n- SaveBinsSummary(refId, bins.size());\n-\n- // iterate over bins\n- bool binsOk = true;\n- BaiBinMap::iterator binIter = bins.begin();\n- BaiBinMap::iterator binEnd = bins.end();\n- for ( ; binIter != binEnd; ++binIter )\n- binsOk &= WriteBin( (*binIter).first, (*binIter).second );\n-\n- // return success/failure of write\n- return ( (elementsWritten == 1) && binsOk );\n-}\n-\n-bool BamStandardIndex::WriteHeader(void) {\n-\n- size_t elementsWritten = 0;\n-\n- // write magic number\n- elementsWritten += fwrite(BamStandardIndex::BAI_MAGIC, sizeof(char), 4, m_indexStream);\n-\n- // write number of reference sequences\n- int32_t numReferences = m_indexFileSummary.size();\n- if ( m_isBigEndian ) SwapEndian_32(numReferences);\n- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n-\n- // return success/failure of write\n- return (elementsWritten == 5);\n-}\n-\n-bool BamStandardIndex::WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets) {\n-\n- // make sure linear offsets are sorted before writing & saving summary\n- SortLinearOffsets(linearOffsets);\n-\n- size_t elementsWritten = 0;\n-\n- // write number of linear offsets\n- int32_t offsetCount = linearOffsets.size();\n- if ( m_isBigEndian ) SwapEndian_32(offsetCount);\n- elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);\n-\n- // save summary for reference's linear offsets\n- SaveLinearOffsetsSummary(refId, linearOffsets.size());\n-\n- // iterate over linear offsets\n- BaiLinearOffsetVector::const_iterator offsetIter = linearOffsets.begin();\n- BaiLinearOffsetVector::const_iterator offsetEnd = linearOffsets.end();\n- for ( ; offsetIter != offsetEnd; ++offsetIter ) {\n-\n- // write linear offset\n- uint64_t linearOffset = (*offsetIter);\n- if ( m_isBigEndian ) SwapEndian_64(linearOffset);\n- elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);\n- }\n-\n- // return success/failure of write\n- return ( elementsWritten == (size_t)(linearOffsets.size() + 1) );\n-}\n-\n-bool BamStandardIndex::WriteReferenceEntry(BaiReferenceEntry& refEntry) {\n- bool refOk = true;\n- refOk &= WriteBins(refEntry.ID, refEntry.Bins);\n- refOk &= WriteLinearOffsets(refEntry.ID, refEntry.LinearOffsets);\n- return refOk;\n-}\n" |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamStandardIndex_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,237 +0,0 @@\n-// ***************************************************************************\n-// BamStandardIndex.h (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 5 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides index operations for the standardized BAM index format (".bai")\n-// ***************************************************************************\n-\n-#ifndef BAM_STANDARD_INDEX_FORMAT_H\n-#define BAM_STANDARD_INDEX_FORMAT_H\n-\n-// -------------\n-// W A R N I N G\n-// -------------\n-//\n-// This file is not part of the BamTools API. It exists purely as an\n-// implementation detail. This header file may change from version to\n-// version without notice, or even be removed.\n-//\n-// We mean it.\n-\n-#include <api/BamAux.h>\n-#include <api/BamIndex.h>\n-#include <map>\n-#include <set>\n-#include <string>\n-#include <vector>\n-\n-namespace BamTools {\n-namespace Internal {\n-\n-// -----------------------------------------------------------------------------\n-// BamStandardIndex data structures\n-\n-// defines start and end of a contiguous run of alignments\n-struct BaiAlignmentChunk {\n-\n- // data members\n- uint64_t Start;\n- uint64_t Stop;\n-\n- // constructor\n- BaiAlignmentChunk(const uint64_t& start = 0,\n- const uint64_t& stop = 0)\n- : Start(start)\n- , Stop(stop)\n- { }\n-};\n-\n-// comparison operator (for sorting)\n-inline\n-bool operator<(const BaiAlignmentChunk& lhs, const BaiAlignmentChunk& rhs) {\n- return lhs.Start < rhs.Start;\n-}\n-\n-// convenience typedef for a list of all alignment \'chunks\' in a BAI bin\n-typedef std::vector<BaiAlignmentChunk> BaiAlignmentChunkVector;\n-\n-// convenience typedef for a map of all BAI bins in a reference (ID => chunks)\n-typedef std::map<uint32_t, BaiAlignmentChunkVector> BaiBinMap;\n-\n-// convenience typedef for a list of all \'linear offsets\' in a reference\n-typedef std::vector<uint64_t> BaiLinearOffsetVector;\n-\n-// contains all fields necessary for building, loading, & writing\n-// full BAI index data for a single reference\n-struct BaiReferenceEntry {\n-\n- // data members\n- int32_t ID;\n- BaiBinMap Bins;\n- BaiLinearOffsetVector LinearOffsets;\n-\n- // ctor\n- BaiReferenceEntry(const int32_t& id = -1)\n- : ID(id)\n- { }\n-};\n-\n-// provides (persistent) summary of BaiReferenceEntry\'s index data\n-struct BaiReferenceSummary {\n-\n- // data members\n- int NumBins;\n- int NumLinearOffsets;\n- uint64_t FirstBinFilePosition;\n- uint64_t FirstLinearOffsetFilePosition;\n-\n- // ctor\n- BaiReferenceSummary(void)\n- : NumBins(0)\n- , NumLinearOffsets(0)\n- , FirstBinFilePosition(0)\n- , FirstLinearOffsetFilePosition(0)\n- { }\n-};\n-\n-// convenience typedef for describing a full BAI index file summary\n-typedef std::vector<BaiReferenceSummary> BaiFileSummary;\n-\n-// end BamStandardIndex data structures\n-// -----------------------------------------------------------------------------\n-\n-class BamStandardIndex : public BamIndex {\n-\n- // ctor & dtor\n- public:\n- BamStandardIndex(Internal::BamReaderPrivate* reader);\n- ~BamStandardIndex(void);\n-\n- // BamIndex implementation\n- public:\n- // builds index from associated BAM file & writes out to index file\n- bool Create(void);\n- // returns whether reference has alignments or no\n- bool HasAlignments(const int& referenceID) const;\n- // attempts to use index data to jump to @region, returns success/fail\n- // a "successful" jump indicates no error, but not whether this region has data\n- // * thus, the method sets a flag to indicate whether there are alignments\n- // available after the jump position\n- bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegio'..b'ds\n- private:\n- bool AdjustRegion(const BamRegion& region, uint32_t& begin, uint32_t& end);\n- void CalculateCandidateBins(const uint32_t& begin,\n- const uint32_t& end,\n- std::set<uint16_t>& candidateBins);\n- bool CalculateCandidateOffsets(const BaiReferenceSummary& refSummary,\n- const uint64_t& minOffset,\n- std::set<uint16_t>& candidateBins,\n- std::vector<int64_t>& offsets);\n- uint64_t CalculateMinOffset(const BaiReferenceSummary& refSummary, const uint32_t& begin);\n- bool GetOffsets(const BamRegion& region, std::vector<int64_t>& offsets);\n- uint64_t LookupLinearOffset(const BaiReferenceSummary& refSummary, const int& index);\n-\n- // internal BAI summary (create/load) methods\n- private:\n- void ReserveForSummary(const int& numReferences);\n- void SaveBinsSummary(const int& refId, const int& numBins);\n- void SaveLinearOffsetsSummary(const int& refId, const int& numLinearOffsets);\n- bool SkipBins(const int& numBins);\n- bool SkipLinearOffsets(const int& numLinearOffsets);\n- bool SummarizeBins(BaiReferenceSummary& refSummary);\n- bool SummarizeIndexFile(void);\n- bool SummarizeLinearOffsets(BaiReferenceSummary& refSummary);\n- bool SummarizeReference(BaiReferenceSummary& refSummary);\n-\n- // internal BAI full index input methods\n- private:\n- bool ReadBinID(uint32_t& binId);\n- bool ReadBinIntoBuffer(uint32_t& binId, int32_t& numAlignmentChunks);\n- bool ReadIntoBuffer(const unsigned int& bytesRequested);\n- bool ReadLinearOffset(uint64_t& linearOffset);\n- bool ReadNumAlignmentChunks(int& numAlignmentChunks);\n- bool ReadNumBins(int& numBins);\n- bool ReadNumLinearOffsets(int& numLinearOffsets);\n- bool ReadNumReferences(int& numReferences);\n-\n- // internal BAI full index output methods\n- private:\n- void MergeAlignmentChunks(BaiAlignmentChunkVector& chunks);\n- void SortLinearOffsets(BaiLinearOffsetVector& linearOffsets);\n- bool WriteAlignmentChunk(const BaiAlignmentChunk& chunk);\n- bool WriteAlignmentChunks(BaiAlignmentChunkVector& chunks);\n- bool WriteBin(const uint32_t& binId, BaiAlignmentChunkVector& chunks);\n- bool WriteBins(const int& refId, BaiBinMap& bins);\n- bool WriteHeader(void);\n- bool WriteLinearOffsets(const int& refId, BaiLinearOffsetVector& linearOffsets);\n- bool WriteReferenceEntry(BaiReferenceEntry& refEntry);\n-\n- // data members\n- private:\n- FILE* m_indexStream;\n- bool m_isBigEndian;\n- BamIndex::IndexCacheMode m_cacheMode;\n- BaiFileSummary m_indexFileSummary;\n-\n- // our input buffer\n- char* m_buffer;\n- unsigned int m_bufferLength;\n-\n- // static methods\n- private:\n- // checks if the buffer is large enough to accomodate the requested size\n- static void CheckBufferSize(char*& buffer,\n- unsigned int& bufferLength,\n- const unsigned int& requestedBytes);\n- // checks if the buffer is large enough to accomodate the requested size\n- static void CheckBufferSize(unsigned char*& buffer,\n- unsigned int& bufferLength,\n- const unsigned int& requestedBytes);\n- // static constants\n- private:\n- static const int MAX_BIN;\n- static const int BAM_LIDX_SHIFT;\n- static const std::string BAI_EXTENSION;\n- static const char* const BAI_MAGIC;\n- static const int SIZEOF_ALIGNMENTCHUNK;\n- static const int SIZEOF_BINCORE;\n- static const int SIZEOF_LINEAROFFSET;\n-};\n-\n-} // namespace Internal\n-} // namespace BamTools\n-\n-#endif // BAM_STANDARD_INDEX_FORMAT_H\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,642 +0,0 @@\n-// ***************************************************************************\n-// BamToolsIndex.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 27 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides index operations for the BamTools index format (".bti")\n-// ***************************************************************************\n-\n-#include <api/BamAlignment.h>\n-#include <api/internal/BamReader_p.h>\n-#include <api/internal/BamToolsIndex_p.h>\n-#include <api/internal/BgzfStream_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <cstdio>\n-#include <cstdlib>\n-#include <cstring>\n-#include <algorithm>\n-#include <iostream>\n-#include <iterator>\n-#include <map>\n-using namespace std;\n-\n-// static BamToolsIndex constants\n-const int BamToolsIndex::DEFAULT_BLOCK_LENGTH = 1000;\n-const string BamToolsIndex::BTI_EXTENSION = ".bti";\n-const char* const BamToolsIndex::BTI_MAGIC = "BTI\\1";\n-const int BamToolsIndex::SIZEOF_BLOCK = sizeof(int32_t)*2 + sizeof(int64_t);\n-\n-// ctor\n-BamToolsIndex::BamToolsIndex(Internal::BamReaderPrivate* reader)\n- : BamIndex(reader)\n- , m_indexStream(0)\n- , m_cacheMode(BamIndex::LimitedIndexCaching)\n- , m_blockSize(BamToolsIndex::DEFAULT_BLOCK_LENGTH)\n- , m_inputVersion(0)\n- , m_outputVersion(BTI_1_2) // latest version - used for writing new index files\n-{\n- m_isBigEndian = BamTools::SystemIsBigEndian();\n-}\n-\n-// dtor\n-BamToolsIndex::~BamToolsIndex(void) {\n- CloseFile();\n-}\n-\n-bool BamToolsIndex::CheckMagicNumber(void) {\n-\n- // check \'magic number\' to see if file is BTI index\n- char magic[4];\n- size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);\n- if ( elementsRead != 4 ) {\n- cerr << "BamToolsIndex ERROR: could not read format \'magic\' number" << endl;\n- return false;\n- }\n-\n- if ( strncmp(magic, BamToolsIndex::BTI_MAGIC, 4) != 0 ) {\n- cerr << "BamToolsIndex ERROR: invalid format" << endl;\n- return false;\n- }\n-\n- // otherwise ok\n- return true;\n-}\n-\n-// check index file version, return true if OK\n-bool BamToolsIndex::CheckVersion(void) {\n-\n- // read version from file\n- size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);\n- if ( elementsRead != 1 ) return false;\n- if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);\n-\n- // if version is negative, or zero\n- if ( m_inputVersion <= 0 ) {\n- cerr << "BamToolsIndex ERROR: could not load index file: invalid version."\n- << endl;\n- return false;\n- }\n-\n- // if version is newer than can be supported by this version of bamtools\n- else if ( m_inputVersion > m_outputVersion ) {\n- cerr << "BamToolsIndex ERROR: could not load index file. This version of BamTools does not recognize new index file version"\n- << endl\n- << "Please update BamTools to a more recent version to support this index file."\n- << endl;\n- return false;\n- }\n-\n- // ------------------------------------------------------------------\n- // check for deprecated, unsupported versions\n- // (typically whose format did not accomodate a particular bug fix)\n-\n- else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_0 ) {\n- cerr << "BamToolsIndex ERROR: could not load index file. This version of the index contains a bug related to accessing data near reference ends."\n- << endl << endl\n- << "Please run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date, fixed BTI file."\n- << endl << endl;\n- return false;\n- }\n-\n- else if ( (Version)m_inputVersion == BamToolsIndex::BTI_1_1 ) {\n- cerr << "BamToolsIndex ERROR: could not load index file. '..b"efSummary.FirstBlockFilePosition << endl;\n- return false;\n- }\n-\n- // read & store block entries\n- bool readOk = true;\n- BtiBlock block;\n- for ( int i = 0; i < refSummary.NumBlocks; ++i ) {\n- readOk &= ReadBlock(block);\n- blocks.push_back(block);\n- }\n- return readOk;\n-}\n-\n-bool BamToolsIndex::ReadReferenceEntry(BtiReferenceEntry& refEntry) {\n-\n- // return false if refId not valid index in file summary structure\n- if ( refEntry.ID < 0 || refEntry.ID >= (int)m_indexFileSummary.size() )\n- return false;\n-\n- // use index summary to assist reading the reference's BTI blocks\n- const BtiReferenceSummary& refSummary = m_indexFileSummary.at(refEntry.ID);\n- return ReadBlocks(refSummary, refEntry.Blocks);\n-}\n-\n-bool BamToolsIndex::Seek(const int64_t& position, const int& origin) {\n- return ( fseek64(m_indexStream, position, origin) == 0 );\n-}\n-\n-// change the index caching behavior\n-void BamToolsIndex::SetCacheMode(const BamIndex::IndexCacheMode& mode) {\n- m_cacheMode = mode;\n- // do nothing else here ? cache mode will be ignored from now on, most likely\n-}\n-\n-bool BamToolsIndex::SkipBlocks(const int& numBlocks) {\n- return Seek( numBlocks*BamToolsIndex::SIZEOF_BLOCK, SEEK_CUR );\n-}\n-\n-int64_t BamToolsIndex::Tell(void) const {\n- return ftell64(m_indexStream);\n-}\n-\n-bool BamToolsIndex::WriteBlock(const BtiBlock& block) {\n-\n- // copy entry data\n- int32_t maxEndPosition = block.MaxEndPosition;\n- int64_t startOffset = block.StartOffset;\n- int32_t startPosition = block.StartPosition;\n-\n- // swap endian-ness if necessary\n- if ( m_isBigEndian ) {\n- SwapEndian_32(maxEndPosition);\n- SwapEndian_64(startOffset);\n- SwapEndian_32(startPosition);\n- }\n-\n- // write the reference index entry\n- size_t elementsWritten = 0;\n- elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);\n- elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, m_indexStream);\n- elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, m_indexStream);\n- return ( elementsWritten == 3 );\n-}\n-\n-bool BamToolsIndex::WriteBlocks(const BtiBlockVector& blocks) {\n- bool writtenOk = true;\n- BtiBlockVector::const_iterator blockIter = blocks.begin();\n- BtiBlockVector::const_iterator blockEnd = blocks.end();\n- for ( ; blockIter != blockEnd; ++blockIter )\n- writtenOk &= WriteBlock(*blockIter);\n- return writtenOk;\n-}\n-\n-bool BamToolsIndex::WriteHeader(void) {\n-\n- size_t elementsWritten = 0;\n-\n- // write BTI index format 'magic number'\n- elementsWritten += fwrite(BamToolsIndex::BTI_MAGIC, 1, 4, m_indexStream);\n-\n- // write BTI index format version\n- int32_t currentVersion = (int32_t)m_outputVersion;\n- if ( m_isBigEndian ) SwapEndian_32(currentVersion);\n- elementsWritten += fwrite(¤tVersion, sizeof(currentVersion), 1, m_indexStream);\n-\n- // write block size\n- int32_t blockSize = m_blockSize;\n- if ( m_isBigEndian ) SwapEndian_32(blockSize);\n- elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);\n-\n- // write number of references\n- int32_t numReferences = m_indexFileSummary.size();\n- if ( m_isBigEndian ) SwapEndian_32(numReferences);\n- elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);\n-\n- // return success/failure of write\n- return ( elementsWritten == 7 );\n-}\n-\n-bool BamToolsIndex::WriteReferenceEntry(const BtiReferenceEntry& refEntry) {\n-\n- size_t elementsWritten = 0;\n-\n- // write number of blocks this reference\n- uint32_t numBlocks = refEntry.Blocks.size();\n- if ( m_isBigEndian ) SwapEndian_32(numBlocks);\n- elementsWritten += fwrite(&numBlocks, sizeof(numBlocks), 1, m_indexStream);\n-\n- // write actual block entries\n- const bool blocksOk = WriteBlocks(refEntry.Blocks);\n-\n- // return success/fail\n- return ( elementsWritten == 1) && blocksOk;\n-}\n" |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamToolsIndex_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,188 +0,0 @@ -// *************************************************************************** -// BamToolsIndex.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides index operations for the BamTools index format (".bti") -// *************************************************************************** - -#ifndef BAMTOOLS_INDEX_FORMAT_H -#define BAMTOOLS_INDEX_FORMAT_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. - -#include <api/BamAux.h> -#include <api/BamIndex.h> -#include <map> -#include <string> -#include <vector> - -namespace BamTools { -namespace Internal { - -// contains data for each 'block' in a BTI index -struct BtiBlock { - - // data members - int32_t MaxEndPosition; - int64_t StartOffset; - int32_t StartPosition; - - // ctor - BtiBlock(const int32_t& maxEndPosition = 0, - const int64_t& startOffset = 0, - const int32_t& startPosition = 0) - : MaxEndPosition(maxEndPosition) - , StartOffset(startOffset) - , StartPosition(startPosition) - { } -}; - -// convenience typedef for describing a a list of BTI blocks on a reference -typedef std::vector<BtiBlock> BtiBlockVector; - -// contains all fields necessary for building, loading, & writing -// full BTI index data for a single reference -struct BtiReferenceEntry { - - // data members - int32_t ID; - BtiBlockVector Blocks; - - // ctor - BtiReferenceEntry(const int& id = -1) - : ID(id) - { } -}; - -// provides (persistent) summary of BtiReferenceEntry's index data -struct BtiReferenceSummary { - - // data members - int NumBlocks; - uint64_t FirstBlockFilePosition; - - // ctor - BtiReferenceSummary(void) - : NumBlocks(0) - , FirstBlockFilePosition(0) - { } -}; - -// convenience typedef for describing a full BTI index file summary -typedef std::vector<BtiReferenceSummary> BtiFileSummary; - -class BamToolsIndex : public BamIndex { - - // keep a list of any supported versions here - // (might be useful later to handle any 'legacy' versions if the format changes) - // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on - // - // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by: - // - // if ( indexVersion >= BTI_1_2 ) - // do something new - // else - // do the old thing - enum Version { BTI_1_0 = 1 - , BTI_1_1 - , BTI_1_2 - }; - - // ctor & dtor - public: - BamToolsIndex(Internal::BamReaderPrivate* reader); - ~BamToolsIndex(void); - - // BamIndex implementation - public: - // builds index from associated BAM file & writes out to index file - bool Create(void); - // returns whether reference has alignments or no - bool HasAlignments(const int& referenceID) const; - // attempts to use index data to jump to @region, returns success/fail - // a "successful" jump indicates no error, but not whether this region has data - // * thus, the method sets a flag to indicate whether there are alignments - // available after the jump position - bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); - // loads existing data from file into memory - bool Load(const std::string& filename); - // change the index caching behavior - void SetCacheMode(const BamIndex::IndexCacheMode& mode); - public: - // returns format's file extension - static const std::string Extension(void); - - // internal file ops - private: - bool CheckMagicNumber(void); - bool CheckVersion(void); - void CloseFile(void); - bool IsFileOpen(void) const; - bool OpenFile(const std::string& filename, const char* mode); - bool Seek(const int64_t& position, const int& origin); - int64_t Tell(void) const; - - // internal BTI index building methods - private: - void ClearReferenceEntry(BtiReferenceEntry& refEntry); - - // internal random-access methods - private: - bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion); - - // internal BTI summary data methods - private: - void InitializeFileSummary(const int& numReferences); - bool LoadFileSummary(void); - bool LoadHeader(void); - bool LoadNumBlocks(int& numBlocks); - bool LoadNumReferences(int& numReferences); - bool LoadReferenceSummary(BtiReferenceSummary& refSummary); - bool SkipBlocks(const int& numBlocks); - - // internal BTI full index input methods - private: - bool ReadBlock(BtiBlock& block); - bool ReadBlocks(const BtiReferenceSummary& refSummary, BtiBlockVector& blocks); - bool ReadReferenceEntry(BtiReferenceEntry& refEntry); - - // internal BTI full index output methods - private: - bool WriteBlock(const BtiBlock& block); - bool WriteBlocks(const BtiBlockVector& blocks); - bool WriteHeader(void); - bool WriteReferenceEntry(const BtiReferenceEntry& refEntry); - - // data members - private: - FILE* m_indexStream; - bool m_isBigEndian; - BamIndex::IndexCacheMode m_cacheMode; - BtiFileSummary m_indexFileSummary; - int m_blockSize; - int32_t m_inputVersion; // Version is serialized as int - Version m_outputVersion; - - // static constants - private: - static const int DEFAULT_BLOCK_LENGTH; - static const std::string BTI_EXTENSION; - static const char* const BTI_MAGIC; - static const int SIZEOF_BLOCK; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMTOOLS_INDEX_FORMAT_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,425 +0,0 @@\n-// ***************************************************************************\n-// BamWriter_p.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 16 June 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides the basic functionality for producing BAM files\n-// ***************************************************************************\n-\n-#include <api/BamAlignment.h>\n-#include <api/BamConstants.h>\n-#include <api/internal/BamWriter_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <cstdio>\n-#include <cstdlib>\n-#include <cstring>\n-using namespace std;\n-\n-// ctor\n-BamWriterPrivate::BamWriterPrivate(void)\n- : m_isBigEndian( BamTools::SystemIsBigEndian() )\n-{ }\n-\n-// dtor\n-BamWriterPrivate::~BamWriterPrivate(void) {\n- m_stream.Close();\n-}\n-\n-// calculates minimum bin for a BAM alignment interval\n-unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {\n- --end;\n- if ( (begin >> 14) == (end >> 14) ) return 4681 + (begin >> 14);\n- if ( (begin >> 17) == (end >> 17) ) return 585 + (begin >> 17);\n- if ( (begin >> 20) == (end >> 20) ) return 73 + (begin >> 20);\n- if ( (begin >> 23) == (end >> 23) ) return 9 + (begin >> 23);\n- if ( (begin >> 26) == (end >> 26) ) return 1 + (begin >> 26);\n- return 0;\n-}\n-\n-// closes the alignment archive\n-void BamWriterPrivate::Close(void) {\n- m_stream.Close();\n-}\n-\n-// creates a cigar string from the supplied alignment\n-void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {\n-\n- // initialize\n- const unsigned int numCigarOperations = cigarOperations.size();\n- packedCigar.resize(numCigarOperations * Constants::BAM_SIZEOF_INT);\n-\n- // pack the cigar data into the string\n- unsigned int* pPackedCigar = (unsigned int*)packedCigar.data();\n-\n- // iterate over cigar operations\n- vector<CigarOp>::const_iterator coIter = cigarOperations.begin();\n- vector<CigarOp>::const_iterator coEnd = cigarOperations.end();\n- for ( ; coIter != coEnd; ++coIter ) {\n-\n- // store op in packedCigar\n- unsigned int cigarOp;\n- switch ( coIter->Type ) {\n- case (Constants::BAM_CIGAR_MATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MATCH; break;\n- case (Constants::BAM_CIGAR_INS_CHAR) : cigarOp = Constants::BAM_CIGAR_INS; break;\n- case (Constants::BAM_CIGAR_DEL_CHAR) : cigarOp = Constants::BAM_CIGAR_DEL; break;\n- case (Constants::BAM_CIGAR_REFSKIP_CHAR) : cigarOp = Constants::BAM_CIGAR_REFSKIP; break;\n- case (Constants::BAM_CIGAR_SOFTCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_SOFTCLIP; break;\n- case (Constants::BAM_CIGAR_HARDCLIP_CHAR) : cigarOp = Constants::BAM_CIGAR_HARDCLIP; break;\n- case (Constants::BAM_CIGAR_PAD_CHAR) : cigarOp = Constants::BAM_CIGAR_PAD; break;\n- case (Constants::BAM_CIGAR_SEQMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_SEQMATCH; break;\n- case (Constants::BAM_CIGAR_MISMATCH_CHAR) : cigarOp = Constants::BAM_CIGAR_MISMATCH; break;\n- default:\n- fprintf(stderr, "BamWriter ERROR: unknown cigar operation found: %c\\n", coIter->Type);\n- exit(1);\n- }\n-\n- *pPackedCigar = coIter->Length << Constants::BAM_CIGAR_SHIFT | cigarOp;\n- pPackedCigar++;\n- }\n-}\n-\n-// encodes the supplied query sequence into 4-bit notation\n-void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {\n-\n- // prepare the encoded query string\n- const unsigned int queryLen = query.size();\n- const unsigned int encodedQueryLen = (unsigned int)((queryLen / 2.0) + 0.5);\n- encodedQuery.resize(encodedQueryLen);\n- char* pEncode'..b' ++i;\n- break;\n- case (Constants::BAM_TAG_TYPE_INT16) :\n- case (Constants::BAM_TAG_TYPE_UINT16) :\n- BamTools::SwapEndian_16p(&tagData[i]);\n- i += sizeof(uint16_t);\n- break;\n- case (Constants::BAM_TAG_TYPE_FLOAT) :\n- case (Constants::BAM_TAG_TYPE_INT32) :\n- case (Constants::BAM_TAG_TYPE_UINT32) :\n- BamTools::SwapEndian_32p(&tagData[i]);\n- i += sizeof(uint32_t);\n- break;\n- default:\n- // error case\n- fprintf(stderr,\n- "BamWriter ERROR: unknown binary array type encountered: [%c]\\n",\n- arrayType);\n- exit(1);\n- }\n- }\n-\n- break;\n- }\n-\n- default :\n- fprintf(stderr, "BamWriter ERROR: invalid tag value type\\n"); // shouldn\'t get here\n- free(tagData);\n- exit(1);\n- }\n- }\n- m_stream.Write(tagData, tagDataLength);\n- free(tagData);\n- }\n- else\n- m_stream.Write(al.TagData.data(), tagDataLength);\n- }\n-}\n-\n-void BamWriterPrivate::SetWriteCompressed(bool ok) {\n-\n- // warn if BAM file is already open\n- // modifying compression is not allowed in this case\n- if ( IsOpen() ) {\n- cerr << "BamWriter WARNING: attempting to change compression mode on an open BAM file is not allowed. "\n- << "Ignoring request." << endl;\n- return;\n- }\n-\n- // set BgzfStream compression mode\n- m_stream.SetWriteCompressed(ok);\n-}\n-\n-void BamWriterPrivate::WriteMagicNumber(void) {\n- // write BAM file \'magic number\'\n- m_stream.Write(Constants::BAM_HEADER_MAGIC, Constants::BAM_HEADER_MAGIC_LENGTH);\n-}\n-\n-void BamWriterPrivate::WriteReferences(const BamTools::RefVector& referenceSequences) {\n-\n- // write the number of reference sequences\n- uint32_t numReferenceSequences = referenceSequences.size();\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(numReferenceSequences);\n- m_stream.Write((char*)&numReferenceSequences, Constants::BAM_SIZEOF_INT);\n-\n- // foreach reference sequence\n- RefVector::const_iterator rsIter = referenceSequences.begin();\n- RefVector::const_iterator rsEnd = referenceSequences.end();\n- for ( ; rsIter != rsEnd; ++rsIter ) {\n-\n- // write the reference sequence name length\n- uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceSequenceNameLen);\n- m_stream.Write((char*)&referenceSequenceNameLen, Constants::BAM_SIZEOF_INT);\n-\n- // write the reference sequence name\n- m_stream.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);\n-\n- // write the reference sequence length\n- int32_t referenceLength = rsIter->RefLength;\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(referenceLength);\n- m_stream.Write((char*)&referenceLength, Constants::BAM_SIZEOF_INT);\n- }\n-}\n-\n-void BamWriterPrivate::WriteSamHeaderText(const std::string& samHeaderText) {\n-\n- // write the SAM header text length\n- uint32_t samHeaderLen = samHeaderText.size();\n- if ( m_isBigEndian ) BamTools::SwapEndian_32(samHeaderLen);\n- m_stream.Write((char*)&samHeaderLen, Constants::BAM_SIZEOF_INT);\n-\n- // write the SAM header text\n- if ( samHeaderLen > 0 )\n- m_stream.Write(samHeaderText.data(), samHeaderLen);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BamWriter_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,67 +0,0 @@ -// *************************************************************************** -// BamWriter_p.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 24 February 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic functionality for producing BAM files -// *************************************************************************** - -#ifndef BAMWRITER_P_H -#define BAMWRITER_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to -// version without notice, or even be removed. -// -// We mean it. - -#include <api/BamAux.h> -#include <api/internal/BgzfStream_p.h> -#include <string> -#include <vector> - -namespace BamTools { -namespace Internal { - -class BamWriterPrivate { - - // ctor & dtor - public: - BamWriterPrivate(void); - ~BamWriterPrivate(void); - - // interface methods - public: - void Close(void); - bool IsOpen(void) const; - bool Open(const std::string& filename, - const std::string& samHeaderText, - const BamTools::RefVector& referenceSequences); - void SaveAlignment(const BamAlignment& al); - void SetWriteCompressed(bool ok); - - // 'internal' methods - public: - unsigned int CalculateMinimumBin(const int begin, int end) const; - void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar); - void EncodeQuerySequence(const std::string& query, std::string& encodedQuery); - void WriteMagicNumber(void); - void WriteReferences(const BamTools::RefVector& referenceSequences); - void WriteSamHeaderText(const std::string& samHeaderText); - - // data members - private: - BgzfStream m_stream; - bool m_isBigEndian; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // BAMWRITER_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,439 +0,0 @@\n-// ***************************************************************************\n-// BgzfStream_p.cpp (c) 2011 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 5 April 2011(DB)\n-// ---------------------------------------------------------------------------\n-// Based on BGZF routines developed at the Broad Institute.\n-// Provides the basic functionality for reading & writing BGZF files\n-// Replaces the old BGZF.* files to avoid clashing with other toolkits\n-// ***************************************************************************\n-\n-#include <api/internal/BgzfStream_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <cstring>\n-#include <algorithm>\n-using namespace std;\n-\n-// constructor\n-BgzfStream::BgzfStream(void)\n- : UncompressedBlockSize(Constants::BGZF_DEFAULT_BLOCK_SIZE)\n- , CompressedBlockSize(Constants::BGZF_MAX_BLOCK_SIZE)\n- , BlockLength(0)\n- , BlockOffset(0)\n- , BlockAddress(0)\n- , IsOpen(false)\n- , IsWriteOnly(false)\n- , IsWriteCompressed(true)\n- , Stream(NULL)\n- , UncompressedBlock(NULL)\n- , CompressedBlock(NULL)\n-{\n- try {\n- CompressedBlock = new char[CompressedBlockSize];\n- UncompressedBlock = new char[UncompressedBlockSize];\n- } catch( std::bad_alloc& ba ) {\n- fprintf(stderr, "BgzfStream ERROR: unable to allocate memory\\n");\n- exit(1);\n- }\n-}\n-\n-// destructor\n-BgzfStream::~BgzfStream(void) {\n- if( CompressedBlock ) delete[] CompressedBlock;\n- if( UncompressedBlock ) delete[] UncompressedBlock;\n-}\n-\n-// closes BGZF file\n-void BgzfStream::Close(void) {\n-\n- // skip if file not open\n- if ( !IsOpen ) return;\n-\n- // if writing to file, flush the current BGZF block,\n- // then write an empty block (as EOF marker)\n- if ( IsWriteOnly ) {\n- FlushBlock();\n- int blockLength = DeflateBlock();\n- fwrite(CompressedBlock, 1, blockLength, Stream);\n- }\n-\n- // flush and close stream\n- fflush(Stream);\n- fclose(Stream);\n-\n- // reset flags\n- IsWriteCompressed = true;\n- IsOpen = false;\n-}\n-\n-// compresses the current block\n-int BgzfStream::DeflateBlock(void) {\n-\n- // initialize the gzip header\n- char* buffer = CompressedBlock;\n- memset(buffer, 0, 18);\n- buffer[0] = Constants::GZIP_ID1;\n- buffer[1] = (char)Constants::GZIP_ID2;\n- buffer[2] = Constants::CM_DEFLATE;\n- buffer[3] = Constants::FLG_FEXTRA;\n- buffer[9] = (char)Constants::OS_UNKNOWN;\n- buffer[10] = Constants::BGZF_XLEN;\n- buffer[12] = Constants::BGZF_ID1;\n- buffer[13] = Constants::BGZF_ID2;\n- buffer[14] = Constants::BGZF_LEN;\n-\n- // set compression level\n- const int compressionLevel = ( IsWriteCompressed ? Z_DEFAULT_COMPRESSION : 0 );\n-\n- // loop to retry for blocks that do not compress enough\n- int inputLength = BlockOffset;\n- int compressedLength = 0;\n- unsigned int bufferSize = CompressedBlockSize;\n-\n- while ( true ) {\n-\n- // initialize zstream values\n- z_stream zs;\n- zs.zalloc = NULL;\n- zs.zfree = NULL;\n- zs.next_in = (Bytef*)UncompressedBlock;\n- zs.avail_in = inputLength;\n- zs.next_out = (Bytef*)&buffer[Constants::BGZF_BLOCK_HEADER_LENGTH];\n- zs.avail_out = bufferSize - Constants::BGZF_BLOCK_HEADER_LENGTH - Constants::BGZF_BLOCK_FOOTER_LENGTH;\n-\n- // initialize the zlib compression algorithm\n- if ( deflateInit2(&zs,\n- compressionLevel,\n- Z_DEFLATED,\n- Constants::GZIP_WINDOW_BITS,\n- Constants::Z_DEFAULT_MEM_LEVEL,\n- Z_DEFAULT_STRATEGY) != Z_OK )\n- {\n- fprintf(stderr, "BgzfStream ERROR: zlib deflate initialization failed\\n");\n- exit(1);\n- }\n-\n- /'..b'Read += copyLength;\n- }\n-\n- // update block data\n- if ( BlockOffset == BlockLength ) {\n- BlockAddress = ftell64(Stream);\n- BlockOffset = 0;\n- BlockLength = 0;\n- }\n-\n- return numBytesRead;\n-}\n-\n-// reads a BGZF block\n-bool BgzfStream::ReadBlock(void) {\n-\n- char header[Constants::BGZF_BLOCK_HEADER_LENGTH];\n- int64_t blockAddress = ftell64(Stream);\n-\n- // read block header from file\n- int count = fread(header, 1, sizeof(header), Stream);\n-\n- // if block header empty\n- if ( count == 0 ) {\n- BlockLength = 0;\n- return true;\n- }\n-\n- // if block header invalid size\n- if ( count != sizeof(header) ) {\n- fprintf(stderr, "BgzfStream ERROR: read block failed - could not read block header\\n");\n- return false;\n- }\n-\n- // validate block header contents\n- if ( !BgzfStream::CheckBlockHeader(header) ) {\n- fprintf(stderr, "BgzfStream ERROR: read block failed - invalid block header\\n");\n- return false;\n- }\n-\n- // copy header contents to compressed buffer\n- int blockLength = BamTools::UnpackUnsignedShort(&header[16]) + 1;\n- char* compressedBlock = CompressedBlock;\n- memcpy(compressedBlock, header, Constants::BGZF_BLOCK_HEADER_LENGTH);\n- int remaining = blockLength - Constants::BGZF_BLOCK_HEADER_LENGTH;\n-\n- // read remainder of block\n- count = fread(&compressedBlock[Constants::BGZF_BLOCK_HEADER_LENGTH], 1, remaining, Stream);\n- if ( count != remaining ) {\n- fprintf(stderr, "BgzfStream ERROR: read block failed - could not read data from block\\n");\n- return false;\n- }\n-\n- // decompress block data\n- count = InflateBlock(blockLength);\n- if ( count < 0 ) {\n- fprintf(stderr, "BgzfStream ERROR: read block failed - could not decompress block data\\n");\n- return false;\n- }\n-\n- // update block data\n- if ( BlockLength != 0 )\n- BlockOffset = 0;\n- BlockAddress = blockAddress;\n- BlockLength = count;\n-\n- // return success\n- return true;\n-}\n-\n-// seek to position in BGZF file\n-bool BgzfStream::Seek(const int64_t& position) {\n-\n- // skip if not open\n- if ( !IsOpen ) return false;\n-\n- // determine adjusted offset & address\n- int blockOffset = (position & 0xFFFF);\n- int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;\n-\n- // attempt seek in file\n- if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {\n- fprintf(stderr, "BgzfStream ERROR: unable to seek in file\\n");\n- return false;\n- }\n-\n- // update block data & return success\n- BlockLength = 0;\n- BlockAddress = blockAddress;\n- BlockOffset = blockOffset;\n- return true;\n-}\n-\n-void BgzfStream::SetWriteCompressed(bool ok) {\n- IsWriteCompressed = ok;\n-}\n-\n-// get file position in BGZF file\n-int64_t BgzfStream::Tell(void) const {\n- if ( !IsOpen )\n- return 0;\n- return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );\n-}\n-\n-// writes the supplied data into the BGZF buffer\n-unsigned int BgzfStream::Write(const char* data, const unsigned int dataLen) {\n-\n- // skip if file not open for writing\n- if ( !IsOpen || !IsWriteOnly ) return false;\n-\n- // write blocks as needed til all data is written\n- unsigned int numBytesWritten = 0;\n- const char* input = data;\n- unsigned int blockLength = UncompressedBlockSize;\n- while ( numBytesWritten < dataLen ) {\n-\n- // copy data contents to uncompressed output buffer\n- unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);\n- char* buffer = UncompressedBlock;\n- memcpy(buffer + BlockOffset, input, copyLength);\n-\n- // update counter\n- BlockOffset += copyLength;\n- input += copyLength;\n- numBytesWritten += copyLength;\n-\n- // flush (& compress) output buffer when full\n- if ( BlockOffset == blockLength ) FlushBlock();\n- }\n-\n- // return result\n- return numBytesWritten;\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/BgzfStream_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,109 +0,0 @@ -// *************************************************************************** -// BgzfStream_p.h (c) 2011 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 April 2011(DB) -// --------------------------------------------------------------------------- -// Based on BGZF routines developed at the Broad Institute. -// Provides the basic functionality for reading & writing BGZF files -// Replaces the old BGZF.* files to avoid clashing with other toolkits -// *************************************************************************** - -#ifndef BGZFSTREAM_P_H -#define BGZFSTREAM_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/BamAux.h> -#include <api/BamConstants.h> -#include "zlib.h" -#include <cstdio> -#include <string> - -namespace BamTools { -namespace Internal { - -class BgzfStream { - - // constructor & destructor - public: - BgzfStream(void); - ~BgzfStream(void); - - // main interface methods - public: - // closes BGZF file - void Close(void); - // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing) - bool Open(const std::string& filename, const char* mode); - // reads BGZF data into a byte buffer - int Read(char* data, const unsigned int dataLength); - // seek to position in BGZF file - bool Seek(const int64_t& position); - // enable/disable compressed output - void SetWriteCompressed(bool ok); - // get file position in BGZF file - int64_t Tell(void) const; - // writes the supplied data into the BGZF buffer - unsigned int Write(const char* data, const unsigned int dataLen); - - // internal methods - private: - // compresses the current block - int DeflateBlock(void); - // flushes the data in the BGZF block - void FlushBlock(void); - // de-compresses the current block - int InflateBlock(const int& blockLength); - // reads a BGZF block - bool ReadBlock(void); - - // static 'utility' methods - public: - // checks BGZF block header - static inline bool CheckBlockHeader(char* header); - - // data members - public: - unsigned int UncompressedBlockSize; - unsigned int CompressedBlockSize; - unsigned int BlockLength; - unsigned int BlockOffset; - uint64_t BlockAddress; - bool IsOpen; - bool IsWriteOnly; - bool IsWriteCompressed; - FILE* Stream; - char* UncompressedBlock; - char* CompressedBlock; -}; - -// ------------------------------------------------------------- -// static 'utility' method implementations - -// checks BGZF block header -inline -bool BgzfStream::CheckBlockHeader(char* header) { - return (header[0] == Constants::GZIP_ID1 && - header[1] == (char)Constants::GZIP_ID2 && - header[2] == Z_DEFLATED && - (header[3] & Constants::FLG_FEXTRA) != 0 && - BamTools::UnpackUnsignedShort(&header[10]) == Constants::BGZF_XLEN && - header[12] == Constants::BGZF_ID1 && - header[13] == Constants::BGZF_ID2 && - BamTools::UnpackUnsignedShort(&header[14]) == Constants::BGZF_LEN ); -} - -} // namespace Internal -} // namespace BamTools - -#endif // BGZFSTREAM_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,231 +0,0 @@\n-// ***************************************************************************\n-// SamFormatParser.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 19 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides functionality for parsing SAM header text into SamHeader object\n-// ***************************************************************************\n-\n-#include <api/SamConstants.h>\n-#include <api/SamHeader.h>\n-#include <api/internal/SamFormatParser_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <iostream>\n-#include <sstream>\n-#include <vector>\n-using namespace std;\n-\n-SamFormatParser::SamFormatParser(SamHeader& header)\n- : m_header(header)\n-{ }\n-\n-SamFormatParser::~SamFormatParser(void) { }\n-\n-void SamFormatParser::Parse(const string& headerText) {\n-\n- // clear header\'s prior contents\n- m_header.Clear();\n-\n- // empty header is OK, but skip processing\n- if ( headerText.empty() )\n- return;\n-\n- // other wise parse SAM lines\n- istringstream headerStream(headerText);\n- string headerLine("");\n- while ( getline(headerStream, headerLine) )\n- ParseSamLine(headerLine);\n-}\n-\n-void SamFormatParser::ParseSamLine(const string& line) {\n-\n- // skip if line is not long enough to contain true values\n- if (line.length() < 5 ) return;\n-\n- // determine token at beginning of line\n- const string firstToken = line.substr(0,3);\n- string restOfLine = line.substr(4);\n- if ( firstToken == Constants::SAM_HD_BEGIN_TOKEN) ParseHDLine(restOfLine);\n- else if ( firstToken == Constants::SAM_SQ_BEGIN_TOKEN) ParseSQLine(restOfLine);\n- else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);\n- else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);\n- else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);\n- else\n- cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;\n-}\n-\n-void SamFormatParser::ParseHDLine(const string& line) {\n-\n- // split HD lines into tokens\n- vector<string> tokens = Split(line, Constants::SAM_TAB);\n-\n- // iterate over tokens\n- vector<string>::const_iterator tokenIter = tokens.begin();\n- vector<string>::const_iterator tokenEnd = tokens.end();\n- for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n-\n- // get tag/value\n- const string tokenTag = (*tokenIter).substr(0,2);\n- const string tokenValue = (*tokenIter).substr(3);\n-\n- // set header contents\n- if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;\n- else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;\n- else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;\n- else\n- cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;\n- }\n-\n- // if @HD line exists, VN must be provided\n- if ( !m_header.HasVersion() )\n- cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;\n-}\n-\n-void SamFormatParser::ParseSQLine(const string& line) {\n-\n- SamSequence seq;\n-\n- // split SQ line into tokens\n- vector<string> tokens = Split(line, Constants::SAM_TAB);\n-\n- // iterate over tokens\n- vector<string>::const_iterator tokenIter = tokens.begin();\n- vector<string>::const_iterator tokenEnd = tokens.end();\n- for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n-\n- // get tag/value\n- const string tokenTag = (*tokenIter).substr(0,2);\n- const string tokenValue = (*tokenIter).substr(3);\n-\n- // set sequence contents\n- if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue;\n- else if ( toke'..b'r tokenIter = tokens.begin();\n- vector<string>::const_iterator tokenEnd = tokens.end();\n- for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n-\n- // get token tag/value\n- const string tokenTag = (*tokenIter).substr(0,2);\n- const string tokenValue = (*tokenIter).substr(3);\n-\n- // set read group contents\n- if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;\n- else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;\n- else\n- cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;\n- }\n-\n- bool isMissingRequiredFields = false;\n-\n- // if @RG line exists, ID must be provided\n- if ( !rg.HasID() ) {\n- isMissingRequiredFields = true;\n- cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;\n- }\n-\n- // store SAM read group entry\n- if ( !isMissingRequiredFields )\n- m_header.ReadGroups.Add(rg);\n-}\n-\n-void SamFormatParser::ParsePGLine(const string& line) {\n-\n- SamProgram pg;\n-\n- // split string into tokens\n- vector<string> tokens = Split(line, Constants::SAM_TAB);\n-\n- // iterate over tokens\n- vector<string>::const_iterator tokenIter = tokens.begin();\n- vector<string>::const_iterator tokenEnd = tokens.end();\n- for ( ; tokenIter != tokenEnd; ++tokenIter ) {\n-\n- // get token tag/value\n- const string tokenTag = (*tokenIter).substr(0,2);\n- const string tokenValue = (*tokenIter).substr(3);\n-\n- // set program record contents\n- if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue;\n- else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue;\n- else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;\n- else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;\n- else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;\n- else\n- cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;\n- }\n-\n- bool isMissingRequiredFields = false;\n-\n- // if @PG line exists, ID must be provided\n- if ( !pg.HasID() ) {\n- isMissingRequiredFields = true;\n- cerr << "SamFormatParser ERROR: @PG line is missing ID tag" << endl;\n- }\n-\n- // store SAM program record\n- if ( !isMissingRequiredFields )\n- m_header.Programs.Add(pg);\n-}\n-\n-void SamFormatParser::ParseCOLine(const string& line) {\n- // simply add line to comments list\n- m_header.Comments.push_back(line);\n-}\n-\n-const vector<string> SamFormatParser::Split(const string& line, const char delim) {\n- vector<string> tokens;\n- stringstream lineStream(line);\n- string token;\n- while ( getline(lineStream, token, delim) )\n- tokens.push_back(token);\n- return tokens;\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatParser_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,62 +0,0 @@ -// *************************************************************************** -// SamFormatParser.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 23 December 2010 (DB) -// --------------------------------------------------------------------------- -// Provides functionality for parsing SAM header text into SamHeader object -// *************************************************************************** - -#ifndef SAM_FORMAT_PARSER_H -#define SAM_FORMAT_PARSER_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <string> -#include <vector> - -namespace BamTools { - -class SamHeader; - -namespace Internal { - -class SamFormatParser { - - // ctor & dtor - public: - SamFormatParser(BamTools::SamHeader& header); - ~SamFormatParser(void); - - // parse text & populate header data - public: - void Parse(const std::string& headerText); - - // internal methods - private: - void ParseSamLine(const std::string& line); - void ParseHDLine(const std::string& line); - void ParseSQLine(const std::string& line); - void ParseRGLine(const std::string& line); - void ParsePGLine(const std::string& line); - void ParseCOLine(const std::string& line); - const std::vector<std::string> Split(const std::string& line, const char delim); - - // data members - private: - SamHeader& m_header; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // SAM_FORMAT_PARSER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,211 +0,0 @@ -// *************************************************************************** -// SamFormatPrinter.cpp (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 19 April 2011 (DB) -// --------------------------------------------------------------------------- -// Provides functionality for printing formatted SAM header to string -// *************************************************************************** - -#include <api/SamConstants.h> -#include <api/SamHeader.h> -#include <api/internal/SamFormatPrinter_p.h> -using namespace BamTools; -using namespace BamTools::Internal; - -#include <iostream> -#include <sstream> -#include <vector> -using namespace std; - -SamFormatPrinter::SamFormatPrinter(const SamHeader& header) - : m_header(header) -{ } - -SamFormatPrinter::~SamFormatPrinter(void) { } - -const string SamFormatPrinter::FormatTag(const string &tag, const string &value) const { - return string(Constants::SAM_TAB + tag + Constants::SAM_COLON + value); -} - -const string SamFormatPrinter::ToString(void) const { - - // clear out stream - stringstream out(""); - - // generate formatted header text - PrintHD(out); - PrintSQ(out); - PrintRG(out); - PrintPG(out); - PrintCO(out); - - // return result - return out.str(); -} - -void SamFormatPrinter::PrintHD(std::stringstream& out) const { - - // if header has @HD data - if ( m_header.HasVersion() ) { - - // @HD VN:<Version> - out << Constants::SAM_HD_BEGIN_TOKEN - << FormatTag(Constants::SAM_HD_VERSION_TAG, m_header.Version); - - // SO:<SortOrder> - if ( m_header.HasSortOrder() ) - out << FormatTag(Constants::SAM_HD_SORTORDER_TAG, m_header.SortOrder); - - // GO:<GroupOrder> - if ( m_header.HasGroupOrder() ) - out << FormatTag(Constants::SAM_HD_GROUPORDER_TAG, m_header.GroupOrder); - - // newline - out << endl; - } -} - -void SamFormatPrinter::PrintSQ(std::stringstream& out) const { - - // iterate over sequence entries - SamSequenceConstIterator seqIter = m_header.Sequences.ConstBegin(); - SamSequenceConstIterator seqEnd = m_header.Sequences.ConstEnd(); - for ( ; seqIter != seqEnd; ++seqIter ) { - const SamSequence& seq = (*seqIter); - - // @SQ SN:<Name> LN:<Length> - out << Constants::SAM_SQ_BEGIN_TOKEN - << FormatTag(Constants::SAM_SQ_NAME_TAG, seq.Name) - << FormatTag(Constants::SAM_SQ_LENGTH_TAG, seq.Length); - - // AS:<AssemblyID> - if ( seq.HasAssemblyID() ) - out << FormatTag(Constants::SAM_SQ_ASSEMBLYID_TAG, seq.AssemblyID); - - // M5:<Checksum> - if ( seq.HasChecksum() ) - out << FormatTag(Constants::SAM_SQ_CHECKSUM_TAG, seq.Checksum); - - // SP:<Species> - if ( seq.HasSpecies() ) - out << FormatTag(Constants::SAM_SQ_SPECIES_TAG, seq.Species); - - // UR:<URI> - if ( seq.HasURI() ) - out << FormatTag(Constants::SAM_SQ_URI_TAG, seq.URI); - - // newline - out << endl; - } -} - -void SamFormatPrinter::PrintRG(std::stringstream& out) const { - - // iterate over read group entries - SamReadGroupConstIterator rgIter = m_header.ReadGroups.ConstBegin(); - SamReadGroupConstIterator rgEnd = m_header.ReadGroups.ConstEnd(); - for ( ; rgIter != rgEnd; ++rgIter ) { - const SamReadGroup& rg = (*rgIter); - - // @RG ID:<ID> - out << Constants::SAM_RG_BEGIN_TOKEN - << FormatTag(Constants::SAM_RG_ID_TAG, rg.ID); - - // CN:<SequencingCenter> - if ( rg.HasSequencingCenter() ) - out << FormatTag(Constants::SAM_RG_SEQCENTER_TAG, rg.SequencingCenter); - - // DS:<Description> - if ( rg.HasDescription() ) - out << FormatTag(Constants::SAM_RG_DESCRIPTION_TAG, rg.Description); - - // DT:<ProductionDate> - if ( rg.HasProductionDate() ) - out << FormatTag(Constants::SAM_RG_PRODUCTIONDATE_TAG, rg.ProductionDate); - - // FO:<FlowOrder> - if ( rg.HasFlowOrder() ) - out << FormatTag(Constants::SAM_RG_FLOWORDER_TAG, rg.FlowOrder); - - // KS:<KeySequence> - if ( rg.HasKeySequence() ) - out << FormatTag(Constants::SAM_RG_KEYSEQUENCE_TAG, rg.KeySequence); - - // LB:<Library> - if ( rg.HasLibrary() ) - out << FormatTag(Constants::SAM_RG_LIBRARY_TAG, rg.Library); - - // PG:<Program> - if ( rg.HasProgram() ) - out << FormatTag(Constants::SAM_RG_PROGRAM_TAG, rg.Program); - - // PI:<PredictedInsertSize> - if ( rg.HasPredictedInsertSize() ) - out << FormatTag(Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG, rg.PredictedInsertSize); - - // PL:<SequencingTechnology> - if ( rg.HasSequencingTechnology() ) - out << FormatTag(Constants::SAM_RG_SEQTECHNOLOGY_TAG, rg.SequencingTechnology); - - // PU:<PlatformUnit> - if ( rg.HasPlatformUnit() ) - out << FormatTag(Constants::SAM_RG_PLATFORMUNIT_TAG, rg.PlatformUnit); - - // SM:<Sample> - if ( rg.HasSample() ) - out << FormatTag(Constants::SAM_RG_SAMPLE_TAG, rg.Sample); - - // newline - out << endl; - } -} - -void SamFormatPrinter::PrintPG(std::stringstream& out) const { - - // iterate over program record entries - SamProgramConstIterator pgIter = m_header.Programs.ConstBegin(); - SamProgramConstIterator pgEnd = m_header.Programs.ConstEnd(); - for ( ; pgIter != pgEnd; ++pgIter ) { - const SamProgram& pg = (*pgIter); - - // @PG ID:<ID> - out << Constants::SAM_PG_BEGIN_TOKEN - << FormatTag(Constants::SAM_PG_ID_TAG, pg.ID); - - // PN:<Name> - if ( pg.HasName() ) - out << FormatTag(Constants::SAM_PG_NAME_TAG, pg.Name); - - // CL:<CommandLine> - if ( pg.HasCommandLine() ) - out << FormatTag(Constants::SAM_PG_COMMANDLINE_TAG, pg.CommandLine); - - // PP:<PreviousProgramID> - if ( pg.HasPreviousProgramID() ) - out << FormatTag(Constants::SAM_PG_PREVIOUSPROGRAM_TAG, pg.PreviousProgramID); - - // VN:<Version> - if ( pg.HasVersion() ) - out << FormatTag(Constants::SAM_PG_VERSION_TAG, pg.Version); - - // newline - out << endl; - } -} - -void SamFormatPrinter::PrintCO(std::stringstream& out) const { - - // iterate over comments - vector<string>::const_iterator commentIter = m_header.Comments.begin(); - vector<string>::const_iterator commentEnd = m_header.Comments.end(); - for ( ; commentIter != commentEnd; ++commentIter ) { - - // @CO <Comment> - out << Constants::SAM_CO_BEGIN_TOKEN - << Constants::SAM_TAB - << (*commentIter) - << endl; - } -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamFormatPrinter_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,61 +0,0 @@ -// *************************************************************************** -// SamFormatPrinter.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 23 December 2010 (DB) -// --------------------------------------------------------------------------- -// Provides functionality for printing formatted SAM header to string -// *************************************************************************** - -#ifndef SAM_FORMAT_PRINTER_H -#define SAM_FORMAT_PRINTER_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <sstream> -#include <string> - -namespace BamTools { - -class SamHeader; - -namespace Internal { - -class SamFormatPrinter { - - // ctor & dtor - public: - SamFormatPrinter(const BamTools::SamHeader& header); - ~SamFormatPrinter(void); - - // generates SAM-formatted string from header data - public: - const std::string ToString(void) const; - - // internal methods - private: - const std::string FormatTag(const std::string& tag, const std::string& value) const; - void PrintHD(std::stringstream& out) const; - void PrintSQ(std::stringstream& out) const; - void PrintRG(std::stringstream& out) const; - void PrintPG(std::stringstream& out) const; - void PrintCO(std::stringstream& out) const; - - // data members - private: - const SamHeader& m_header; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // SAM_FORMAT_PRINTER_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,511 +0,0 @@\n-// ***************************************************************************\n-// SamHeaderValidator.cpp (c) 2010 Derek Barnett\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 18 April 2011 (DB)\n-// ---------------------------------------------------------------------------\n-// Provides functionality for validating SamHeader data\n-// ***************************************************************************\n-\n-#include <api/SamConstants.h>\n-#include <api/SamHeader.h>\n-#include <api/internal/SamHeaderValidator_p.h>\n-#include <api/internal/SamHeaderVersion_p.h>\n-using namespace BamTools;\n-using namespace BamTools::Internal;\n-\n-#include <cctype>\n-#include <iostream>\n-#include <set>\n-#include <sstream>\n-using namespace std;\n-\n-namespace BamTools {\n-namespace Internal {\n-\n-bool caseInsensitiveCompare(const string& lhs, const string& rhs) {\n-\n- // can omit checking chars if lengths not equal\n- const int lhsLength = lhs.length();\n- const int rhsLength = rhs.length();\n- if ( lhsLength != rhsLength )\n- return false;\n-\n- // do *basic* toupper checks on each string char\'s\n- for ( int i = 0; i < lhsLength; ++i ) {\n- if ( toupper( (int)lhs.at(i)) != toupper( (int)rhs.at(i)) )\n- return false;\n- }\n-\n- // otherwise OK\n- return true;\n-}\n-\n-} // namespace Internal\n-} // namespace BamTools\n-\n-// ------------------------------------------------------------------------\n-// Allow validation rules to vary, as needed, between SAM header versions\n-//\n-// use SAM_VERSION_X_Y to tag important changes\n-//\n-// Together, they will allow for comparisons like:\n-// if ( m_version < SAM_VERSION_2_0 ) {\n-// // use some older rule\n-// else\n-// // use rule introduced with version 2.0\n-\n-static const SamHeaderVersion SAM_VERSION_1_0 = SamHeaderVersion(1,0);\n-static const SamHeaderVersion SAM_VERSION_1_1 = SamHeaderVersion(1,1);\n-static const SamHeaderVersion SAM_VERSION_1_2 = SamHeaderVersion(1,2);\n-static const SamHeaderVersion SAM_VERSION_1_3 = SamHeaderVersion(1,3);\n-static const SamHeaderVersion SAM_VERSION_1_4 = SamHeaderVersion(1,4);\n-\n-// TODO: This functionality is currently unused.\n-// Make validation "version-aware."\n-//\n-// ------------------------------------------------------------------------\n-\n-const string SamHeaderValidator::ERROR_PREFIX = "ERROR: ";\n-const string SamHeaderValidator::WARN_PREFIX = "WARNING: ";\n-const string SamHeaderValidator::NEWLINE = "\\n";\n-\n-SamHeaderValidator::SamHeaderValidator(const SamHeader& header)\n- : m_header(header)\n-{ }\n-\n-SamHeaderValidator::~SamHeaderValidator(void) { }\n-\n-bool SamHeaderValidator::Validate(bool verbose) {\n-\n- // validate header components\n- bool isValid = true;\n- isValid &= ValidateMetadata();\n- isValid &= ValidateSequenceDictionary();\n- isValid &= ValidateReadGroupDictionary();\n- isValid &= ValidateProgramChain();\n-\n- // report errors if desired\n- if ( verbose ) {\n- PrintErrorMessages();\n- PrintWarningMessages();\n- }\n-\n- // return validation status\n- return isValid;\n-}\n-\n-bool SamHeaderValidator::ValidateMetadata(void) {\n- bool isValid = true;\n- isValid &= ValidateVersion();\n- isValid &= ValidateSortOrder();\n- isValid &= ValidateGroupOrder();\n- return isValid;\n-}\n-\n-bool SamHeaderValidator::ValidateVersion(void) {\n-\n- const string& version = m_header.Version;\n-\n- // warn if version not present\n- if ( version.empty() ) {\n- AddWarning("Version (VN) missing. Not required, but strongly recommended");\n- return true;\n- }\n-\n- // invalid if version does not contain a period\n- const size_t periodFound = version.find(Constants::SAM_PERIOD);\n- if ( periodFound == string::npos ) {\n- AddError("Invalid version (VN) format: " + version);\n- return false;\n- }\n-\n- // invalid if '..b'APILLARY) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_HELICOS) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_ILLUMINA) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_IONTORRENT) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_LS454) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_PACBIO) ||\n- caseInsensitiveCompare(technology, Constants::SAM_RG_SEQTECHNOLOGY_SOLID)\n- )\n- {\n- return true;\n- }\n-\n- // otherwise\n- AddError("Invalid read group sequencing platform (PL): " + technology);\n- return false;\n-}\n-\n-bool SamHeaderValidator::ValidateProgramChain(void) {\n- bool isValid = true;\n- isValid &= ContainsUniqueProgramIds();\n- isValid &= ValidatePreviousProgramIds();\n- return isValid;\n-}\n-\n-bool SamHeaderValidator::ContainsUniqueProgramIds(void) {\n-\n- bool isValid = true;\n- set<string> programIds;\n- set<string>::iterator pgIdIter;\n-\n- // iterate over program records\n- const SamProgramChain& programs = m_header.Programs;\n- SamProgramConstIterator pgIter = programs.ConstBegin();\n- SamProgramConstIterator pgEnd = programs.ConstEnd();\n- for ( ; pgIter != pgEnd; ++pgIter ) {\n- const SamProgram& pg = (*pgIter);\n-\n- // lookup program ID\n- const string& pgId = pg.ID;\n- pgIdIter = programIds.find(pgId);\n-\n- // error if found (duplicate entry)\n- if ( pgIdIter != programIds.end() ) {\n- AddError("Program ID (ID): " + pgId + " is not unique");\n- isValid = false;\n- }\n-\n- // otherwise ok, store ID\n- programIds.insert(pgId);\n- }\n-\n- // return validation state\n- return isValid;\n-}\n-\n-bool SamHeaderValidator::ValidatePreviousProgramIds(void) {\n-\n- bool isValid = true;\n-\n- // iterate over program records\n- const SamProgramChain& programs = m_header.Programs;\n- SamProgramConstIterator pgIter = programs.ConstBegin();\n- SamProgramConstIterator pgEnd = programs.ConstEnd();\n- for ( ; pgIter != pgEnd; ++pgIter ) {\n- const SamProgram& pg = (*pgIter);\n-\n- // ignore record for validation if PreviousProgramID is empty\n- const string& ppId = pg.PreviousProgramID;\n- if ( ppId.empty() )\n- continue;\n-\n- // see if program "chain" contains an entry for ppId\n- if ( !programs.Contains(ppId) ) {\n- AddError("PreviousProgramID (PP): " + ppId + " is not a known ID");\n- isValid = false;\n- }\n- }\n-\n- // return validation state\n- return isValid;\n-}\n-void SamHeaderValidator::AddError(const string& message) {\n- m_errorMessages.push_back(ERROR_PREFIX + message + NEWLINE);\n-}\n-\n-void SamHeaderValidator::AddWarning(const string& message) {\n- m_warningMessages.push_back(WARN_PREFIX + message + NEWLINE);\n-}\n-\n-void SamHeaderValidator::PrintErrorMessages(void) {\n-\n- // skip if no error messages\n- if ( m_errorMessages.empty() ) return;\n-\n- // print error header line\n- cerr << "* SAM header has " << m_errorMessages.size() << " errors:" << endl;\n-\n- // print each error message\n- vector<string>::const_iterator errorIter = m_errorMessages.begin();\n- vector<string>::const_iterator errorEnd = m_errorMessages.end();\n- for ( ; errorIter != errorEnd; ++errorIter )\n- cerr << (*errorIter);\n-}\n-\n-void SamHeaderValidator::PrintWarningMessages(void) {\n-\n- // skip if no warning messages\n- if ( m_warningMessages.empty() ) return;\n-\n- // print warning header line\n- cerr << "* SAM header has " << m_warningMessages.size() << " warnings:" << endl;\n-\n- // print each warning message\n- vector<string>::const_iterator warnIter = m_warningMessages.begin();\n- vector<string>::const_iterator warnEnd = m_warningMessages.end();\n- for ( ; warnIter != warnEnd; ++warnIter )\n- cerr << (*warnIter);\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderValidator_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,102 +0,0 @@ -// *************************************************************************** -// SamHeaderValidator.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 13 January 2011 (DB) -// --------------------------------------------------------------------------- -// Provides functionality for validating SamHeader data -// *************************************************************************** - -#ifndef SAM_HEADER_VALIDATOR_P_H -#define SAM_HEADER_VALIDATOR_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <string> -#include <vector> - -namespace BamTools { - -class SamHeader; -class SamReadGroup; -class SamSequence; - -namespace Internal { - -class SamHeaderValidator { - - // ctor & dtor - public: - SamHeaderValidator(const SamHeader& header); - ~SamHeaderValidator(void); - - // SamHeaderValidator interface - public: - // validates SamHeader data, returns true/false accordingly - // prints error & warning messages to stderr when @verbose is true - bool Validate(bool verbose = false); - - // internal methods - private: - - // validate header metadata - bool ValidateMetadata(void); - bool ValidateVersion(void); - bool ContainsOnlyDigits(const std::string& s); - bool ValidateSortOrder(void); - bool ValidateGroupOrder(void); - - // validate sequence dictionary - bool ValidateSequenceDictionary(void); - bool ContainsUniqueSequenceNames(void); - bool CheckNameFormat(const std::string& name); - bool ValidateSequence(const SamSequence& seq); - bool CheckLengthInRange(const std::string& length); - - // validate read group dictionary - bool ValidateReadGroupDictionary(void); - bool ContainsUniqueIDsAndPlatformUnits(void); - bool ValidateReadGroup(const SamReadGroup& rg); - bool CheckReadGroupID(const std::string& id); - bool CheckSequencingTechnology(const std::string& technology); - - // validate program data - bool ValidateProgramChain(void); - bool ContainsUniqueProgramIds(void); - bool ValidatePreviousProgramIds(void); - - // error reporting - void AddError(const std::string& message); - void AddWarning(const std::string& message); - void PrintErrorMessages(void); - void PrintWarningMessages(void); - - // data members - private: - - // SamHeader being validated - const SamHeader& m_header; - - // error reporting helpers - static const std::string ERROR_PREFIX; - static const std::string WARN_PREFIX; - static const std::string NEWLINE; - - // error reporting messages - std::vector<std::string> m_errorMessages; - std::vector<std::string> m_warningMessages; -}; - -} // namespace Internal -} // namespace BamTools - -#endif // SAM_HEADER_VALIDATOR_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/api/internal/SamHeaderVersion_p.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,135 +0,0 @@ -// *************************************************************************** -// SamHeaderVersion.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 24 February 2011 (DB) -// --------------------------------------------------------------------------- -// Provides functionality for comparing SAM header versions -// ************************************************************************* - -#ifndef SAM_HEADERVERSION_P_H -#define SAM_HEADERVERSION_P_H - -// ------------- -// W A R N I N G -// ------------- -// -// This file is not part of the BamTools API. It exists purely as an -// implementation detail. This header file may change from version to version -// without notice, or even be removed. -// -// We mean it. - -#include <api/SamConstants.h> -#include <sstream> -#include <string> - -namespace BamTools { -namespace Internal { - -class SamHeaderVersion { - - // ctors & dtor - public: - SamHeaderVersion(void) - : m_majorVersion(0) - , m_minorVersion(0) - { } - - explicit SamHeaderVersion(const std::string& version) - : m_majorVersion(0) - , m_minorVersion(0) - { - SetVersion(version); - } - - SamHeaderVersion(const unsigned int& major, const unsigned int& minor) - : m_majorVersion(major) - , m_minorVersion(minor) - { } - - ~SamHeaderVersion(void) { - m_majorVersion = 0; - m_minorVersion = 0; - } - - // acess data - public: - unsigned int MajorVersion(void) const { return m_majorVersion; } - unsigned int MinorVersion(void) const { return m_minorVersion; } - - void SetVersion(const std::string& version); - std::string ToString(void) const; - - // data members - private: - unsigned int m_majorVersion; - unsigned int m_minorVersion; -}; - -inline -void SamHeaderVersion::SetVersion(const std::string& version) { - - // do nothing if version is empty - if ( !version.empty() ) { - - std::stringstream versionStream(""); - - // do nothing if period not found - const size_t periodFound = version.find(Constants::SAM_PERIOD); - if ( periodFound != std::string::npos ) { - - // store major version if non-empty and contains only digits - const std::string& majorVersion = version.substr(0, periodFound); - versionStream.str(majorVersion); - if ( !majorVersion.empty() ) { - const size_t nonDigitFound = majorVersion.find_first_not_of(Constants::SAM_DIGITS); - if ( nonDigitFound == std::string::npos ) - versionStream >> m_majorVersion; - } - - // store minor version if non-empty and contains only digits - const std::string& minorVersion = version.substr(periodFound + 1); - versionStream.str(minorVersion); - if ( !minorVersion.empty() ) { - const size_t nonDigitFound = minorVersion.find_first_not_of(Constants::SAM_DIGITS); - if ( nonDigitFound == std::string::npos ) - versionStream >> m_minorVersion; - } - } - } -} - -// ----------------------------------------------------- -// printing - -inline std::string SamHeaderVersion::ToString(void) const { - std::stringstream version; - version << m_majorVersion << Constants::SAM_PERIOD << m_minorVersion; - return version.str(); -} - -// ----------------------------------------------------- -// comparison operators - -inline bool operator==(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { - return (lhs.MajorVersion() == rhs.MajorVersion()) && - (lhs.MinorVersion() == rhs.MinorVersion()); -} - -inline bool operator<(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { - if ( lhs.MajorVersion() == rhs.MajorVersion() ) - return lhs.MinorVersion() < rhs.MinorVersion(); - else - return lhs.MajorVersion() < rhs.MajorVersion(); -} - -inline bool operator> (const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return rhs < lhs; } -inline bool operator<=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs>rhs); } -inline bool operator>=(const SamHeaderVersion& lhs, const SamHeaderVersion& rhs) { return !(lhs<rhs); } - -} // namespace Internal -} // namespace BamTools - -#endif // SAM_HEADERVERSION_P_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h --- a/BEDTools-Version-2.14.3/src/utils/BamTools/src/shared/bamtools_global.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,79 +0,0 @@ -// *************************************************************************** -// bamtools_global.h (c) 2010 Derek Barnett -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 3 March 2011 (DB) -// --------------------------------------------------------------------------- -// Provides the basic definitions for exporting & importing library symbols. -// Also provides some platform-specific rules for definitions. -// *************************************************************************** - -#ifndef BAMTOOLS_GLOBAL_H -#define BAMTOOLS_GLOBAL_H - -/*! \brief Library export macro - \internal -*/ -#ifndef BAMTOOLS_LIBRARY_EXPORT -# if defined(WIN32) -# define BAMTOOLS_LIBRARY_EXPORT __declspec(dllexport) -# else -# define BAMTOOLS_LIBRARY_EXPORT __attribute__((visibility("default"))) -# endif -#endif // BAMTOOLS_LIBRARY_EXPORT - -/*! \brief Library import macro - \internal -*/ -#ifndef BAMTOOLS_LIBRARY_IMPORT -# if defined(WIN32) -# define BAMTOOLS_LIBRARY_IMPORT __declspec(dllimport) -# else -# define BAMTOOLS_LIBRARY_IMPORT -# endif -#endif // BAMTOOLS_LIBRARY_IMPORT - -/*! \brief Platform-specific type definitions - \internal -*/ -#ifndef BAMTOOLS_LFS -#define BAMTOOLS_LFS - #ifdef WIN32 - #define ftell64(a) _ftelli64(a) - #define fseek64(a,b,c) _fseeki64(a,b,c) - #else - #define ftell64(a) ftello(a) - #define fseek64(a,b,c) fseeko(a,b,c) - #endif -#endif // BAMTOOLS_LFS - -/*! \def ftell64(a) - \brief Platform-independent tell() operation. - \internal -*/ -/*! \def fseek64(a,b,c) - \brief Platform-independent seek() operation. - \internal -*/ - -/*! \brief Platform-specific type definitions - \internal -*/ -#ifndef BAMTOOLS_TYPES -#define BAMTOOLS_TYPES - #ifdef _MSC_VER - typedef char int8_t; - typedef unsigned char uint8_t; - typedef short int16_t; - typedef unsigned short uint16_t; - typedef int int32_t; - typedef unsigned int uint32_t; - typedef long long int64_t; - typedef unsigned long long uint64_t; - #else - #include <stdint.h> - #endif -#endif // BAMTOOLS_TYPES - -#endif // BAMTOOLS_GLOBAL_H |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp --- a/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,325 +0,0 @@\n-// ***************************************************************************\n-// FastaIndex.cpp (c) 2010 Erik Garrison <erik.garrison@bc.edu>\n-// Marth Lab, Department of Biology, Boston College\n-// All rights reserved.\n-// ---------------------------------------------------------------------------\n-// Last modified: 9 February 2010 (EG)\n-// ---------------------------------------------------------------------------\n-\n-#include "Fasta.h"\n-\n-FastaIndexEntry::FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len)\n- : name(name)\n- , length(length)\n- , offset(offset)\n- , line_blen(line_blen)\n- , line_len(line_len)\n-{}\n-\n-FastaIndexEntry::FastaIndexEntry(void) // empty constructor\n-{ clear(); }\n-\n-FastaIndexEntry::~FastaIndexEntry(void)\n-{}\n-\n-void FastaIndexEntry::clear(void)\n-{\n- name = "";\n- length = NULL;\n- offset = -1; // no real offset will ever be below 0, so this allows us to\n- // check if we have already recorded a real offset\n- line_blen = NULL;\n- line_len = NULL;\n-}\n-\n-ostream& operator<<(ostream& output, const FastaIndexEntry& e) {\n- // just write the first component of the name, for compliance with other tools\n- output << split(e.name, \' \').at(0) << "\\t" << e.length << "\\t" << e.offset << "\\t" <<\n- e.line_blen << "\\t" << e.line_len;\n- return output; // for multiple << operators.\n-}\n-\n-FastaIndex::FastaIndex(void) \n-{}\n-\n-void FastaIndex::readIndexFile(string fname) {\n- string line;\n- long long linenum = 0;\n- indexFile.open(fname.c_str(), ifstream::in);\n- if (indexFile.is_open()) {\n- while (getline (indexFile, line)) {\n- ++linenum;\n- // the fai format defined in samtools is tab-delimited, every line being:\n- // fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len\n- vector<string> fields = split(line, \'\\t\');\n- if (fields.size() == 5) { // if we don\'t get enough fields then there is a problem with the file\n- // note that fields[0] is the sequence name\n- char* end;\n- string name = split(fields[0], " \\t").at(0); // key by first token of name\n- sequenceNames.push_back(name);\n- this->insert(make_pair(name, FastaIndexEntry(fields[0], atoi(fields[1].c_str()),\n- strtoll(fields[2].c_str(), &end, 10),\n- atoi(fields[3].c_str()),\n- atoi(fields[4].c_str()))));\n- } else {\n- cerr << "Warning: malformed fasta index file " << fname << \n- "does not have enough fields @ line " << linenum << endl;\n- cerr << line << endl;\n- exit(1);\n- }\n- }\n- } else {\n- cerr << "could not open index file " << fname << endl;\n- exit(1);\n- }\n-}\n-\n-// for consistency this should be a class method\n-bool fastaIndexEntryCompare ( FastaIndexEntry a, FastaIndexEntry b) { return (a.offset<b.offset); }\n-\n-ostream& operator<<(ostream& output, FastaIndex& fastaIndex) {\n- vector<FastaIndexEntry> sortedIndex;\n- for(vector<string>::const_iterator it = fastaIndex.sequenceNames.begin(); it != fastaIndex.sequenceNames.end(); ++it)\n- {\n- sortedIndex.push_back(fastaIndex[*it]);\n- }\n- sort(sortedIndex.begin(), sortedIndex.end(), fastaIndexEntryCompare);\n- for( vector<FastaIndexEntry>::iterator fit = sortedIndex.begin(); fit != sortedIndex.end(); ++fit) {\n- output << *fit << endl;\n- }\n- return output;\n-}\n-\n-void FastaIndex::indexReference(string refname) {\n- // overview:\n- // for line in the reference fasta file\n- // track byte offset from the start of the file\n- // if line is a fasta header, take the name and dump the last sequnece to the index\n- // if line is a sequen'..b' exit(1);\n- } else {\n- return e->second;\n- }\n-}\n-\n-string FastaIndex::indexFileExtension() { return ".fai"; }\n-\n-void FastaReference::open(string reffilename, bool usemmap) {\n- filename = reffilename;\n- if (!(file = fopen(filename.c_str(), "r"))) {\n- cerr << "could not open " << filename << endl;\n- exit(1);\n- }\n- index = new FastaIndex();\n- struct stat stFileInfo; \n- string indexFileName = filename + index->indexFileExtension(); \n- // if we can find an index file, use it\n- if(stat(indexFileName.c_str(), &stFileInfo) == 0) { \n- index->readIndexFile(indexFileName);\n- } else { // otherwise, read the reference and generate the index file in the cwd\n- cerr << "index file " << indexFileName << " not found, generating..." << endl;\n- index->indexReference(filename);\n- index->writeIndexFile(indexFileName);\n- }\n- if (usemmap) {\n- usingmmap = true;\n- int fd = fileno(file);\n- struct stat sb;\n- if (fstat(fd, &sb) == -1)\n- cerr << "could not stat file" << filename << endl;\n- filesize = sb.st_size;\n- // map the whole file\n- filemm = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);\n- }\n-}\n-\n-FastaReference::~FastaReference(void) {\n- fclose(file);\n- if (usingmmap) {\n- munmap(filemm, filesize);\n- }\n- delete index;\n-}\n-\n-string FastaReference::getSequence(string seqname) {\n- FastaIndexEntry entry = index->entry(seqname);\n- int newlines_in_sequence = entry.length / entry.line_blen;\n- int seqlen = newlines_in_sequence + entry.length;\n- char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n- if (usingmmap) {\n- memcpy(seq, (char*) filemm + entry.offset, seqlen);\n- } else {\n- fseek64(file, entry.offset, SEEK_SET);\n- fread(seq, sizeof(char), seqlen, file);\n- }\n- seq[seqlen] = \'\\0\';\n- char* pbegin = seq;\n- char* pend = seq + (seqlen/sizeof(char));\n- pend = remove(pbegin, pend, \'\\n\');\n- pend = remove(pbegin, pend, \'\\0\');\n- string s = seq;\n- free(seq);\n- s.resize((pend - pbegin)/sizeof(char));\n- return s;\n-}\n-\n-// TODO cleanup; odd function. use a map\n-string FastaReference::sequenceNameStartingWith(string seqnameStart) {\n- try {\n- return (*index)[seqnameStart].name;\n- } catch (exception& e) {\n- cerr << e.what() << ": unable to find index entry for " << seqnameStart << endl;\n- exit(1);\n- }\n-}\n-\n-string FastaReference::getSubSequence(string seqname, int start, int length) {\n- FastaIndexEntry entry = index->entry(seqname);\n- if (start < 0 || length < 1) {\n- cerr << "Error: cannot construct subsequence with negative offset or length < 1" << endl;\n- exit(1);\n- }\n- // we have to handle newlines\n- // approach: count newlines before start\n- // count newlines by end of read\n- // subtracting newlines before start find count of embedded newlines\n- int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;\n- int newlines_by_end = (start + length - 1) / entry.line_blen;\n- int newlines_inside = newlines_by_end - newlines_before;\n- int seqlen = length + newlines_inside;\n- char* seq = (char*) calloc (seqlen + 1, sizeof(char));\n- if (usingmmap) {\n- memcpy(seq, (char*) filemm + entry.offset + newlines_before + start, seqlen);\n- } else {\n- fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET);\n- fread(seq, sizeof(char), (off_t) seqlen, file);\n- }\n- seq[seqlen] = \'\\0\';\n- char* pbegin = seq;\n- char* pend = seq + (seqlen/sizeof(char));\n- pend = remove(pbegin, pend, \'\\n\');\n- pend = remove(pbegin, pend, \'\\0\');\n- string s = seq;\n- free(seq);\n- s.resize((pend - pbegin)/sizeof(char));\n- return s;\n-}\n-\n-long unsigned int FastaReference::sequenceLength(string seqname) {\n- FastaIndexEntry entry = index->entry(seqname);\n- return entry.length;\n-}\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h --- a/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,78 +0,0 @@ -// *************************************************************************** -// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu> -// Marth Lab, Department of Biology, Boston College -// All rights reserved. -// --------------------------------------------------------------------------- -// Last modified: 5 February 2010 (EG) -// --------------------------------------------------------------------------- - -#ifndef _FASTA_H -#define _FASTA_H - -#include <map> -#include <iostream> -#include <fstream> -#include <vector> -#include <stdint.h> -#include <stdio.h> -#include <algorithm> -#include "LargeFileSupport.h" -#include <sys/stat.h> -#include <sys/mman.h> -#include "split.h" -#include <stdlib.h> -#include <ctype.h> -#include <unistd.h> - -using namespace std; - -class FastaIndexEntry { - friend ostream& operator<<(ostream& output, const FastaIndexEntry& e); - public: - FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len); - FastaIndexEntry(void); - ~FastaIndexEntry(void); - string name; // sequence name - int length; // length of sequence - long long offset; // bytes offset of sequence from start of file - int line_blen; // line length in bytes, sequence characters - int line_len; // line length including newline - void clear(void); -}; - -class FastaIndex : public map<string, FastaIndexEntry> { - friend ostream& operator<<(ostream& output, FastaIndex& i); - public: - FastaIndex(void); - ~FastaIndex(void); - vector<string> sequenceNames; - void indexReference(string refName); - void readIndexFile(string fname); - void writeIndexFile(string fname); - ifstream indexFile; - FastaIndexEntry entry(string key); - void flushEntryToIndex(FastaIndexEntry& entry); - string indexFileExtension(void); -}; - -class FastaReference { - public: - void open(string reffilename, bool usemmap = false); - bool usingmmap; - string filename; - FastaReference(void) : usingmmap(false) { } - ~FastaReference(void); - FILE* file; - void* filemm; - size_t filesize; - FastaIndex* index; - vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart); - string getSequence(string seqname); - // potentially useful for performance, investigate - // void getSequence(string seqname, string& sequence); - string getSubSequence(string seqname, int start, int length); - string sequenceNameStartingWith(string seqnameStart); - long unsigned int sequenceLength(string seqname); -}; - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h --- a/BEDTools-Version-2.14.3/src/utils/Fasta/LargeFileSupport.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,13 +0,0 @@ -#pragma once - -#define _FILE_OFFSET_BITS 64 - -#ifdef WIN32 -#define ftell64(a) _ftelli64(a) -#define fseek64(a,b,c) _fseeki64(a,b,c) -typedef __int64_t off_type; -#else -#define ftell64(a) ftello(a) -#define fseek64(a,b,c) fseeko(a,b,c) -typedef off_t off_type; -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/Makefile --- a/BEDTools-Version-2.14.3/src/utils/Fasta/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,26 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= Fasta.cpp split.cpp -OBJECTS= $(SOURCES:.cpp=.o) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -all: $(BUILT_OBJECTS) - -.PHONY: all - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp --- a/BEDTools-Version-2.14.3/src/utils/Fasta/split.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,33 +0,0 @@ -#include "split.h" - -std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) { - std::stringstream ss(s); - std::string item; - while(std::getline(ss, item, delim)) { - elems.push_back(item); - } - return elems; -} - -std::vector<std::string> split(const std::string &s, char delim) { - std::vector<std::string> elems; - return split(s, delim, elems); -} - -std::vector<std::string> &split(const std::string &s, const std::string& delims, std::vector<std::string> &elems) { - char* tok; - char cchars [s.size()+1]; - char* cstr = &cchars[0]; - strcpy(cstr, s.c_str()); - tok = strtok(cstr, delims.c_str()); - while (tok != NULL) { - elems.push_back(tok); - tok = strtok(NULL, delims.c_str()); - } - return elems; -} - -std::vector<std::string> split(const std::string &s, const std::string& delims) { - std::vector<std::string> elems; - return split(s, delims, elems); -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/Fasta/split.h --- a/BEDTools-Version-2.14.3/src/utils/Fasta/split.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,20 +0,0 @@ -#ifndef __SPLIT_H -#define __SPLIT_H - -// functions to split a string by a specific delimiter -#include <string> -#include <vector> -#include <sstream> -#include <string.h> - -// thanks to Evan Teran, http://stackoverflow.com/questions/236129/how-to-split-a-string/236803#236803 - -// split a string on a single delimiter character (delim) -std::vector<std::string>& split(const std::string &s, char delim, std::vector<std::string> &elems); -std::vector<std::string> split(const std::string &s, char delim); - -// split a string on any character found in the string of delimiters (delims) -std::vector<std::string>& split(const std::string &s, const std::string& delims, std::vector<std::string> &elems); -std::vector<std::string> split(const std::string &s, const std::string& delims); - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFile/Makefile --- a/BEDTools-Version-2.14.3/src/utils/bedFile/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ -I$(UTILITIES_DIR)/stringUtilities/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bedFile.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C -W $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp --- a/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,720 +0,0 @@\n-/*****************************************************************************\n- bedFile.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licensed under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "bedFile.h"\n-\n-\n-/************************************************\n-Helper functions\n-*************************************************/\n-void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks) {\n-\n- if (bed.otherFields.size() < 6) {\n- cerr << "Input error: Cannot split into blocks. Found interval with fewer than 12 columns on line " << lineNum << "." << endl;\n- exit(1);\n- }\n-\n- int blockCount = atoi(bed.otherFields[3].c_str());\n- if ( blockCount <= 0 ) {\n- cerr << "Input error: found interval having <= 0 blocks on line " << lineNum << "." << endl;\n- exit(1);\n- }\n- else if ( blockCount == 1 ) {\n- //take a short-cut for single blocks\n- bedBlocks.push_back(bed);\n- }\n- else {\n- // get the comma-delimited strings for the BED12 block starts and block ends.\n- string blockSizes(bed.otherFields[4]);\n- string blockStarts(bed.otherFields[5]);\n-\n- vector<int> sizes;\n- vector<int> starts;\n- Tokenize(blockSizes, sizes, ",");\n- Tokenize(blockStarts, starts, ",");\n-\n- if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {\n- cerr << "Input error: found interval with block-counts not matching starts/sizes on line " << lineNum << "." << endl;\n- exit(1);\n- }\n-\n- // add each BED block to the bedBlocks vector\n- for (UINT i = 0; i < (UINT) blockCount; ++i) {\n- CHRPOS blockStart = bed.start + starts[i];\n- CHRPOS blockEnd = bed.start + starts[i] + sizes[i];\n- BED currBedBlock(bed.chrom, blockStart, blockEnd, bed.name, bed.score, bed.strand, bed.otherFields);\n- bedBlocks.push_back(currBedBlock);\n- }\n- }\n-}\n-\n-\n-/***********************************************\n-Sorting comparison functions\n-************************************************/\n-bool sortByChrom(BED const &a, BED const &b) {\n- if (a.chrom < b.chrom) return true;\n- else return false;\n-};\n-\n-bool sortByStart(const BED &a, const BED &b) {\n- if (a.start < b.start) return true;\n- else return false;\n-};\n-\n-bool sortBySizeAsc(const BED &a, const BED &b) {\n-\n- CHRPOS aLen = a.end - a.start;\n- CHRPOS bLen = b.end - b.start;\n-\n- if (aLen < bLen) return true;\n- else return false;\n-};\n-\n-bool sortBySizeDesc(const BED &a, const BED &b) {\n-\n- CHRPOS aLen = a.end - a.start;\n- CHRPOS bLen = b.end - b.start;\n-\n- if (aLen > bLen) return true;\n- else return false;\n-};\n-\n-bool sortByScoreAsc(const BED &a, const BED &b) {\n- if (a.score < b.score) return true;\n- else return false;\n-};\n-\n-bool sortByScoreDesc(const BED &a, const BED &b) {\n- if (a.score > b.score) return true;\n- else return false;\n-};\n-\n-bool byChromThenStart(BED const &a, BED const &b) {\n-\n- if (a.chrom < b.chrom) return true;\n- else if (a.chrom > b.chrom) return false;\n-\n- if (a.start < b.start) return true;\n- else if (a.start >= b.start) return false;\n-\n- return false;\n-};\n-\n-\n-/*******************************************\n-Class methods\n-*******************************************/\n-\n-// Constructor\n-BedFile::BedFile(string &bedFile)\n-: bedFile(bedFile),\n- _isGff(false),\n- _isVcf(false),\n- _typeIsKnown(false),\n- _merged_start(-1),\n- _merged_end(-1),\n- _merged_chrom(""),\n- _prev_start(-1),\n- _prev_chrom("")\n-{}\n-\n-// Destructor\n-BedFile::~BedFile(void) {\n-}\n-\n-\n-void BedFile::Open(void) {\n- \n- _bedFields.reserve(12);\n- \n- if (bedFile == "stdin" || bedFile == "-") {\n- _bedStream'..b' else {\n- // correct for the fact that we artificially expanded the zeroLength feature\n- bedItr->depthMapList[index][a.start+2].starts++;\n- bedItr->depthMapList[index][a.end-1].ends++; \n- }\n-\n- if (a.start < bedItr->minOverlapStarts[index]) {\n- bedItr->minOverlapStarts[index] = a.start;\n- }\n- }\n- }\n- }\n- startBin >>= _binNextShift;\n- endBin >>= _binNextShift;\n- }\n-}\n-\n-void BedFile::setZeroBased(bool zeroBased) { this->isZeroBased = zeroBased; }\n-\n-void BedFile::setGff (bool gff) { this->_isGff = gff; }\n-\n-\n-void BedFile::setVcf (bool vcf) { this->_isVcf = vcf; }\n-\n-\n-void BedFile::setFileType (FileType type) {\n- _fileType = type;\n- _typeIsKnown = true;\n-}\n-\n-\n-void BedFile::setBedType (int colNums) {\n- bedType = colNums;\n-}\n-\n-\n-void BedFile::loadBedFileIntoMap() {\n-\n- BED bedEntry, nullBed;\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n-\n- Open();\n- while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- BIN bin = getBin(bedEntry.start, bedEntry.end);\n- bedMap[bedEntry.chrom][bin].push_back(bedEntry);\n- bedEntry = nullBed;\n- }\n- }\n- Close();\n-}\n-\n-\n-void BedFile::loadBedCovFileIntoMap() {\n-\n- BED bedEntry, nullBed;\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n-\n- Open();\n- while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- BIN bin = getBin(bedEntry.start, bedEntry.end);\n-\n- BEDCOV bedCov;\n- bedCov.chrom = bedEntry.chrom;\n- bedCov.start = bedEntry.start;\n- bedCov.end = bedEntry.end;\n- bedCov.name = bedEntry.name;\n- bedCov.score = bedEntry.score;\n- bedCov.strand = bedEntry.strand;\n- bedCov.otherFields = bedEntry.otherFields;\n- bedCov.zeroLength = bedEntry.zeroLength;\n- bedCov.count = 0;\n- bedCov.minOverlapStart = INT_MAX;\n-\n- bedCovMap[bedEntry.chrom][bin].push_back(bedCov);\n- bedEntry = nullBed;\n- }\n- }\n- Close();\n-}\n-\n-void BedFile::loadBedCovListFileIntoMap() {\n-\n- BED bedEntry, nullBed;\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n-\n- Open();\n- while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- BIN bin = getBin(bedEntry.start, bedEntry.end);\n-\n- BEDCOVLIST bedCovList;\n- bedCovList.chrom = bedEntry.chrom;\n- bedCovList.start = bedEntry.start;\n- bedCovList.end = bedEntry.end;\n- bedCovList.name = bedEntry.name;\n- bedCovList.score = bedEntry.score;\n- bedCovList.strand = bedEntry.strand;\n- bedCovList.otherFields = bedEntry.otherFields;\n- bedCovList.zeroLength = bedEntry.zeroLength;\n-\n- bedCovListMap[bedEntry.chrom][bin].push_back(bedCovList);\n- bedEntry = nullBed;\n- }\n- }\n- Close();\n-}\n-\n-\n-void BedFile::loadBedFileIntoMapNoBin() {\n-\n- BED bedEntry, nullBed;\n- int lineNum = 0;\n- BedLineStatus bedStatus;\n-\n- Open();\n- while ((bedStatus = this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {\n- if (bedStatus == BED_VALID) {\n- bedMapNoBin[bedEntry.chrom].push_back(bedEntry);\n- bedEntry = nullBed;\n- }\n- }\n- Close();\n-\n- // sort the BED entries for each chromosome\n- // in ascending order of start position\n- for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) {\n- sort(m->second.begin(), m->second.end(), sortByStart);\n- }\n-}\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h --- a/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1167 +0,0 @@\n-/*****************************************************************************\n- bedFile.h\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licensed under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#ifndef BEDFILE_H\n-#define BEDFILE_H\n-\n-// "local" includes\n-#include "gzstream.h"\n-#include "lineFileUtilities.h"\n-#include "fileType.h"\n-\n-// standard includes\n-#include <vector>\n-#include <map>\n-#include <set>\n-#include <string>\n-#include <iostream>\n-#include <fstream>\n-#include <sstream>\n-#include <cstring>\n-#include <algorithm>\n-#include <limits.h>\n-#include <stdint.h>\n-#include <cstdio>\n-//#include <tr1/unordered_map> // Experimental.\n-using namespace std;\n-\n-\n-//*************************************************\n-// Data type tydedef\n-//*************************************************\n-typedef uint32_t CHRPOS;\n-typedef uint16_t BINLEVEL;\n-typedef uint32_t BIN;\n-typedef uint16_t USHORT;\n-typedef uint32_t UINT;\n-\n-//*************************************************\n-// Genome binning constants\n-//*************************************************\n-\n-const BIN _numBins = 37450;\n-const BINLEVEL _binLevels = 7;\n-\n-// bins range in size from 16kb to 512Mb\n-// Bin 0 spans 512Mbp, # Level 1\n-// Bins 1-8 span 64Mbp, # Level 2\n-// Bins 9-72 span 8Mbp, # Level 3\n-// Bins 73-584 span 1Mbp # Level 4\n-// Bins 585-4680 span 128Kbp # Level 5\n-// Bins 4681-37449 span 16Kbp # Level 6\n-const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n-//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n-\n-const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. */\n-const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */\n-\n-\n-//*************************************************\n-// Common data structures\n-//*************************************************\n-\n-struct DEPTH {\n- UINT starts;\n- UINT ends;\n-};\n-\n-\n-/*\n- Structure for regular BED records\n-*/\n-struct BED {\n-\n- // Regular BED fields\n- string chrom;\n- CHRPOS start;\n- CHRPOS end;\n- string name;\n- string score;\n- string strand;\n-\n- // Add\'l fields for BED12 and/or custom BED annotations\n- vector<string> otherFields;\n-\n- // experimental fields for the FJOIN approach.\n- bool zeroLength;\n- bool added;\n- bool finished;\n- // list of hits from another file.\n- vector<BED> overlaps;\n-\n-public:\n- // constructors\n-\n- // Null\n- BED()\n- : chrom(""),\n- start(0),\n- end(0),\n- name(""),\n- score(""),\n- strand(""),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED3\n- BED(string chrom, CHRPOS start, CHRPOS end)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(""),\n- score(""),\n- strand(""),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED4\n- BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(""),\n- score(""),\n- strand(strand),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED6\n- BED(string chrom, CHRPOS start, CHRPOS end, string name,\n- string score, string strand)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(name),\n- score(score),\n- strand(strand),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // B'..b' vector<string>::const_iterator othEnd = bed.otherFields.end();\n- for ( ; othIt != othEnd; ++othIt) {\n- printf("\\t%s", othIt->c_str());\n- }\n- printf("\\n");\n- }\n- }\n- // VCF\n- else if (_isGff == false && _isVcf == true) {\n- printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n-\n- vector<string>::const_iterator othIt = bed.otherFields.begin();\n- vector<string>::const_iterator othEnd = bed.otherFields.end();\n- for ( ; othIt != othEnd; ++othIt) {\n- printf("%s\\t", othIt->c_str());\n- }\n- printf("\\n");\n- }\n- // GFF\n- else if (_isGff == true) {\n- // "GFF-9"\n- if (this->bedType == 8) {\n- printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n- bed.name.c_str(), start+1, end,\n- bed.score.c_str(), bed.strand.c_str(),\n- bed.otherFields[1].c_str());\n- }\n- // "GFF-8"\n- else if (this->bedType == 9) {\n- printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n- bed.name.c_str(), start+1, end,\n- bed.score.c_str(), bed.strand.c_str(),\n- bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n- }\n- }\n- }\n-\n-\n- /*\n- reportNullBedTab\n- */\n- void reportNullBedTab() {\n-\n- if (_isGff == false && _isVcf == false) {\n- if (this->bedType == 3) {\n- printf (".\\t-1\\t-1\\t");\n- }\n- else if (this->bedType == 4) {\n- printf (".\\t-1\\t-1\\t.\\t");\n- }\n- else if (this->bedType == 5) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t");\n- }\n- else if (this->bedType == 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n- }\n- else if (this->bedType > 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n- for (unsigned int i = 6; i < this->bedType; ++i) {\n- printf(".\\t");\n- }\n- }\n- }\n- else if (_isGff == true && _isVcf == false) {\n- if (this->bedType == 8) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n- }\n- else if (this->bedType == 9) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n- }\n- }\n- }\n-\n-\n- /*\n- reportNullBedTab\n- */\n- void reportNullBedNewLine() {\n-\n- if (_isGff == false && _isVcf == false) {\n- if (this->bedType == 3) {\n- printf (".\\t-1\\t-1\\n");\n- }\n- else if (this->bedType == 4) {\n- printf (".\\t-1\\t-1\\t.\\n");\n- }\n- else if (this->bedType == 5) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\n");\n- }\n- else if (this->bedType == 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n- }\n- else if (this->bedType > 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n- for (unsigned int i = 6; i < this->bedType; ++i) {\n- printf("\\t.");\n- }\n- printf("\\n");\n- }\n- }\n- else if (_isGff == true && _isVcf == false) {\n- if (this->bedType == 8) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n- }\n- else if (this->bedType == 9) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n- }\n- }\n- }\n-\n-\n-};\n-\n-#endif /* BEDFILE_H */\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig --- a/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h.orig Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1144 +0,0 @@\n-/*****************************************************************************\n- bedFile.h\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licensed under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#ifndef BEDFILE_H\n-#define BEDFILE_H\n-\n-// "local" includes\n-#include "gzstream.h"\n-#include "lineFileUtilities.h"\n-#include "fileType.h"\n-\n-// standard includes\n-#include <vector>\n-#include <map>\n-#include <set>\n-#include <string>\n-#include <iostream>\n-#include <fstream>\n-#include <sstream>\n-#include <cstring>\n-#include <algorithm>\n-#include <limits.h>\n-#include <stdint.h>\n-#include <cstdio>\n-//#include <tr1/unordered_map> // Experimental.\n-using namespace std;\n-\n-\n-//*************************************************\n-// Data type tydedef\n-//*************************************************\n-typedef uint32_t CHRPOS;\n-typedef uint16_t BINLEVEL;\n-typedef uint32_t BIN;\n-typedef uint16_t USHORT;\n-typedef uint32_t UINT;\n-\n-//*************************************************\n-// Genome binning constants\n-//*************************************************\n-\n-const BIN _numBins = 37450;\n-const BINLEVEL _binLevels = 7;\n-\n-// bins range in size from 16kb to 512Mb\n-// Bin 0 spans 512Mbp, # Level 1\n-// Bins 1-8 span 64Mbp, # Level 2\n-// Bins 9-72 span 8Mbp, # Level 3\n-// Bins 73-584 span 1Mbp # Level 4\n-// Bins 585-4680 span 128Kbp # Level 5\n-// Bins 4681-37449 span 16Kbp # Level 6\n-const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n-//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};\n-\n-const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. */\n-const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */\n-\n-\n-//*************************************************\n-// Common data structures\n-//*************************************************\n-\n-struct DEPTH {\n- UINT starts;\n- UINT ends;\n-};\n-\n-\n-/*\n- Structure for regular BED records\n-*/\n-struct BED {\n-\n- // Regular BED fields\n- string chrom;\n- CHRPOS start;\n- CHRPOS end;\n- string name;\n- string score;\n- string strand;\n-\n- // Add\'l fields for BED12 and/or custom BED annotations\n- vector<string> otherFields;\n-\n- // experimental fields for the FJOIN approach.\n- bool zeroLength;\n- bool added;\n- bool finished;\n- // list of hits from another file.\n- vector<BED> overlaps;\n-\n-public:\n- // constructors\n-\n- // Null\n- BED()\n- : chrom(""),\n- start(0),\n- end(0),\n- name(""),\n- score(""),\n- strand(""),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED3\n- BED(string chrom, CHRPOS start, CHRPOS end)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(""),\n- score(""),\n- strand(""),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED4\n- BED(string chrom, CHRPOS start, CHRPOS end, string strand)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(""),\n- score(""),\n- strand(strand),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // BED6\n- BED(string chrom, CHRPOS start, CHRPOS end, string name,\n- string score, string strand)\n- : chrom(chrom),\n- start(start),\n- end(end),\n- name(name),\n- score(score),\n- strand(strand),\n- otherFields(),\n- zeroLength(false),\n- added(false),\n- finished(false),\n- overlaps()\n- {}\n-\n- // B'..b' vector<string>::const_iterator othEnd = bed.otherFields.end();\n- for ( ; othIt != othEnd; ++othIt) {\n- printf("\\t%s", othIt->c_str());\n- }\n- printf("\\n");\n- }\n- }\n- // VCF\n- else if (_isGff == false && _isVcf == true) {\n- printf ("%s\\t%d\\t", bed.chrom.c_str(), bed.start+1);\n-\n- vector<string>::const_iterator othIt = bed.otherFields.begin();\n- vector<string>::const_iterator othEnd = bed.otherFields.end();\n- for ( ; othIt != othEnd; ++othIt) {\n- printf("%s\\t", othIt->c_str());\n- }\n- printf("\\n");\n- }\n- // GFF\n- else if (_isGff == true) {\n- // "GFF-9"\n- if (this->bedType == 8) {\n- printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n- bed.name.c_str(), start+1, end,\n- bed.score.c_str(), bed.strand.c_str(),\n- bed.otherFields[1].c_str());\n- }\n- // "GFF-8"\n- else if (this->bedType == 9) {\n- printf ("%s\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),\n- bed.name.c_str(), start+1, end,\n- bed.score.c_str(), bed.strand.c_str(),\n- bed.otherFields[1].c_str(), bed.otherFields[2].c_str());\n- }\n- }\n- }\n-\n-\n- /*\n- reportNullBedTab\n- */\n- void reportNullBedTab() {\n-\n- if (_isGff == false && _isVcf == false) {\n- if (this->bedType == 3) {\n- printf (".\\t-1\\t-1\\t");\n- }\n- else if (this->bedType == 4) {\n- printf (".\\t-1\\t-1\\t.\\t");\n- }\n- else if (this->bedType == 5) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t");\n- }\n- else if (this->bedType == 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n- }\n- else if (this->bedType > 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\t");\n- for (unsigned int i = 6; i < this->bedType; ++i) {\n- printf(".\\t");\n- }\n- }\n- }\n- else if (_isGff == true && _isVcf == false) {\n- if (this->bedType == 8) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t");\n- }\n- else if (this->bedType == 9) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\t");\n- }\n- }\n- }\n-\n-\n- /*\n- reportNullBedTab\n- */\n- void reportNullBedNewLine() {\n-\n- if (_isGff == false && _isVcf == false) {\n- if (this->bedType == 3) {\n- printf (".\\t-1\\t-1\\n");\n- }\n- else if (this->bedType == 4) {\n- printf (".\\t-1\\t-1\\t.\\n");\n- }\n- else if (this->bedType == 5) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\n");\n- }\n- else if (this->bedType == 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.\\n");\n- }\n- else if (this->bedType > 6) {\n- printf (".\\t-1\\t-1\\t.\\t-1\\t.");\n- for (unsigned int i = 6; i < this->bedType; ++i) {\n- printf("\\t.");\n- }\n- printf("\\n");\n- }\n- }\n- else if (_isGff == true && _isVcf == false) {\n- if (this->bedType == 8) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\n");\n- }\n- else if (this->bedType == 9) {\n- printf (".\\t.\\t.\\t-1\\t-1\\t-1\\t.\\t.\\t.\\n");\n- }\n- }\n- }\n-\n-\n-};\n-\n-#endif /* BEDFILE_H */\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile --- a/BEDTools-Version-2.14.3/src/utils/bedFilePE/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bedFilePE.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp --- a/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,530 +0,0 @@\n-//\n-// bedFilePE.cpp\n-// BEDTools\n-//\n-// Created by Aaron Quinlan Spring 2009.\n-// Copyright 2009 Aaron Quinlan. All rights reserved.\n-//\n-// Summary: Contains common functions for finding BED overlaps.\n-//\n-// Acknowledgments: Much of the code herein is taken from Jim Kent\'s\n-// BED processing code. I am grateful for his elegant\n-// genome binning algorithm and therefore use it extensively.\n-\n-\n-#include "bedFilePE.h"\n-\n-\n-// Constructor\n-BedFilePE::BedFilePE(string &bedFile) {\n- this->bedFile = bedFile;\n-}\n-\n-// Destructor\n-BedFilePE::~BedFilePE(void) {\n-}\n-\n-void BedFilePE::Open(void) {\n- if (bedFile == "stdin" || bedFile == "-") {\n- _bedStream = &cin;\n- }\n- else {\n- _bedStream = new ifstream(bedFile.c_str(), ios::in);\n-\n- if (isGzipFile(_bedStream) == true) {\n- delete _bedStream;\n- _bedStream = new igzstream(bedFile.c_str(), ios::in);\n- }\n- // can we open the file?\n- if ( !(_bedStream->good()) ) {\n- cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl;\n- exit (1);\n- }\n- }\n-}\n-\n-\n-\n-// Close the BEDPE file\n-void BedFilePE::Close(void) {\n- if (bedFile != "stdin" && bedFile != "-") delete _bedStream;\n-}\n-\n-\n-BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) {\n-\n- // make sure there are still lines to process.\n- // if so, tokenize, validate and return the BEDPE entry.\n- if (_bedStream->good()) {\n- string bedPELine;\n- vector<string> bedPEFields;\n- bedPEFields.reserve(10);\n-\n- // parse the bedStream pointer\n- getline(*_bedStream, bedPELine);\n- lineNum++;\n-\n- // split into a string vector.\n- Tokenize(bedPELine,bedPEFields);\n-\n- // load the BEDPE struct as long as it\'s a valid BEDPE entry.\n- return parseLine(bedpe, bedPEFields, lineNum);\n- }\n- // default if file is closed or EOF\n- return BED_INVALID;\n-}\n-\n-\n-/*\n- reportBedPETab\n-\n- Writes the _original_ BED entry for A.\n- Works for BEDPE only.\n-*/\n-void BedFilePE::reportBedPETab(const BEDPE &a) {\n-\n- if (this->bedType == 6) {\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t", a.chrom1.c_str(), a.start1, a.end1,\n- a.chrom2.c_str(), a.start2, a.end2);\n- }\n- else if (this->bedType == 7) {\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n- a.chrom2.c_str(), a.start2, a.end2,\n- a.name.c_str());\n- }\n- else if (this->bedType == 8) {\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n- a.chrom2.c_str(), a.start2, a.end2,\n- a.name.c_str(), a.score.c_str());\n- }\n- else if (this->bedType == 10) {\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s\\t", a.chrom1.c_str(), a.start1, a.end1,\n- a.chrom2.c_str(), a.start2, a.end2,\n- a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());\n- }\n- else if (this->bedType > 10) {\n- printf("%s\\t%d\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\t%s", a.chrom1.c_str(), a.start1, a.end1,\n- a.chrom2.c_str(), a.start2, a.end2,\n- a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());\n-\n- vector<string>::const_iterator othIt = a.otherFields.begin();\n- vector<string>::const_iterator othEnd = a.otherFields.end();\n- for ( ; othIt != othEnd; ++othIt) {\n- printf("\\t%s", othIt->c_str());\n- }\n- printf("\\t");\n- }\n-}\n-\n-\n-\n-/*\n- reportBedPENewLine\n-\n- Writes the _original_ BED entry for A.\n- '..b' float size = end - start;\n-\n- if ( (overlap / size) >= overlapFraction ) {\n-\n- // skip the hit if not on the same strand (and we care)\n- if ((forceStrand == false) && (enforceDiffNames == false)) {\n- hits.push_back(*bedItr); // it\'s a hit, add it.\n- }\n- else if ((forceStrand == true) && (enforceDiffNames == false)) {\n- if (strand == bedItr->bed.strand)\n- hits.push_back(*bedItr); // it\'s a hit, add it.\n- }\n- else if ((forceStrand == true) && (enforceDiffNames == true)) {\n- if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name))\n- hits.push_back(*bedItr); // it\'s a hit, add it.\n- }\n- else if ((forceStrand == false) && (enforceDiffNames == true)) {\n- if (name != bedItr->bed.name)\n- hits.push_back(*bedItr); // it\'s a hit, add it.\n- }\n- }\n-\n- }\n- }\n- startBin >>= _binNextShift;\n- endBin >>= _binNextShift;\n- }\n-}\n-\n-\n-void BedFilePE::loadBedPEFileIntoMap() {\n-\n- int lineNum = 0;\n- int bin1, bin2;\n- BedLineStatus bedStatus;\n- BEDPE bedpeEntry, nullBedPE;\n-\n- Open();\n- bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n- while (bedStatus != BED_INVALID) {\n-\n- if (bedStatus == BED_VALID) {\n- MATE *bedEntry1 = new MATE();\n- MATE *bedEntry2 = new MATE();\n- // separate the BEDPE entry into separate\n- // BED entries\n- splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2);\n-\n- // load end1 into a UCSC bin map\n- bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end);\n- this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1);\n-\n- // load end2 into a UCSC bin map\n- bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end);\n- this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2);\n-\n- bedpeEntry = nullBedPE;\n- }\n- bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);\n- }\n- Close();\n-}\n-\n-\n-void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) {\n-\n- /*\n- Split the BEDPE entry into separate BED entries\n-\n- NOTE: I am using a trick here where I store\n- the lineNum of the BEDPE from the original file\n- in the "count" column. This allows me to later\n- resolve whether the hits found on both ends of BEDPE A\n- came from the same entry in BEDPE B. Tracking by "name"\n- alone with fail when there are multiple mappings for a given\n- read-pair.\n- */\n-\n- bedEntry1->bed.chrom = bedpeEntry.chrom1;\n- bedEntry1->bed.start = bedpeEntry.start1;\n- bedEntry1->bed.end = bedpeEntry.end1;\n- bedEntry1->bed.name = bedpeEntry.name;\n- bedEntry1->bed.score = bedpeEntry.score; // only store the score in end1 to save memory\n- bedEntry1->bed.strand = bedpeEntry.strand1;\n- bedEntry1->bed.otherFields = bedpeEntry.otherFields; // only store the otherFields in end1 to save memory\n- bedEntry1->lineNum = lineNum;\n- bedEntry1->mate = bedEntry2; // keep a pointer to end2\n-\n- bedEntry2->bed.chrom = bedpeEntry.chrom2;\n- bedEntry2->bed.start = bedpeEntry.start2;\n- bedEntry2->bed.end = bedpeEntry.end2;\n- bedEntry2->bed.name = bedpeEntry.name;\n- bedEntry2->bed.strand = bedpeEntry.strand2;\n- bedEntry2->lineNum = lineNum;\n- bedEntry2->mate = bedEntry1; // keep a pointer to end1\n-}\n-\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h --- a/BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,93 +0,0 @@ -#ifndef BEDFILEPE_H -#define BEDFILEPE_H - -#include <vector> -#include <map> -#include <string> -#include <iostream> -#include <fstream> -#include <sstream> -#include <cstring> -#include <algorithm> -#include "bedFile.h" -#include "lineFileUtilities.h" - -using namespace std; - - -/* - Structure for paired-end records -*/ -struct BEDPE { - - // UCSC BED fields - string chrom1; - CHRPOS start1; - CHRPOS end1; - - string chrom2; - CHRPOS start2; - CHRPOS end2; - - string name; - string score; - - string strand1; - string strand2; - - vector<string> otherFields; -}; - - - - -//************************************************ -// BedFile Class methods and elements -//************************************************ -class BedFilePE { - -public: - - // Constructor - BedFilePE(string &); - - // Destructor - ~BedFilePE(void); - - // Open a BEDPE file for reading (creates an istream pointer) - void Open(void); - - // Close an opened BEDPE file. - void Close(void); - - // Get the next BED entry in an opened BED file. - BedLineStatus GetNextBedPE (BEDPE &bedpe, int &lineNum); - - - // Methods - - void reportBedPETab(const BEDPE &a); - void reportBedPENewLine(const BEDPE &a); - void loadBedPEFileIntoMap(); - void splitBedPEIntoBeds(const BEDPE &a, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2); - - - void FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand, - vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames); - - - string bedFile; - unsigned int bedType; - - masterMateMap bedMapEnd1; - masterMateMap bedMapEnd2; - -private: - istream *_bedStream; - - // methods - BedLineStatus parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum); - bool parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum); -}; - -#endif /* BEDFILEPE_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile --- a/BEDTools-Version-2.14.3/src/utils/bedGraphFile/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,31 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= bedGraphFile.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C -W $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp --- a/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,58 +0,0 @@ -/***************************************************************************** - bedGraphFile.cpp - - (c) 2010 - Assaf Gordon - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "bedGraphFile.h" -#include <sstream> - -// Constructor -BedGraphFile::BedGraphFile(string &_file) : - bedGraphFile(_file), - _bedGraphStream(NULL) -{} - - -// Destructor -BedGraphFile::~BedGraphFile() { - Close(); -} - - -// Open the BEDGRAPH file -void BedGraphFile::Open() { - if (bedGraphFile == "stdin" || bedGraphFile == "-") { - _bedGraphStream = &cin; - } - else { - _bedGraphStream = new ifstream(bedGraphFile.c_str(), ios::in); - - if (isGzipFile(_bedGraphStream) == true) { - delete _bedGraphStream; - _bedGraphStream = new igzstream(bedGraphFile.c_str(), ios::in); - } - // can we open the file? - if ( !(_bedGraphStream->good()) ) { - cerr << "Error: The requested bed file (" << bedGraphFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - } -} - - -// Close the BEDGRAPH file -void BedGraphFile::Close() { - if (bedGraphFile != "stdin" && bedGraphFile != "-") { - if (_bedGraphStream) { - delete _bedGraphStream; - _bedGraphStream = NULL ; - } - } -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h --- a/BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,199 +0,0 @@ -/***************************************************************************** - bedGraphFile.cpp - - (c) 2010 - Assaf Gordon - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef BEDGRAPHFILE_H -#define BEDGRAPHFILE_H - -#include "gzstream.h" -#include "lineFileUtilities.h" -#include "fileType.h" -#include <vector> -#include <map> -#include <set> -#include <string> -#include <iostream> -#include <fstream> -#include <sstream> -#include <cstring> -#include <algorithm> -#include <limits.h> -#include <stdint.h> -#include <cstdio> - -using namespace std; - -//************************************************* -// Data type tydedef -//************************************************* -#ifndef CHRPOS -typedef uint32_t CHRPOS; -#endif - -#ifndef DEPTH -typedef uint32_t DEPTH; -#endif - -/* - Structure for regular BedGraph records - */ -template <typename T> -class BEDGRAPH -{ -public: - std::string chrom; - CHRPOS start; - CHRPOS end; - T depth; - -public: - typedef T DEPTH_TYPE; - // constructors - - // Null - BEDGRAPH() : - start(0), - end(0), - depth(T()) - {} - - // BEDGraph - BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) : - chrom(_chrom), - start(_start), - end(_end), - depth(_depth) - {} -}; // BEDGraph - -typedef BEDGRAPH<int32_t> BEDGRAPH_INT; -typedef BEDGRAPH<std::string> BEDGRAPH_STR; -typedef BEDGRAPH<double> BEDGRAPH_FLOAT; - -template <typename T> -std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg) -{ - strm << bg.chrom << "\t" - << bg.start << "\t" - << bg.end << "\t" - << bg.depth; - return strm; -} - -// enum to flag the state of a given line in a BEDGraph file. -enum BedGraphLineStatus -{ - BEDGRAPH_INVALID = -1, - BEDGRAPH_HEADER = 0, - BEDGRAPH_BLANK = 1, - BEDGRAPH_VALID = 2 -}; - - -//************************************************ -// BedGraphFile Class methods and elements -//************************************************ -class BedGraphFile { - -public: - - // Constructor - BedGraphFile(string &); - - // Destructor - ~BedGraphFile(void); - - // Open a BEDGraph file for reading (creates an istream pointer) - void Open(void); - - // Close an opened BED file. - void Close(void); - - // Get the next BED entry in an opened BED file. - template <typename T> - BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum) - { - // make sure there are still lines to process. - // if so, tokenize, validate and return the BED entry. - if (_bedGraphStream->good()) { - string bedGraphLine; - vector<string> bedGraphFields; - - // parse the bedStream pointer - getline(*_bedGraphStream, bedGraphLine); - if (_bedGraphStream->eof()) - return BEDGRAPH_INVALID; - if (_bedGraphStream->bad()) { - cerr << "Error while reading file '" << bedGraphFile << "' : " - << strerror(errno) << endl; - exit(1); - } - lineNum++; - - // split into a string vector. - Tokenize(bedGraphLine,bedGraphFields); - - // load the BED struct as long as it's a valid BED entry. - return parseLine(bedgraph, bedGraphFields, lineNum); - } - - // default if file is closed or EOF - return BEDGRAPH_INVALID; - } - - // the bedfile with which this instance is associated - string bedGraphFile; - -private: - // data - istream *_bedGraphStream; - - template <typename T> - BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum) - { - if (lineVector.size() == 0) - return BEDGRAPH_BLANK; - - if (lineVector[0].find("track") != string::npos || - lineVector[0].find("browser") != string::npos || - lineVector[0].find("#") != string::npos) - return BEDGRAPH_HEADER; - - if (lineVector.size() != 4) - return BEDGRAPH_INVALID; - - bg.chrom = lineVector[0]; - - stringstream str_start(lineVector[1]); - if (! (str_start >> bg.start) ) { - cerr << "Input error, failed to extract start value from '" << lineVector[1] - << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl; - exit(1); - } - - stringstream str_end(lineVector[2]); - if (! (str_end >> bg.end) ) { - cerr << "Input error, failed to extract end value from '" << lineVector[2] - << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl; - exit(1); - } - - stringstream str_depth(lineVector[3]); - if (! (str_depth >> bg.depth) ) { - cerr << "Input error, failed to extract depth value from '" << lineVector[3] - << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl; - exit(1); - } - - return BEDGRAPH_VALID; - } -}; - -#endif /* BEDFILE_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile --- a/BEDTools-Version-2.14.3/src/utils/chromsweep/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,32 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= chromsweep.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp --- a/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,180 +0,0 @@ -/***************************************************************************** - chromsweep.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "chromsweep.h" -#include <queue> - -bool after(const BED &a, const BED &b); -void report_hits(const BED &curr_qy, const vector<BED> &hits); -vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); - - -/* - // constructor using existing BedFile pointers -*/ -ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) -: _bedA(bedA) -, _bedB(bedB) -, _sameStrand(sameStrand) -, _diffStrand(diffStrand) -{ - // prime the results pump. - _qy_lineNum = 0; - _db_lineNum = 0; - - _hits.reserve(1000); - _cache.reserve(1000); - - _bedA->Open(); - _bedB->Open(); - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); -} - -/* - Constructor with filenames -*/ -ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) -{ - // prime the results pump. - _qy_lineNum = 0; - _db_lineNum = 0; - - _hits.reserve(100000); - _cache.reserve(100000); - - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); - - _bedA->Open(); - _bedB->Open(); - - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); -} - - -/* - Destructor -*/ -ChromSweep::~ChromSweep(void) { -} - - -void ChromSweep::ScanCache() { - if (_qy_status != BED_INVALID) { - vector<BED>::iterator c = _cache.begin(); - while (c != _cache.end()) - { - if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { - if (IsValidHit(_curr_qy, *c)) { - _hits.push_back(*c); - } - ++c; - } - else { - c = _cache.erase(c); - } - } - } -} - - -bool ChromSweep::ChromChange() -{ - // the files are on the same chrom - if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { - return false; - } - // the query is ahead of the database. fast-forward the database to catch-up. - else if (_curr_qy.chrom > _curr_db.chrom) { - while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) - { - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); - } - _cache.clear(); - return false; - } - // the database is ahead of the query. - else { - // 1. scan the cache for remaining hits on the query's current chrom. - if (_curr_qy.chrom == _curr_chrom) - { - ScanCache(); - _results.push(make_pair(_curr_qy, _hits)); - _hits.clear(); - } - // 2. fast-forward until we catch up and report 0 hits until we do. - else if (_curr_qy.chrom < _curr_db.chrom) - { - _results.push(make_pair(_curr_qy, _no_hits)); - _cache.clear(); - } - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _curr_chrom = _curr_qy.chrom; - return true; - } -} - -bool ChromSweep::IsValidHit(const BED &query, const BED &db) { - // do we have an overlap in the DB? - if (overlaps(query.start, query.end, db.start, db.end) > 0) { - // Now test for necessary strandedness. - bool strands_are_same = (query.strand == db.strand); - if ( (_sameStrand == false && _diffStrand == false) - || - (_sameStrand == true && strands_are_same == true) - || - (_diffStrand == true && strands_are_same == false) - ) - { - return true; - } - } - return false; -} - - -bool ChromSweep::Next(pair<BED, vector<BED> > &next) { - if (!_bedA->Empty()) { - // have we changed chromosomes? - if (ChromChange() == false) { - // scan the database cache for hits - ScanCache(); - // advance the db until we are ahead of the query. update hits and cache as necessary - while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) - { - if (IsValidHit(_curr_qy, _curr_db)) { - _hits.push_back(_curr_db); - } - _cache.push_back(_curr_db); - _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); - } - // add the hits for this query to the pump - _results.push(make_pair(_curr_qy, _hits)); - // reset for the next query - _hits.clear(); - _curr_qy = _nullBed; - _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); - _curr_chrom = _curr_qy.chrom; - } - } - // report the next set if hits if there are still overlaps in the pump - if (!_results.empty()) { - next = _results.front(); - _results.pop(); - return true; - } - // otherwise, the party is over. - else {return false;} -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h --- a/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,89 +0,0 @@ -/***************************************************************************** - chromsweepBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef CHROMSWEEP_H -#define CHROMSWEEP_H - -#include "bedFile.h" -#include <vector> -#include <queue> -#include <iostream> -#include <fstream> -#include <stdlib.h> -using namespace std; - - - -class ChromSweep { - -// public interface. -public: - - // A is the query and B is the database - - // constructor using existing BedFile pointers - ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand = false, bool diffStrand = false); - - // constructor using filenames - ChromSweep(string &bedAFile, string &bedBFile); - - // destructor - ~ChromSweep(void); - - // loads next (a pair) with the current query and it's overlaps - // next.first is the current query interval - // next.second is a vector of the current query's hits. - // returns true if overlap - bool Next(pair<BED, vector<BED> > &next); - - // Usage: - // ChromSweep sweep = ChromSweep(_bedA, _bedB); - // pair<BED, vector<BED> > hit_set; - // while (sweep.Next(hit_set)) - // { - // // magic happens here! - // processHits(hit_set.first, hit_set.second); - // } - -// private variables. -private: - - // instances of a bed file class. - BedFile *_bedA, *_bedB; - // do we care about strandedness. - bool _sameStrand, _diffStrand; - // a cache of still active features from the database file - vector<BED> _cache; - // the set of hits in the database for the current query - vector<BED> _hits; - // a queue from which we retrieve overlap results. used by Next() - queue< pair<BED, vector<BED> > > _results; - BED _nullBed; - // an empty BED vector for returning no hits for a given query - vector<BED> _no_hits; - // the current query and db features. - BED _curr_qy, _curr_db; - // a cache of the current chrom from the query. used to handle chrom changes. - string _curr_chrom; - // the current line status in the database and query files - BedLineStatus _qy_status, _db_status; - // the current line numbers in the database and query files - int _qy_lineNum, _db_lineNum; - -// private methods. -private: - - void ScanCache(); - bool ChromChange(); - bool IsValidHit(const BED &query, const BED &db); -}; - -#endif /* CHROMSWEEP_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/fileType/Makefile --- a/BEDTools-Version-2.14.3/src/utils/fileType/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= fileType.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS= -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp --- a/BEDTools-Version-2.14.3/src/utils/fileType/fileType.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,71 +0,0 @@ -/***************************************************************************** -fileType.cpp - -(c) 2009 - Aaron Quinlan -Hall Laboratory -Department of Biochemistry and Molecular Genetics -University of Virginia -aaronquinlan@gmail.com - -Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ - -#include "fileType.h" - - -/* -returns TRUE if the file is a regular file: -not a pipe/device. - -This implies that the file can be opened/closed/seek'd multiple times without losing information -*/ -bool isRegularFile(const string& filename) { - struct stat buf ; - int i; - - i = stat(filename.c_str(), &buf); - if (i!=0) { - cerr << "Error: can't determine file type of '" << filename << "': " << strerror(errno) << endl; - exit(1); - } - if (S_ISREG(buf.st_mode)) - return true; - - return false; -} - -/* -returns TRUE if the file has a GZIP header. -Should only be run on regular files. -*/ -bool isGzipFile(istream *file) { - //see http://www.gzip.org/zlib/rfc-gzip.html#file-format - - /* - 11-Sep-2011: - We now only peek at the first byte and test for GZIPiness. - This is because I can only putback() one byte into an istream - without triggering the "fail" bit. This was necessary to support - FIFOs, per version 2.13.0 - */ - struct { - unsigned char id1; -// unsigned char id2; -// unsigned char cm; - } gzip_header; - - if (!file->read((char*)&gzip_header, sizeof(gzip_header))) { - return false; - } - - if ( gzip_header.id1 == 0x1f ) -// && -// gzip_header.id2 == 0x8b -// && -// gzip_header.cm == 8 ) - { - return true; - } - file->putback(gzip_header.id1); - return false; -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/fileType/fileType.h --- a/BEDTools-Version-2.14.3/src/utils/fileType/fileType.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,38 +0,0 @@ -/***************************************************************************** - fileType.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef FILETYPE_H -#define FILETYPE_H - -#include <string> -#include <iostream> -#include <fstream> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> -#include <sstream> - -using namespace std; - -/***************************************************************************** - Convenience functions to detect whether a given file is - "regular" and/or "gzipped". - - Kindly contributed by Assaf Gordon. -******************************************************************************/ -string string_error(int errnum); -bool isRegularFile(const string& filename); -bool isGzipFile(istream *file); - -#endif /* FILETYPE_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile --- a/BEDTools-Version-2.14.3/src/utils/genomeFile/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,32 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= genomeFile.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) -L$(BT_ROOT)/lib - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp --- a/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,103 +0,0 @@ -/***************************************************************************** - genomeFile.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "genomeFile.h" - - -GenomeFile::GenomeFile(const string &genomeFile) { - _genomeFile = genomeFile; - loadGenomeFileIntoMap(); -} - -GenomeFile::GenomeFile(const RefVector &genome) { - for (size_t i = 0; i < genome.size(); ++i) { - string chrom = genome[i].RefName; - int length = genome[i].RefLength; - - _chromSizes[chrom] = length; - _chromList.push_back(chrom); - } -} - -// Destructor -GenomeFile::~GenomeFile(void) { -} - - -void GenomeFile::loadGenomeFileIntoMap() { - - string genomeLine; - int lineNum = 0; - vector<string> genomeFields; // vector for a GENOME entry - - // open the GENOME file for reading - ifstream genome(_genomeFile.c_str(), ios::in); - if ( !genome ) { - cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - while (getline(genome, genomeLine)) { - - Tokenize(genomeLine,genomeFields); // load the fields into the vector - lineNum++; - - // ignore a blank line - if (genomeFields.size() > 0) { - if (genomeFields[0].find("#") == string::npos) { - - // we need at least 2 columns - if (genomeFields.size() >= 2) { - char *p2End; - long c2; - // make sure the second column is numeric. - c2 = strtol(genomeFields[1].c_str(), &p2End, 10); - - // strtol will set p2End to the start of the string if non-integral, base 10 - if (p2End != genomeFields[1].c_str()) { - string chrom = genomeFields[0]; - int size = atoi(genomeFields[1].c_str()); - _chromSizes[chrom] = size; - _chromList.push_back(chrom); - } - } - else { - cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")"; - cerr << " at line " << lineNum << ". Exiting." << endl; - exit (1); - } - } - } - genomeFields.clear(); - } -} - - -int GenomeFile::getChromSize(const string &chrom) { - chromToSizes::const_iterator chromIt = _chromSizes.find(chrom); - if (chromIt != _chromSizes.end()) - return _chromSizes[chrom]; - else - return -1; // chrom not found. -} - -vector<string> GenomeFile::getChromList() { - return _chromList; -} - -int GenomeFile::getNumberOfChroms() { - return _chromList.size(); -} - -string GenomeFile::getGenomeFileName() { - return _genomeFile; -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h --- a/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,62 +0,0 @@ -/***************************************************************************** - genomeFile.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef GENOMEFILE_H -#define GENOMEFILE_H - -#include <map> -#include <string> -#include <iostream> -#include <sstream> -#include <fstream> -#include <cstring> -#include <cstdio> -#include "api/BamReader.h" -#include "api/BamAux.h" -using namespace BamTools; - -using namespace std; - - -// typedef for mapping b/w chrom name and it's size in b.p. -typedef map<string, int, std::less<string> > chromToSizes; - - -class GenomeFile { - -public: - - // Constructor using a file - GenomeFile(const string &genomeFile); - - // Constructor using a vector of BamTools RefVector - GenomeFile(const RefVector &genome); - - // Destructor - ~GenomeFile(void); - - // load a GENOME file into a map keyed by chrom. value is size of chrom. - void loadGenomeFileIntoMap(); - - int getChromSize(const string &chrom); // return the size of a chromosome - vector<string> getChromList(); // return a list of chrom names - int getNumberOfChroms(); // return the number of chroms - string getGenomeFileName(); // return the name of the genome file - - - -private: - string _genomeFile; - chromToSizes _chromSizes; - vector<string> _chromList; -}; - -#endif /* GENOMEFILE_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB --- a/BEDTools-Version-2.14.3/src/utils/gzstream/COPYING.LIB Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-\t\t GNU LESSER GENERAL PUBLIC LICENSE\n-\t\t Version 2.1, February 1999\n-\n- Copyright (C) 1991, 1999 Free Software Foundation, Inc.\n- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n- Everyone is permitted to copy and distribute verbatim copies\n- of this license document, but changing it is not allowed.\n-\n-[This is the first released version of the Lesser GPL. It also counts\n- as the successor of the GNU Library Public License, version 2, hence\n- the version number 2.1.]\n-\n-\t\t\t Preamble\n-\n- The licenses for most software are designed to take away your\n-freedom to share and change it. By contrast, the GNU General Public\n-Licenses are intended to guarantee your freedom to share and change\n-free software--to make sure the software is free for all its users.\n-\n- This license, the Lesser General Public License, applies to some\n-specially designated software packages--typically libraries--of the\n-Free Software Foundation and other authors who decide to use it. You\n-can use it too, but we suggest you first think carefully about whether\n-this license or the ordinary General Public License is the better\n-strategy to use in any particular case, based on the explanations below.\n-\n- When we speak of free software, we are referring to freedom of use,\n-not price. Our General Public Licenses are designed to make sure that\n-you have the freedom to distribute copies of free software (and charge\n-for this service if you wish); that you receive source code or can get\n-it if you want it; that you can change the software and use pieces of\n-it in new free programs; and that you are informed that you can do\n-these things.\n-\n- To protect your rights, we need to make restrictions that forbid\n-distributors to deny you these rights or to ask you to surrender these\n-rights. These restrictions translate to certain responsibilities for\n-you if you distribute copies of the library or if you modify it.\n-\n- For example, if you distribute copies of the library, whether gratis\n-or for a fee, you must give the recipients all the rights that we gave\n-you. You must make sure that they, too, receive or can get the source\n-code. If you link other code with the library, you must provide\n-complete object files to the recipients, so that they can relink them\n-with the library after making changes to the library and recompiling\n-it. And you must show them these terms so they know their rights.\n-\n- We protect your rights with a two-step method: (1) we copyright the\n-library, and (2) we offer you this license, which gives you legal\n-permission to copy, distribute and/or modify the library.\n-\n- To protect each distributor, we want to make it very clear that\n-there is no warranty for the free library. Also, if the library is\n-modified by someone else and passed on, the recipients should know\n-that what they have is not the original version, so that the original\n-author's reputation will not be affected by problems that might be\n-introduced by others.\n-\x0c\n- Finally, software patents pose a constant threat to the existence of\n-any free program. We wish to make sure that a company cannot\n-effectively restrict the users of a free program by obtaining a\n-restrictive license from a patent holder. Therefore, we insist that\n-any patent license obtained for a version of the library must be\n-consistent with the full freedom of use specified in this license.\n-\n- Most GNU software, including some libraries, is covered by the\n-ordinary GNU General Public License. This license, the GNU Lesser\n-General Public License, applies to certain designated libraries, and\n-is quite different from the ordinary General Public License. We use\n-this license for certain libraries in order to permit linking those\n-libraries into non-free programs.\n-\n- When a program is linked with a library, whether statically or using\n-a shared library, the combination of the two is legally speaking a\n-combined work, a derivative of the original library. The o"..b'se version number, you may choose any version ever published by\n-the Free Software Foundation.\n-\x0c\n- 14. If you wish to incorporate parts of the Library into other free\n-programs whose distribution conditions are incompatible with these,\n-write to the author to ask for permission. For software which is\n-copyrighted by the Free Software Foundation, write to the Free\n-Software Foundation; we sometimes make exceptions for this. Our\n-decision will be guided by the two goals of preserving the free status\n-of all derivatives of our free software and of promoting the sharing\n-and reuse of software generally.\n-\n-\t\t\t NO WARRANTY\n-\n- 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO\n-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.\n-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR\n-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY\n-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\n-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\n-LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME\n-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n-\n- 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN\n-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY\n-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU\n-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR\n-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE\n-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING\n-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A\n-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF\n-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n-DAMAGES.\n-\n-\t\t END OF TERMS AND CONDITIONS\n-\x0c\n- How to Apply These Terms to Your New Libraries\n-\n- If you develop a new library, and you want it to be of the greatest\n-possible use to the public, we recommend making it free software that\n-everyone can redistribute and change. You can do so by permitting\n-redistribution under these terms (or, alternatively, under the terms of the\n-ordinary General Public License).\n-\n- To apply these terms, attach the following notices to the library. It is\n-safest to attach them to the start of each source file to most effectively\n-convey the exclusion of warranty; and each file should have at least the\n-"copyright" line and a pointer to where the full notice is found.\n-\n- <one line to give the library\'s name and a brief idea of what it does.>\n- Copyright (C) <year> <name of author>\n-\n- This library is free software; you can redistribute it and/or\n- modify it under the terms of the GNU Lesser General Public\n- License as published by the Free Software Foundation; either\n- version 2.1 of the License, or (at your option) any later version.\n-\n- This library is distributed in the hope that it will be useful,\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n- Lesser General Public License for more details.\n-\n- You should have received a copy of the GNU Lesser General Public\n- License along with this library; if not, write to the Free Software\n- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\n-\n-Also add information on how to contact you by electronic and paper mail.\n-\n-You should also get your employer (if you work as a programmer) or your\n-school, if any, to sign a "copyright disclaimer" for the library, if\n-necessary. Here is a sample; alter the names:\n-\n- Yoyodyne, Inc., hereby disclaims all copyright interest in the\n- library `Frob\' (a library for tweaking knobs) written by James Random Hacker.\n-\n- <signature of Ty Coon>, 1 April 1990\n- Ty Coon, President of Vice\n-\n-That\'s all there is to it!\n-\n-\n' |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/Makefile --- a/BEDTools-Version-2.14.3/src/utils/gzstream/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,49 +0,0 @@ -# ============================================================================ -# gzstream, C++ iostream classes wrapping the zlib compression library. -# Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# ============================================================================ -# -# File : Makefile -# Revision : $Revision: 1.3 $ -# Revision_date : $Date: 2001/10/04 15:09:28 $ -# Author(s) : Deepak Bandyopadhyay, Lutz Kettner -# -# ============================================================================ - -# ---------------------------------------------------------------------------- -# adapt these settings to your need: -# add '-DGZSTREAM_NAMESPACE=name' to CPPFLAGS to place the classes -# in its own namespace. Note, this macro needs to be set while creating -# the library as well while compiling applications based on it. -# As an alternative, gzstream.C and gzstream.h can be edited. -# ---------------------------------------------------------------------------- - -INCLUDES = -I. -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ - -${OBJ_DIR}/gzstream.o : gzstream.C gzstream.h - ${CXX} ${CXXFLAGS} -c -o ${OBJ_DIR}/gzstream.o gzstream.C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/README --- a/BEDTools-Version-2.14.3/src/utils/gzstream/README Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ - - gzstream - C++ iostream classes wrapping the zlib compression library. -=========================================================================== - - See index.html for documentation and installation instructions. |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C --- a/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.C Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,165 +0,0 @@ -// ============================================================================ -// gzstream, C++ iostream classes wrapping the zlib compression library. -// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// ============================================================================ -// -// File : gzstream.C -// Revision : $Revision: 1.7 $ -// Revision_date : $Date: 2003/01/08 14:41:27 $ -// Author(s) : Deepak Bandyopadhyay, Lutz Kettner -// -// Standard streambuf implementation following Nicolai Josuttis, "The -// Standard C++ Library". -// ============================================================================ - -#include <gzstream.h> -#include <iostream> -#include <string.h> // for memcpy - -#ifdef GZSTREAM_NAMESPACE -namespace GZSTREAM_NAMESPACE { -#endif - -// ---------------------------------------------------------------------------- -// Internal classes to implement gzstream. See header file for user classes. -// ---------------------------------------------------------------------------- - -// -------------------------------------- -// class gzstreambuf: -// -------------------------------------- - -gzstreambuf* gzstreambuf::open( const char* name, int open_mode) { - if ( is_open()) - return (gzstreambuf*)0; - mode = open_mode; - // no append nor read/write mode - if ((mode & std::ios::ate) || (mode & std::ios::app) - || ((mode & std::ios::in) && (mode & std::ios::out))) - return (gzstreambuf*)0; - char fmode[10]; - char* fmodeptr = fmode; - if ( mode & std::ios::in) - *fmodeptr++ = 'r'; - else if ( mode & std::ios::out) - *fmodeptr++ = 'w'; - *fmodeptr++ = 'b'; - *fmodeptr = '\0'; - file = gzopen( name, fmode); - if (file == 0) - return (gzstreambuf*)0; - opened = 1; - return this; -} - -gzstreambuf * gzstreambuf::close() { - if ( is_open()) { - sync(); - opened = 0; - if ( gzclose( file) == Z_OK) - return this; - } - return (gzstreambuf*)0; -} - -int gzstreambuf::underflow() { // used for input buffer only - if ( gptr() && ( gptr() < egptr())) - return * reinterpret_cast<unsigned char *>( gptr()); - - if ( ! (mode & std::ios::in) || ! opened) - return EOF; - // Josuttis' implementation of inbuf - int n_putback = gptr() - eback(); - if ( n_putback > 4) - n_putback = 4; - memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback); - - int num = gzread( file, buffer+4, bufferSize-4); - if (num <= 0) // ERROR or EOF - return EOF; - - // reset buffer pointers - setg( buffer + (4 - n_putback), // beginning of putback area - buffer + 4, // read position - buffer + 4 + num); // end of buffer - - // return next character - return * reinterpret_cast<unsigned char *>( gptr()); -} - -int gzstreambuf::flush_buffer() { - // Separate the writing of the buffer from overflow() and - // sync() operation. - int w = pptr() - pbase(); - if ( gzwrite( file, pbase(), w) != w) - return EOF; - pbump( -w); - return w; -} - -int gzstreambuf::overflow( int c) { // used for output buffer only - if ( ! ( mode & std::ios::out) || ! opened) - return EOF; - if (c != EOF) { - *pptr() = c; - pbump(1); - } - if ( flush_buffer() == EOF) - return EOF; - return c; -} - -int gzstreambuf::sync() { - // Changed to use flush_buffer() instead of overflow( EOF) - // which caused improper behavior with std::endl and flush(), - // bug reported by Vincent Ricard. - if ( pptr() && pptr() > pbase()) { - if ( flush_buffer() == EOF) - return -1; - } - return 0; -} - -// -------------------------------------- -// class gzstreambase: -// -------------------------------------- - -gzstreambase::gzstreambase( const char* name, int mode) { - init( &buf); - open( name, mode); -} - -gzstreambase::~gzstreambase() { - buf.close(); -} - -void gzstreambase::open( const char* name, int open_mode) { - if ( ! buf.open( name, open_mode)) - clear( rdstate() | std::ios::badbit); -} - -void gzstreambase::close() { - if ( buf.is_open()) - if ( ! buf.close()) - clear( rdstate() | std::ios::badbit); -} - -#ifdef GZSTREAM_NAMESPACE -} // namespace GZSTREAM_NAMESPACE -#endif - -// ============================================================================ -// EOF // |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h --- a/BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,121 +0,0 @@ -// ============================================================================ -// gzstream, C++ iostream classes wrapping the zlib compression library. -// Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner -// -// This library is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 2.1 of the License, or (at your option) any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License along with this library; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// ============================================================================ -// -// File : gzstream.h -// Revision : $Revision: 1.5 $ -// Revision_date : $Date: 2002/04/26 23:30:15 $ -// Author(s) : Deepak Bandyopadhyay, Lutz Kettner -// -// Standard streambuf implementation following Nicolai Josuttis, "The -// Standard C++ Library". -// ============================================================================ - -#ifndef GZSTREAM_H -#define GZSTREAM_H 1 - -// standard C++ with new header file names and std:: namespace -#include <iostream> -#include <fstream> -#include <zlib.h> - -#ifdef GZSTREAM_NAMESPACE -namespace GZSTREAM_NAMESPACE { -#endif - -// ---------------------------------------------------------------------------- -// Internal classes to implement gzstream. See below for user classes. -// ---------------------------------------------------------------------------- - -class gzstreambuf : public std::streambuf { -private: - static const int bufferSize = 47+256; // size of data buff - // totals 512 bytes under g++ for igzstream at the end. - - gzFile file; // file handle for compressed file - char buffer[bufferSize]; // data buffer - char opened; // open/close state of stream - int mode; // I/O mode - - int flush_buffer(); -public: - gzstreambuf() : opened(0) { - setp( buffer, buffer + (bufferSize-1)); - setg( buffer + 4, // beginning of putback area - buffer + 4, // read position - buffer + 4); // end position - // ASSERT: both input & output capabilities will not be used together - } - int is_open() { return opened; } - gzstreambuf* open( const char* name, int open_mode); - gzstreambuf* close(); - ~gzstreambuf() { close(); } - - virtual int overflow( int c = EOF); - virtual int underflow(); - virtual int sync(); -}; - -class gzstreambase : virtual public std::ios { -protected: - gzstreambuf buf; -public: - gzstreambase() { init(&buf); } - gzstreambase( const char* name, int open_mode); - ~gzstreambase(); - void open( const char* name, int open_mode); - void close(); - gzstreambuf* rdbuf() { return &buf; } -}; - -// ---------------------------------------------------------------------------- -// User classes. Use igzstream and ogzstream analogously to ifstream and -// ofstream respectively. They read and write files based on the gz* -// function interface of the zlib. Files are compatible with gzip compression. -// ---------------------------------------------------------------------------- - -class igzstream : public gzstreambase, public std::istream { -public: - igzstream() : std::istream( &buf) {} - igzstream( const char* name, int open_mode = std::ios::in) - : gzstreambase( name, open_mode), std::istream( &buf) {} - gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } - void open( const char* name, int open_mode = std::ios::in) { - gzstreambase::open( name, open_mode); - } -}; - -class ogzstream : public gzstreambase, public std::ostream { -public: - ogzstream() : std::ostream( &buf) {} - ogzstream( const char* name, int mode = std::ios::out) - : gzstreambase( name, mode), std::ostream( &buf) {} - gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } - void open( const char* name, int open_mode = std::ios::out) { - gzstreambase::open( name, open_mode); - } -}; - -#ifdef GZSTREAM_NAMESPACE -} // namespace GZSTREAM_NAMESPACE -#endif - -#endif // GZSTREAM_H -// ============================================================================ -// EOF // - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/gzstream.o has changed |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gunzip.o has changed |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o |
b |
Binary file BEDTools-Version-2.14.3/src/utils/gzstream/test_gzip.o has changed |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/gzstream/version --- a/BEDTools-Version-2.14.3/src/utils/gzstream/version Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -1.5 (08 Jan 2003) |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile --- a/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= lineFileUtilities.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS= -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp --- a/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,15 +0,0 @@ -// -// lineFileUtilities.cpp -// BEDTools -// -// Created by Aaron Quinlan Spring 2009. -// Copyright 2009 Aaron Quinlan. All rights reserved. -// -// Summary: Contains common functions for processing text files. -// -#include <sstream> -#include <iostream> -#include "lineFileUtilities.h" - - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h --- a/BEDTools-Version-2.14.3/src/utils/lineFileUtilities/lineFileUtilities.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,52 +0,0 @@ -#ifndef LINEFILEUTILITIES_H -#define LINEFILEUTILITIES_H - -#include <vector> -#include <string> -#include <cstring> -#include <cstdlib> -#include <sstream> - -using namespace std; - -// templated function to convert objects to strings -template <typename T> -inline -std::string ToString(const T & value) { - std::stringstream ss; - ss << value; - return ss.str(); -} - -// tokenize into a list of strings. -inline -void Tokenize(const string &str, vector<string> &elems, const string &delimiter = "\t") -{ - char* tok; - char cchars [str.size()+1]; - char* cstr = &cchars[0]; - strcpy(cstr, str.c_str()); - tok = strtok(cstr, delimiter.c_str()); - while (tok != NULL) { - elems.push_back(tok); - tok = strtok(NULL, delimiter.c_str()); - } -} - -// tokenize into a list of integers -inline -void Tokenize(const string &str, vector<int> &elems, const string &delimiter = "\t") -{ - char* tok; - char cchars [str.size()+1]; - char* cstr = &cchars[0]; - strcpy(cstr, str.c_str()); - tok = strtok(cstr, delimiter.c_str()); - while (tok != NULL) { - elems.push_back(atoi(tok)); - tok = strtok(NULL, delimiter.c_str()); - } -} - -#endif /* LINEFILEUTILITIES_H */ - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile --- a/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= sequenceUtils.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS = -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp --- a/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,126 +0,0 @@ -// -// sequenceUtils.cpp -// BEDTools -// -// Created by Aaron Quinlan Spring 2009. -// Copyright 2009 Aaron Quinlan. All rights reserved. -// -// Summary: Contains common functions for manipulating DNA sequences. -// -// Acknowledgment: I am grateful to Michael Stromberg for the code below to -// reverse complement a sequence. - -#include "sequenceUtils.h" - -// Performs an in-place sequence reversal -void reverseSequence(string &seq) { - std::reverse(seq.begin(), seq.end()); -} - -// Performs an in-place reverse complement conversion -void reverseComplement(string &seq) { - - // reverse the sequence - reverseSequence(seq); - - // swap the bases - for(unsigned int i = 0; i < seq.length(); i++) { - switch(seq[i]) { - case 'A': - seq[i] = 'T'; - break; - case 'C': - seq[i] = 'G'; - break; - case 'G': - seq[i] = 'C'; - break; - case 'T': - seq[i] = 'A'; - break; - case 'a': - seq[i] = 't'; - break; - case 'c': - seq[i] = 'g'; - break; - case 'g': - seq[i] = 'c'; - break; - case 't': - seq[i] = 'a'; - break; - default: - break; - } - } -} - - -void toLowerCase(std::string &seq) -{ - const int length = seq.length(); - for(int i=0; i < length; ++i) - { - seq[i] = std::tolower(seq[i]); - } -} - - -void toUpperCase(std::string &seq) -{ - const int length = seq.length(); - for(int i=0; i < length; ++i) - { - seq[i] = std::toupper(seq[i]); - } -} - - -void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other) -{ - // swap the bases - for(unsigned int i = 0; i < seq.length(); i++) { - switch(seq[i]) { - case 'A': - case 'a': - a++; - break; - case 'C': - case 'c': - c++; - break; - case 'G': - case 'g': - g++; - break; - case 'T': - case 't': - t++; - break; - case 'N': - case 'n': - n++; - break; - default: - other++; - break; - } - } -} - - -int countPattern(const string &seq, const string &pattern) -{ - // swap the bases - int patternLength = pattern.size(); - int patternCount = 0; - for(unsigned int i = 0; i < seq.length(); i++) { - if (seq.substr(i,patternLength) == pattern) { - patternCount++; - } - } - return patternCount; -} - - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h --- a/BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,27 +0,0 @@ -#ifndef SEQUENCEUTILS_H -#define SEQUENCEUTILS_H - -#include <string> -#include <algorithm> -#include <cctype> - -using namespace std; - -// Performs an in-place sequence reversal -void reverseSequence(string &seq); - -// Performs an in-place reverse complement conversion -void reverseComplement(string &seq); - -// Converts every character in a string to lowercase -void toLowerCase(string &seq); - -// Converts every character in a string to uppercase -void toUpperCase(string &seq); - -// Calculates the number of a, c, g, t, n, and other bases found in a sequence -void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other); - -int countPattern(const string &seq, const string &pattern); - -#endif |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h --- a/BEDTools-Version-2.14.3/src/utils/stringUtilities/stringUtilities.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,18 +0,0 @@ -#ifndef STRINGUTILITIES_H -#define STRINGUTILITIES_H - -#include <cctype> -#include <string> - -/**************************************************** -// isInteger(s): Tests if string s is a valid integer -*****************************************************/ -inline bool isInteger(const std::string& s) { - int len = s.length(); - for (int i = 0; i < len; i++) { - if (!std::isdigit(s[i])) return false; - return true; -} - -#endif /* STRINGUTILITIES_H */ - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/tabFile/Makefile --- a/BEDTools-Version-2.14.3/src/utils/tabFile/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,29 +0,0 @@ -OBJ_DIR = ../../../obj/ -BIN_DIR = ../../../bin/ -UTILITIES_DIR = ../../utils/ -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/ -I$(UTILITIES_DIR)/gzstream/ -I$(UTILITIES_DIR)/fileType/ - -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= tabFile.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C -W $(INCLUDES) - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean \ No newline at end of file |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp --- a/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,99 +0,0 @@ -/***************************************************************************** - tabFile.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "tabFile.h" - -/******************************************* -Class methods -*******************************************/ - -// Constructor -TabFile::TabFile(const string &tabFile) -: _tabFile(tabFile) -{} - -// Destructor -TabFile::~TabFile(void) { -} - - -void TabFile::Open(void) { - if (_tabFile == "stdin") { - _tabStream = &cin; - } - else { - size_t foundPos; - foundPos = _tabFile.find_last_of(".gz"); - // is this a GZIPPED TAB file? - if (foundPos == _tabFile.size() - 1) { - igzstream tabs(_tabFile.c_str(), ios::in); - if ( !tabs ) { - cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - else { - // if so, close it (this was just a test) - tabs.close(); - // now set a pointer to the stream so that we - // can read the file later on. - _tabStream = new igzstream(_tabFile.c_str(), ios::in); - } - } - // not GZIPPED. - else { - - ifstream tabs(_tabFile.c_str(), ios::in); - // can we open the file? - if ( !tabs ) { - cerr << "Error: The requested file (" << _tabFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - else { - // if so, close it (this was just a test) - tabs.close(); - // now set a pointer to the stream so that we - // can read the file later on. - _tabStream = new ifstream(_tabFile.c_str(), ios::in); - } - } - } -} - - -// Close the TAB file -void TabFile::Close(void) { - if (_tabFile != "stdin") delete _tabStream; -} - - -TabLineStatus TabFile::GetNextTabLine(TAB_FIELDS &tabFields, int &lineNum) { - - // make sure there are still lines to process. - // if so, tokenize, return the TAB_FIELDS. - if (_tabStream->good() == true) { - string tabLine; - tabFields.reserve(20); - - // parse the tabStream pointer - getline(*_tabStream, tabLine); - lineNum++; - - // split into a string vector. - Tokenize(tabLine, tabFields); - - // parse the line and validate it - return parseTabLine(tabFields, lineNum); - } - - // default if file is closed or EOF - return TAB_INVALID; -} |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h --- a/BEDTools-Version-2.14.3/src/utils/tabFile/tabFile.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,80 +0,0 @@ -/***************************************************************************** - tabFile.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef TABFILE_H -#define TABFILE_H - -#include "gzstream.h" -#include <vector> -#include <string> -#include <iostream> - -using namespace std; - -// enum to flag the state of a given line in a TAB file. -enum TabLineStatus -{ - TAB_INVALID = -1, - TAB_HEADER = 0, - TAB_BLANK = 1, - TAB_VALID = 2 -}; - -typedef vector<string> TAB_FIELDS; - -//************************************************ -// TabFile Class methods and elements -//************************************************ -class TabFile { - -public: - - // Constructor - TabFile(const string &tabFile); - - // Destructor - ~TabFile(void); - - // Open a TAB file for reading (creates an istream pointer) - void Open(void); - - // Close an opened TAB file. - void Close(void); - - // Get the next TAB entry in an opened TAB file. - TabLineStatus GetNextTabLine (TAB_FIELDS &tab, int &lineNum); - -private: - - // data - istream *_tabStream; - string _tabFile; - - // methods - inline TabLineStatus parseTabLine (const vector<string> &lineVector, int &lineNum) { - // bail out if we have a blank line - if (lineVector.size() == 0) - return TAB_BLANK; - // real line with data - if (lineVector[0][0] != '#') { - return TAB_VALID; - } - // comment or header line - else { - lineNum--; - return TAB_HEADER; - } - // default - return TAB_INVALID; - } -}; - -#endif /* TABFILE_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/utils/version/version.h --- a/BEDTools-Version-2.14.3/src/utils/version/version.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -#ifndef VERSION_H -#define VERSION_H - -// define the version. All tools in the -// suite carry the same version number. -#define VERSION "2.14.2" - -#endif /* VERSION_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/windowBed/Makefile --- a/BEDTools-Version-2.14.3/src/windowBed/Makefile Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,51 +0,0 @@ -UTILITIES_DIR = ../utils/ -OBJ_DIR = ../../obj/ -BIN_DIR = ../../bin/ - -# ------------------- -# define our includes -# ------------------- -INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \ - -I$(UTILITIES_DIR)/version/ \ - -I$(UTILITIES_DIR)/gzstream/ \ - -I$(UTILITIES_DIR)/genomeFile/ \ - -I$(UTILITIES_DIR)/lineFileUtilities/ \ - -I$(UTILITIES_DIR)/fileType/ \ - -I$(UTILITIES_DIR)/BamTools/include \ - -I$(UTILITIES_DIR)/BamTools-Ancillary -# ---------------------------------- -# define our source and object files -# ---------------------------------- -SOURCES= windowMain.cpp windowBed.cpp -OBJECTS= $(SOURCES:.cpp=.o) -_EXT_OBJECTS=bedFile.o lineFileUtilities.o gzstream.o fileType.o -EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS)) -BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) -PROGRAM= windowBed - - -all: $(PROGRAM) - -.PHONY: all - -$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS) - @echo " * linking $(PROGRAM)" - @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ -L$(UTILITIES_DIR)/BamTools/lib/ -lbamtools $(LIBS) - -$(BUILT_OBJECTS): $(SOURCES) - @echo " * compiling" $(*F).cpp - @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) - -$(EXT_OBJECTS): - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/bedFile/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/lineFileUtilities/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools-Ancillary/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/gzstream/ - @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/fileType/ - -clean: - @echo "Cleaning up." - @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* - -.PHONY: clean |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp --- a/BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,253 +0,0 @@ -/***************************************************************************** - windowBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "windowBed.h" - - -/* - Constructor -*/ -BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, - bool anyHit, bool noHit, bool writeCount, bool strandWindows, - bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) { - - _bedAFile = bedAFile; - _bedBFile = bedBFile; - - _leftSlop = leftSlop; - _rightSlop = rightSlop; - - _anyHit = anyHit; - _noHit = noHit; - _writeCount = writeCount; - _strandWindows = strandWindows; - _matchOnSameStrand = matchOnSameStrand; - _matchOnDiffStrand = matchOnDiffStrand; - _bamInput = bamInput; - _bamOutput = bamOutput; - _isUncompressedBam = isUncompressedBam; - - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); - - if (_bamInput == false) - WindowIntersectBed(); - else - WindowIntersectBam(_bedAFile); -} - - - -/* - Destructor -*/ -BedWindow::~BedWindow(void) { -} - - - -void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) { - - /* - Adjust the start and end of a based on the requested window - */ - - // update the current feature's start and end - // according to the slop requested (slop = 0 by default) - CHRPOS aFudgeStart = 0; - CHRPOS aFudgeEnd; - AddWindow(a, aFudgeStart, aFudgeEnd); - - /* - Now report the hits (if any) based on the window around a. - */ - // get the hits in B for the A feature - _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand); - - int numOverlaps = 0; - - // loop through the hits and report those that meet the user's criteria - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - - int s = max(aFudgeStart, h->start); - int e = min(aFudgeEnd, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - int aLength = (a.end - a.start); // the length of a in b.p. - - if (s < e) { - // is there enough overlap (default ~ 1bp) - if ( ((float) overlapBases / (float) aLength) > 0 ) { - numOverlaps++; - if (_anyHit == false && _noHit == false && _writeCount == false) { - _bedA->reportBedTab(a); - _bedB->reportBedNewLine(*h); - } - } - } - } - if (_anyHit == true && (numOverlaps >= 1)) { - _bedA->reportBedNewLine(a); } - else if (_writeCount == true) { - _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps); - } - else if (_noHit == true && (numOverlaps == 0)) { - _bedA->reportBedNewLine(a); - } -} - - -bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) { - - // update the current feature's start and end - // according to the slop requested (slop = 0 by default) - CHRPOS aFudgeStart = 0; - CHRPOS aFudgeEnd; - AddWindow(a, aFudgeStart, aFudgeEnd); - - bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand); - return overlapsFound; -} - - -void BedWindow::WindowIntersectBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedFileIntoMap(); - - BED a, nullBed; - int lineNum = 0; // current input line number - BedLineStatus bedStatus; - vector<BED> hits; // vector of potential hits - hits.reserve(100); - - _bedA->Open(); - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - FindWindowOverlaps(a, hits); - hits.clear(); - a = nullBed; - } - } - _bedA->Close(); -} - - -void BedWindow::WindowIntersectBam(string bamFile) { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedFileIntoMap(); - - // open the BAM file - BamReader reader; - BamWriter writer; - reader.Open(bamFile); - - // get header & reference information - string bamHeader = reader.GetHeaderText(); - RefVector refs = reader.GetReferenceData(); - - // open a BAM output to stdout if we are writing BAM - if (_bamOutput == true) { - // set compression mode - BamWriter::CompressionMode compressionMode = BamWriter::Compressed; - if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; - writer.SetCompressionMode(compressionMode); - // open our BAM writer - writer.Open("stdout", bamHeader, refs); - } - - vector<BED> hits; // vector of potential hits - // reserve some space - hits.reserve(100); - - _bedA->bedType = 6; - BamAlignment bam; - bool overlapsFound; - // get each set of alignments for each pair. - while (reader.GetNextAlignment(bam)) { - - if (bam.IsMapped()) { - BED a; - a.chrom = refs.at(bam.RefID).RefName; - a.start = bam.Position; - a.end = bam.GetEndPosition(false, false); - - // build the name field from the BAM alignment. - a.name = bam.Name; - if (bam.IsFirstMate()) a.name += "/1"; - if (bam.IsSecondMate()) a.name += "/2"; - - a.score = ToString(bam.MapQuality); - a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; - - if (_bamOutput == true) { - overlapsFound = FindOneOrMoreWindowOverlaps(a); - if (overlapsFound == true) { - if (_noHit == false) - writer.SaveAlignment(bam); - } - else { - if (_noHit == true) - writer.SaveAlignment(bam); - } - } - else { - FindWindowOverlaps(a, hits); - hits.clear(); - } - } - // BAM IsMapped() is false - else if (_noHit == true) { - writer.SaveAlignment(bam); - } - } - - // close the relevant BAM files. - reader.Close(); - if (_bamOutput == true) { - writer.Close(); - } -} - - -void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) { - // Does the user want to treat the windows based on strand? - // If so, - // if "+", then left is left and right is right - // if "-", the left is right and right is left. - if (_strandWindows) { - if (a.strand == "+") { - if ((int) (a.start - _leftSlop) > 0) - fudgeStart = a.start - _leftSlop; - else fudgeStart = 0; - fudgeEnd = a.end + _rightSlop; - } - else { - if ((int) (a.start - _rightSlop) > 0) - fudgeStart = a.start - _rightSlop; - else fudgeStart = 0; - fudgeEnd = a.end + _leftSlop; - } - } - // If not, add the windows irrespective of strand - else { - if ((int) (a.start - _leftSlop) > 0) - fudgeStart = a.start - _leftSlop; - else fudgeStart = 0; - fudgeEnd = a.end + _rightSlop; - } -} - |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/windowBed/windowBed.h --- a/BEDTools-Version-2.14.3/src/windowBed/windowBed.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,69 +0,0 @@ -/***************************************************************************** - windowBed.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef WINDOWBED_H -#define WINDOWBED_H - -#include "api/BamReader.h" -#include "api/BamWriter.h" -#include "api/BamAux.h" -using namespace BamTools; - -#include "bedFile.h" -#include <vector> -#include <iostream> -#include <fstream> - -using namespace std; - -//************************************************ -// Class methods and elements -//************************************************ -class BedWindow { - -public: - - // constructor - BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, - bool anyHit, bool noHit, bool writeCount, bool strandWindows, - bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam); - - // destructor - ~BedWindow(void); - -private: - - string _bedAFile; - string _bedBFile; - bool _anyHit; - bool _writeCount; - int _leftSlop; - int _rightSlop; - bool _noHit; - bool _strandWindows; - bool _matchOnSameStrand; - bool _matchOnDiffStrand; - bool _bamInput; - bool _bamOutput; - bool _isUncompressedBam; - - // instance of a bed file class. - BedFile *_bedA, *_bedB; - - // methods - void WindowIntersectBed(); - void WindowIntersectBam(string bamFile); - void FindWindowOverlaps(const BED &a, vector<BED> &hits); - bool FindOneOrMoreWindowOverlaps(const BED &a); - void AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd); - -}; -#endif /* WINDOWBED_H */ |
b |
diff -r dfcd8b6c1bda -r bec36315bd12 BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp --- a/BEDTools-Version-2.14.3/src/windowBed/windowMain.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,263 +0,0 @@\n-/*****************************************************************************\n- windowMain.cpp\n-\n- (c) 2009 - Aaron Quinlan\n- Hall Laboratory\n- Department of Biochemistry and Molecular Genetics\n- University of Virginia\n- aaronquinlan@gmail.com\n-\n- Licenced under the GNU General Public License 2.0 license.\n-******************************************************************************/\n-#include "windowBed.h"\n-#include "version.h"\n-\n-using namespace std;\n-\n-// define the version\n-#define PROGRAM_NAME "windowBed"\n-\n-// define our parameter checking macro\n-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)\n-\n-// function declarations\n-void ShowHelp(void);\n-\n-\n-int main(int argc, char* argv[]) {\n-\n- // our configuration variables\n- bool showHelp = false;\n-\n- // input files\n- string bedAFile;\n- string bedBFile;\n-\n- // input arguments\n- int leftSlop = 1000;\n- int rightSlop = 1000;\n-\n- bool haveBedA = false;\n- bool haveBedB = false;\n- bool noHit = false;\n- bool anyHit = false;\n- bool writeCount = false;\n- bool haveSlop = false;\n- bool haveLeft = false;\n- bool haveRight = false;\n- bool strandWindows = false;\n- bool matchOnSameStrand = false;\n- bool matchOnDiffStrand = false;\n- bool inputIsBam = false;\n- bool outputIsBam = true;\n- bool uncompressedBam = false;\n-\n- // check to see if we should print out some help\n- if(argc <= 1) showHelp = true;\n-\n- for(int i = 1; i < argc; i++) {\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||\n- (PARAMETER_CHECK("--help", 5, parameterLength))) {\n- showHelp = true;\n- }\n- }\n-\n- if(showHelp) ShowHelp();\n-\n- // do some parsing (all of these parameters require 2 strings)\n- for(int i = 1; i < argc; i++) {\n-\n- int parameterLength = (int)strlen(argv[i]);\n-\n- if(PARAMETER_CHECK("-a", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedA = true;\n- inputIsBam = true;\n- bedAFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveBedB = true;\n- bedBFile = argv[i + 1];\n- i++;\n- }\n- }\n- else if(PARAMETER_CHECK("-bed", 4, parameterLength)) {\n- outputIsBam = false;\n- }\n- else if(PARAMETER_CHECK("-u", 2, parameterLength)) {\n- anyHit = true;\n- }\n- else if(PARAMETER_CHECK("-c", 2, parameterLength)) {\n- writeCount = true;\n- }\n- else if (PARAMETER_CHECK("-v", 2, parameterLength)) {\n- noHit = true;\n- }\n- else if (PARAMETER_CHECK("-sw", 3, parameterLength)) {\n- strandWindows = true;\n- }\n- else if (PARAMETER_CHECK("-sm", 3, parameterLength)) {\n- matchOnSameStrand = true;\n- }\n- else if (PARAMETER_CHECK("-Sm", 3, parameterLength)) {\n- matchOnDiffStrand = true;\n- }\n- else if (PARAMETER_CHECK("-w", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveSlop = true;\n- leftSlop = atoi(argv[i + 1]);\n- rightSlop = leftSlop;\n- i++;\n- }\n- }\n- else if (PARAMETER_CHECK("-l", 2, parameterLength)) {\n- if ((i+1) < argc) {\n- haveLeft = true;\n- leftSlop = atoi(argv[i + 1]);\n- i++;\n- }\n- '..b'ressedBam);\n- delete bi;\n- return 0;\n- }\n- else {\n- ShowHelp();\n- }\n-}\n-\n-\n-void ShowHelp(void) {\n-\n- cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;\n-\n- cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;\n-\n- cerr << "Summary: Examines a \\"window\\" around each feature in A and" << endl;\n- cerr << "\\t reports all features in B that overlap the window. For each" << endl;\n- cerr << "\\t overlap the entire entry in A and B are reported." << endl << endl;\n-\n- cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;\n-\n- cerr << "Options: " << endl;\n-\n- cerr << "\\t-abam\\t" << "The A input file is in BAM format. Output will be BAM as well." << endl << endl;\n-\n- cerr << "\\t-ubam\\t" << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl;\n-\n- cerr << "\\t-bed\\t" << "When using BAM input (-abam), write output as BED. The default" << endl;\n- cerr << "\\t\\tis to write output in BAM when using -abam." << endl << endl;\n-\n- cerr << "\\t-w\\t" << "Base pairs added upstream and downstream of each entry" << endl;\n- cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n- cerr << "\\t\\t- Creates symterical \\"windows\\" around A." << endl;\n- cerr << "\\t\\t- Default is 1000 bp." << endl;\n- cerr << "\\t\\t- (INTEGER)" << endl << endl;\n-\n- cerr << "\\t-l\\t" << "Base pairs added upstream (left of) of each entry" << endl;\n- cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n- cerr << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n- cerr << "\\t\\t- Default is 1000 bp." << endl;\n- cerr << "\\t\\t- (INTEGER)" << endl << endl;\n-\n- cerr << "\\t-r\\t" << "Base pairs added downstream (right of) of each entry" << endl;\n- cerr << "\\t\\tin A when searching for overlaps in B." << endl;\n- cerr << "\\t\\t- Allows one to define assymterical \\"windows\\"." << endl;\n- cerr << "\\t\\t- Default is 1000 bp." << endl;\n- cerr << "\\t\\t- (INTEGER)" << endl << endl;\n-\n- cerr << "\\t-sw\\t" << "Define -l and -r based on strand. For example if used, -l 500" << endl;\n- cerr << "\\t\\tfor a negative-stranded feature will add 500 bp downstream." << endl;\n- cerr << "\\t\\t- Default = disabled." << endl << endl;\n-\n- cerr << "\\t-sm\\t" << "Only report hits in B that overlap A on the _same_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-Sm\\t" << "Only report hits in B that overlap A on the _opposite_ strand." << endl;\n- cerr << "\\t\\t- By default, overlaps are reported without respect to strand." << endl << endl;\n-\n- cerr << "\\t-u\\t" << "Write the original A entry _once_ if _any_ overlaps found in B." << endl;\n- cerr << "\\t\\t- In other words, just report the fact >=1 hit was found." << endl << endl;\n-\n- cerr << "\\t-c\\t" << "For each entry in A, report the number of overlaps with B." << endl;\n- cerr << "\\t\\t- Reports 0 for A entries that have no overlap with B." << endl;\n- cerr << "\\t\\t- Overlaps restricted by -f." << endl << endl;\n-\n- cerr << "\\t-v\\t" << "Only report those entries in A that have _no overlaps_ with B." << endl;\n- cerr << "\\t\\t- Similar to \\"grep -v.\\"" << endl << endl;\n-\n- // end the program here\n- exit(1);\n-}\n' |