annotate BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 sortBed.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "sortBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 // Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 BedSort::BedSort(string &bedFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 _bedFile = bedFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 _bed = new BedFile(bedFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 BedSort::~BedSort(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 void BedSort::SortBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 for (unsigned int i = 0; i < bedList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 _bed->reportBedNewLine(bedList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 void BedSort::SortBedBySizeAsc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 vector<BED> masterList;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 masterList.reserve(1000000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 // add the entries from this chromosome to the current list
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 for (unsigned int i = 0; i < m->second.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 masterList.push_back(m->second[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 // sort the master list by size (asc.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 sort(masterList.begin(), masterList.end(), sortBySizeAsc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 // report the entries in ascending order
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 for (unsigned int i = 0; i < masterList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 _bed->reportBedNewLine(masterList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 void BedSort::SortBedBySizeDesc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 vector<BED> masterList;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 masterList.reserve(1000000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 // add the entries from this chromosome to the current list
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 for (unsigned int i = 0; i < m->second.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 masterList.push_back(m->second[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 // sort the master list by size (asc.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 sort(masterList.begin(), masterList.end(), sortBySizeDesc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 // report the entries in ascending order
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 for (unsigned int i = 0; i < masterList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 _bed->reportBedNewLine(masterList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 void BedSort::SortBedByChromThenSizeAsc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 sort(bedList.begin(), bedList.end(), sortBySizeAsc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 for (unsigned int i = 0; i < bedList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 _bed->reportBedNewLine(bedList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 void BedSort::SortBedByChromThenSizeDesc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 sort(bedList.begin(), bedList.end(), sortBySizeDesc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 for (unsigned int i = 0; i < bedList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 _bed->reportBedNewLine(bedList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 void BedSort::SortBedByChromThenScoreAsc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 if (_bed->bedType >= 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 sort(bedList.begin(), bedList.end(), sortByScoreAsc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 for (unsigned int i = 0; i < bedList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 _bed->reportBedNewLine(bedList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 void BedSort::SortBedByChromThenScoreDesc() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 _bed->loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 if (_bed->bedType >= 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 // loop through each chromosome and merge their BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 // bedList is already sorted by start position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 vector<BED> bedList = m->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 sort(bedList.begin(), bedList.end(), sortByScoreDesc);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 for (unsigned int i = 0; i < bedList.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 _bed->reportBedNewLine(bedList[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201