0
|
1 /*****************************************************************************
|
|
2 sortBed.cpp
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licensed under the GNU General Public License, version 2.0.
|
|
11 ******************************************************************************/
|
|
12 #include "lineFileUtilities.h"
|
|
13 #include "sortBed.h"
|
|
14
|
|
15 //
|
|
16 // Constructor
|
|
17 //
|
|
18 BedSort::BedSort(string &bedFile) {
|
|
19 _bedFile = bedFile;
|
|
20 _bed = new BedFile(bedFile);
|
|
21 }
|
|
22
|
|
23 //
|
|
24 // Destructor
|
|
25 //
|
|
26 BedSort::~BedSort(void) {
|
|
27 }
|
|
28
|
|
29
|
|
30 void BedSort::SortBed() {
|
|
31
|
|
32 // load the "B" bed file into a map so
|
|
33 // that we can easily compare "A" to it for overlaps
|
|
34 _bed->loadBedFileIntoMapNoBin();
|
|
35
|
|
36 // loop through each chromosome and merge their BED entries
|
|
37 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
38
|
|
39 // bedList is already sorted by start position.
|
|
40 vector<BED> bedList = m->second;
|
|
41
|
|
42 for (unsigned int i = 0; i < bedList.size(); ++i) {
|
|
43 _bed->reportBedNewLine(bedList[i]);
|
|
44 }
|
|
45 }
|
|
46 }
|
|
47
|
|
48
|
|
49 void BedSort::SortBedBySizeAsc() {
|
|
50
|
|
51 // load the "B" bed file into a map so
|
|
52 // that we can easily compare "A" to it for overlaps
|
|
53 _bed->loadBedFileIntoMapNoBin();
|
|
54
|
|
55 vector<BED> masterList;
|
|
56 masterList.reserve(1000000);
|
|
57
|
|
58 // loop through each chromosome and merge their BED entries
|
|
59 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
60
|
|
61 // bedList is already sorted by start position.
|
|
62 vector<BED> bedList = m->second;
|
|
63
|
|
64 // add the entries from this chromosome to the current list
|
|
65 for (unsigned int i = 0; i < m->second.size(); ++i) {
|
|
66 masterList.push_back(m->second[i]);
|
|
67 }
|
|
68 }
|
|
69
|
|
70 // sort the master list by size (asc.)
|
|
71 sort(masterList.begin(), masterList.end(), sortBySizeAsc);
|
|
72
|
|
73 // report the entries in ascending order
|
|
74 for (unsigned int i = 0; i < masterList.size(); ++i) {
|
|
75 _bed->reportBedNewLine(masterList[i]);
|
|
76 }
|
|
77 }
|
|
78
|
|
79
|
|
80 void BedSort::SortBedBySizeDesc() {
|
|
81
|
|
82 // load the "B" bed file into a map so
|
|
83 // that we can easily compare "A" to it for overlaps
|
|
84 _bed->loadBedFileIntoMapNoBin();
|
|
85
|
|
86 vector<BED> masterList;
|
|
87 masterList.reserve(1000000);
|
|
88
|
|
89 // loop through each chromosome and merge their BED entries
|
|
90 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
91
|
|
92 // bedList is already sorted by start position.
|
|
93 vector<BED> bedList = m->second;
|
|
94
|
|
95 // add the entries from this chromosome to the current list
|
|
96 for (unsigned int i = 0; i < m->second.size(); ++i) {
|
|
97 masterList.push_back(m->second[i]);
|
|
98 }
|
|
99 }
|
|
100
|
|
101 // sort the master list by size (asc.)
|
|
102 sort(masterList.begin(), masterList.end(), sortBySizeDesc);
|
|
103
|
|
104 // report the entries in ascending order
|
|
105 for (unsigned int i = 0; i < masterList.size(); ++i) {
|
|
106 _bed->reportBedNewLine(masterList[i]);
|
|
107 }
|
|
108 }
|
|
109
|
|
110 void BedSort::SortBedByChromThenSizeAsc() {
|
|
111
|
|
112 // load the "B" bed file into a map so
|
|
113 // that we can easily compare "A" to it for overlaps
|
|
114 _bed->loadBedFileIntoMapNoBin();
|
|
115
|
|
116 // loop through each chromosome and merge their BED entries
|
|
117 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
118
|
|
119 // bedList is already sorted by start position.
|
|
120 vector<BED> bedList = m->second;
|
|
121 sort(bedList.begin(), bedList.end(), sortBySizeAsc);
|
|
122
|
|
123 for (unsigned int i = 0; i < bedList.size(); ++i) {
|
|
124 _bed->reportBedNewLine(bedList[i]);
|
|
125 }
|
|
126 }
|
|
127 }
|
|
128
|
|
129
|
|
130 void BedSort::SortBedByChromThenSizeDesc() {
|
|
131
|
|
132 // load the "B" bed file into a map so
|
|
133 // that we can easily compare "A" to it for overlaps
|
|
134 _bed->loadBedFileIntoMapNoBin();
|
|
135
|
|
136 // loop through each chromosome and merge their BED entries
|
|
137 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
138
|
|
139 // bedList is already sorted by start position.
|
|
140 vector<BED> bedList = m->second;
|
|
141
|
|
142 sort(bedList.begin(), bedList.end(), sortBySizeDesc);
|
|
143
|
|
144 for (unsigned int i = 0; i < bedList.size(); ++i) {
|
|
145 _bed->reportBedNewLine(bedList[i]);
|
|
146 }
|
|
147 }
|
|
148 }
|
|
149
|
|
150
|
|
151 void BedSort::SortBedByChromThenScoreAsc() {
|
|
152
|
|
153 // load the "B" bed file into a map so
|
|
154 // that we can easily compare "A" to it for overlaps
|
|
155 _bed->loadBedFileIntoMapNoBin();
|
|
156
|
|
157 if (_bed->bedType >= 5) {
|
|
158 // loop through each chromosome and merge their BED entries
|
|
159 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
160
|
|
161 // bedList is already sorted by start position.
|
|
162 vector<BED> bedList = m->second;
|
|
163 sort(bedList.begin(), bedList.end(), sortByScoreAsc);
|
|
164
|
|
165 for (unsigned int i = 0; i < bedList.size(); ++i) {
|
|
166 _bed->reportBedNewLine(bedList[i]);
|
|
167 }
|
|
168 }
|
|
169 }
|
|
170 else {
|
|
171 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
|
|
172 exit(1);
|
|
173 }
|
|
174 }
|
|
175
|
|
176
|
|
177 void BedSort::SortBedByChromThenScoreDesc() {
|
|
178
|
|
179 // load the "B" bed file into a map so
|
|
180 // that we can easily compare "A" to it for overlaps
|
|
181 _bed->loadBedFileIntoMapNoBin();
|
|
182
|
|
183 if (_bed->bedType >= 5) {
|
|
184 // loop through each chromosome and merge their BED entries
|
|
185 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
|
|
186
|
|
187 // bedList is already sorted by start position.
|
|
188 vector<BED> bedList = m->second;
|
|
189 sort(bedList.begin(), bedList.end(), sortByScoreDesc);
|
|
190
|
|
191 for (unsigned int i = 0; i < bedList.size(); ++i) {
|
|
192 _bed->reportBedNewLine(bedList[i]);
|
|
193 }
|
|
194 }
|
|
195 }
|
|
196 else {
|
|
197 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
|
|
198 exit(1);
|
|
199 }
|
|
200 }
|
|
201
|