comparison BEDTools-Version-2.14.3/src/sortBed/sortBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 sortBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "sortBed.h"
14
15 //
16 // Constructor
17 //
18 BedSort::BedSort(string &bedFile) {
19 _bedFile = bedFile;
20 _bed = new BedFile(bedFile);
21 }
22
23 //
24 // Destructor
25 //
26 BedSort::~BedSort(void) {
27 }
28
29
30 void BedSort::SortBed() {
31
32 // load the "B" bed file into a map so
33 // that we can easily compare "A" to it for overlaps
34 _bed->loadBedFileIntoMapNoBin();
35
36 // loop through each chromosome and merge their BED entries
37 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
38
39 // bedList is already sorted by start position.
40 vector<BED> bedList = m->second;
41
42 for (unsigned int i = 0; i < bedList.size(); ++i) {
43 _bed->reportBedNewLine(bedList[i]);
44 }
45 }
46 }
47
48
49 void BedSort::SortBedBySizeAsc() {
50
51 // load the "B" bed file into a map so
52 // that we can easily compare "A" to it for overlaps
53 _bed->loadBedFileIntoMapNoBin();
54
55 vector<BED> masterList;
56 masterList.reserve(1000000);
57
58 // loop through each chromosome and merge their BED entries
59 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
60
61 // bedList is already sorted by start position.
62 vector<BED> bedList = m->second;
63
64 // add the entries from this chromosome to the current list
65 for (unsigned int i = 0; i < m->second.size(); ++i) {
66 masterList.push_back(m->second[i]);
67 }
68 }
69
70 // sort the master list by size (asc.)
71 sort(masterList.begin(), masterList.end(), sortBySizeAsc);
72
73 // report the entries in ascending order
74 for (unsigned int i = 0; i < masterList.size(); ++i) {
75 _bed->reportBedNewLine(masterList[i]);
76 }
77 }
78
79
80 void BedSort::SortBedBySizeDesc() {
81
82 // load the "B" bed file into a map so
83 // that we can easily compare "A" to it for overlaps
84 _bed->loadBedFileIntoMapNoBin();
85
86 vector<BED> masterList;
87 masterList.reserve(1000000);
88
89 // loop through each chromosome and merge their BED entries
90 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
91
92 // bedList is already sorted by start position.
93 vector<BED> bedList = m->second;
94
95 // add the entries from this chromosome to the current list
96 for (unsigned int i = 0; i < m->second.size(); ++i) {
97 masterList.push_back(m->second[i]);
98 }
99 }
100
101 // sort the master list by size (asc.)
102 sort(masterList.begin(), masterList.end(), sortBySizeDesc);
103
104 // report the entries in ascending order
105 for (unsigned int i = 0; i < masterList.size(); ++i) {
106 _bed->reportBedNewLine(masterList[i]);
107 }
108 }
109
110 void BedSort::SortBedByChromThenSizeAsc() {
111
112 // load the "B" bed file into a map so
113 // that we can easily compare "A" to it for overlaps
114 _bed->loadBedFileIntoMapNoBin();
115
116 // loop through each chromosome and merge their BED entries
117 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
118
119 // bedList is already sorted by start position.
120 vector<BED> bedList = m->second;
121 sort(bedList.begin(), bedList.end(), sortBySizeAsc);
122
123 for (unsigned int i = 0; i < bedList.size(); ++i) {
124 _bed->reportBedNewLine(bedList[i]);
125 }
126 }
127 }
128
129
130 void BedSort::SortBedByChromThenSizeDesc() {
131
132 // load the "B" bed file into a map so
133 // that we can easily compare "A" to it for overlaps
134 _bed->loadBedFileIntoMapNoBin();
135
136 // loop through each chromosome and merge their BED entries
137 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
138
139 // bedList is already sorted by start position.
140 vector<BED> bedList = m->second;
141
142 sort(bedList.begin(), bedList.end(), sortBySizeDesc);
143
144 for (unsigned int i = 0; i < bedList.size(); ++i) {
145 _bed->reportBedNewLine(bedList[i]);
146 }
147 }
148 }
149
150
151 void BedSort::SortBedByChromThenScoreAsc() {
152
153 // load the "B" bed file into a map so
154 // that we can easily compare "A" to it for overlaps
155 _bed->loadBedFileIntoMapNoBin();
156
157 if (_bed->bedType >= 5) {
158 // loop through each chromosome and merge their BED entries
159 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
160
161 // bedList is already sorted by start position.
162 vector<BED> bedList = m->second;
163 sort(bedList.begin(), bedList.end(), sortByScoreAsc);
164
165 for (unsigned int i = 0; i < bedList.size(); ++i) {
166 _bed->reportBedNewLine(bedList[i]);
167 }
168 }
169 }
170 else {
171 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
172 exit(1);
173 }
174 }
175
176
177 void BedSort::SortBedByChromThenScoreDesc() {
178
179 // load the "B" bed file into a map so
180 // that we can easily compare "A" to it for overlaps
181 _bed->loadBedFileIntoMapNoBin();
182
183 if (_bed->bedType >= 5) {
184 // loop through each chromosome and merge their BED entries
185 for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {
186
187 // bedList is already sorted by start position.
188 vector<BED> bedList = m->second;
189 sort(bedList.begin(), bedList.end(), sortByScoreDesc);
190
191 for (unsigned int i = 0; i < bedList.size(); ++i) {
192 _bed->reportBedNewLine(bedList[i]);
193 }
194 }
195 }
196 else {
197 cerr << "Error: Requested a sort by score, but your BED file does not appear to be in BED 5 format or greater. Exiting." << endl;
198 exit(1);
199 }
200 }
201