0
|
1 //
|
|
2 // sequenceUtils.cpp
|
|
3 // BEDTools
|
|
4 //
|
|
5 // Created by Aaron Quinlan Spring 2009.
|
|
6 // Copyright 2009 Aaron Quinlan. All rights reserved.
|
|
7 //
|
|
8 // Summary: Contains common functions for manipulating DNA sequences.
|
|
9 //
|
|
10 // Acknowledgment: I am grateful to Michael Stromberg for the code below to
|
|
11 // reverse complement a sequence.
|
|
12
|
|
13 #include "sequenceUtils.h"
|
|
14
|
|
15 // Performs an in-place sequence reversal
|
|
16 void reverseSequence(string &seq) {
|
|
17 std::reverse(seq.begin(), seq.end());
|
|
18 }
|
|
19
|
|
20 // Performs an in-place reverse complement conversion
|
|
21 void reverseComplement(string &seq) {
|
|
22
|
|
23 // reverse the sequence
|
|
24 reverseSequence(seq);
|
|
25
|
|
26 // swap the bases
|
|
27 for(unsigned int i = 0; i < seq.length(); i++) {
|
|
28 switch(seq[i]) {
|
|
29 case 'A':
|
|
30 seq[i] = 'T';
|
|
31 break;
|
|
32 case 'C':
|
|
33 seq[i] = 'G';
|
|
34 break;
|
|
35 case 'G':
|
|
36 seq[i] = 'C';
|
|
37 break;
|
|
38 case 'T':
|
|
39 seq[i] = 'A';
|
|
40 break;
|
|
41 case 'a':
|
|
42 seq[i] = 't';
|
|
43 break;
|
|
44 case 'c':
|
|
45 seq[i] = 'g';
|
|
46 break;
|
|
47 case 'g':
|
|
48 seq[i] = 'c';
|
|
49 break;
|
|
50 case 't':
|
|
51 seq[i] = 'a';
|
|
52 break;
|
|
53 default:
|
|
54 break;
|
|
55 }
|
|
56 }
|
|
57 }
|
|
58
|
|
59
|
|
60 void toLowerCase(std::string &seq)
|
|
61 {
|
|
62 const int length = seq.length();
|
|
63 for(int i=0; i < length; ++i)
|
|
64 {
|
|
65 seq[i] = std::tolower(seq[i]);
|
|
66 }
|
|
67 }
|
|
68
|
|
69
|
|
70 void toUpperCase(std::string &seq)
|
|
71 {
|
|
72 const int length = seq.length();
|
|
73 for(int i=0; i < length; ++i)
|
|
74 {
|
|
75 seq[i] = std::toupper(seq[i]);
|
|
76 }
|
|
77 }
|
|
78
|
|
79
|
|
80 void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other)
|
|
81 {
|
|
82 // swap the bases
|
|
83 for(unsigned int i = 0; i < seq.length(); i++) {
|
|
84 switch(seq[i]) {
|
|
85 case 'A':
|
|
86 case 'a':
|
|
87 a++;
|
|
88 break;
|
|
89 case 'C':
|
|
90 case 'c':
|
|
91 c++;
|
|
92 break;
|
|
93 case 'G':
|
|
94 case 'g':
|
|
95 g++;
|
|
96 break;
|
|
97 case 'T':
|
|
98 case 't':
|
|
99 t++;
|
|
100 break;
|
|
101 case 'N':
|
|
102 case 'n':
|
|
103 n++;
|
|
104 break;
|
|
105 default:
|
|
106 other++;
|
|
107 break;
|
|
108 }
|
|
109 }
|
|
110 }
|
|
111
|
|
112
|
|
113 int countPattern(const string &seq, const string &pattern)
|
|
114 {
|
|
115 // swap the bases
|
|
116 int patternLength = pattern.size();
|
|
117 int patternCount = 0;
|
|
118 for(unsigned int i = 0; i < seq.length(); i++) {
|
|
119 if (seq.substr(i,patternLength) == pattern) {
|
|
120 patternCount++;
|
|
121 }
|
|
122 }
|
|
123 return patternCount;
|
|
124 }
|
|
125
|
|
126
|