Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 // | |
2 // sequenceUtils.cpp | |
3 // BEDTools | |
4 // | |
5 // Created by Aaron Quinlan Spring 2009. | |
6 // Copyright 2009 Aaron Quinlan. All rights reserved. | |
7 // | |
8 // Summary: Contains common functions for manipulating DNA sequences. | |
9 // | |
10 // Acknowledgment: I am grateful to Michael Stromberg for the code below to | |
11 // reverse complement a sequence. | |
12 | |
13 #include "sequenceUtils.h" | |
14 | |
15 // Performs an in-place sequence reversal | |
16 void reverseSequence(string &seq) { | |
17 std::reverse(seq.begin(), seq.end()); | |
18 } | |
19 | |
20 // Performs an in-place reverse complement conversion | |
21 void reverseComplement(string &seq) { | |
22 | |
23 // reverse the sequence | |
24 reverseSequence(seq); | |
25 | |
26 // swap the bases | |
27 for(unsigned int i = 0; i < seq.length(); i++) { | |
28 switch(seq[i]) { | |
29 case 'A': | |
30 seq[i] = 'T'; | |
31 break; | |
32 case 'C': | |
33 seq[i] = 'G'; | |
34 break; | |
35 case 'G': | |
36 seq[i] = 'C'; | |
37 break; | |
38 case 'T': | |
39 seq[i] = 'A'; | |
40 break; | |
41 case 'a': | |
42 seq[i] = 't'; | |
43 break; | |
44 case 'c': | |
45 seq[i] = 'g'; | |
46 break; | |
47 case 'g': | |
48 seq[i] = 'c'; | |
49 break; | |
50 case 't': | |
51 seq[i] = 'a'; | |
52 break; | |
53 default: | |
54 break; | |
55 } | |
56 } | |
57 } | |
58 | |
59 | |
60 void toLowerCase(std::string &seq) | |
61 { | |
62 const int length = seq.length(); | |
63 for(int i=0; i < length; ++i) | |
64 { | |
65 seq[i] = std::tolower(seq[i]); | |
66 } | |
67 } | |
68 | |
69 | |
70 void toUpperCase(std::string &seq) | |
71 { | |
72 const int length = seq.length(); | |
73 for(int i=0; i < length; ++i) | |
74 { | |
75 seq[i] = std::toupper(seq[i]); | |
76 } | |
77 } | |
78 | |
79 | |
80 void getDnaContent(const string &seq, int &a, int &c, int &g, int &t, int &n, int &other) | |
81 { | |
82 // swap the bases | |
83 for(unsigned int i = 0; i < seq.length(); i++) { | |
84 switch(seq[i]) { | |
85 case 'A': | |
86 case 'a': | |
87 a++; | |
88 break; | |
89 case 'C': | |
90 case 'c': | |
91 c++; | |
92 break; | |
93 case 'G': | |
94 case 'g': | |
95 g++; | |
96 break; | |
97 case 'T': | |
98 case 't': | |
99 t++; | |
100 break; | |
101 case 'N': | |
102 case 'n': | |
103 n++; | |
104 break; | |
105 default: | |
106 other++; | |
107 break; | |
108 } | |
109 } | |
110 } | |
111 | |
112 | |
113 int countPattern(const string &seq, const string &pattern) | |
114 { | |
115 // swap the bases | |
116 int patternLength = pattern.size(); | |
117 int patternCount = 0; | |
118 for(unsigned int i = 0; i < seq.length(); i++) { | |
119 if (seq.substr(i,patternLength) == pattern) { | |
120 patternCount++; | |
121 } | |
122 } | |
123 return patternCount; | |
124 } | |
125 | |
126 |