annotate BEDTools-Version-2.14.3/src/utils/sequenceUtilities/sequenceUtils.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 // sequenceUtils.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3 // BEDTools
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 // Created by Aaron Quinlan Spring 2009.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 // Copyright 2009 Aaron Quinlan. All rights reserved.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 // Summary: Contains common functions for manipulating DNA sequences.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 // Acknowledgment: I am grateful to Michael Stromberg for the code below to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 // reverse complement a sequence.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "sequenceUtils.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Performs an in-place sequence reversal.
//
// Only the order of the characters changes; bases are NOT complemented.
// An empty or single-character sequence is left as it is.
//
// The parameter is spelled std::string (the same type as before) so that
// this definition does not depend on a using-directive being in effect.
void reverseSequence(std::string &seq) {
    std::reverse(seq.begin(), seq.end());
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Performs an in-place reverse complement conversion.
//
// The sequence is first reversed, then every base is replaced by its
// complement: A<->T and C<->G, with upper and lower case handled
// separately so the case of each position is preserved.
// Any other character (e.g. 'N', IUPAC ambiguity codes, gaps) is left
// unchanged.
//
// Iterators are used instead of an `unsigned int` index so the loop cannot
// wrap around on sequences longer than UINT_MAX characters.
void reverseComplement(std::string &seq) {

    // reverse the sequence
    std::reverse(seq.begin(), seq.end());

    // complement each base in place
    for (std::string::iterator base = seq.begin(); base != seq.end(); ++base) {
        switch (*base) {
            case 'A':
                *base = 'T';
                break;
            case 'C':
                *base = 'G';
                break;
            case 'G':
                *base = 'C';
                break;
            case 'T':
                *base = 'A';
                break;
            case 'a':
                *base = 't';
                break;
            case 'c':
                *base = 'g';
                break;
            case 'g':
                *base = 'c';
                break;
            case 't':
                *base = 'a';
                break;
            default:
                // not a plain A/C/G/T base: keep it as it is
                break;
        }
    }
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Converts every character of `seq` to lower case, in place.
//
// Characters without a lower-case form are left unchanged.
// The conversion uses std::tolower, so it follows the current C locale.
void toLowerCase(std::string &seq)
{
    for (std::string::size_type i = 0, length = seq.length(); i < length; ++i)
    {
        // std::tolower requires a value representable as unsigned char (or
        // EOF); passing a negative plain char is undefined behavior.
        seq[i] = static_cast<char>(
            std::tolower(static_cast<unsigned char>(seq[i])));
    }
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Converts every character of `seq` to upper case, in place.
//
// Characters without an upper-case form are left unchanged.
// The conversion uses std::toupper, so it follows the current C locale.
void toUpperCase(std::string &seq)
{
    for (std::string::size_type i = 0, length = seq.length(); i < length; ++i)
    {
        // std::toupper requires a value representable as unsigned char (or
        // EOF); passing a negative plain char is undefined behavior.
        seq[i] = static_cast<char>(
            std::toupper(static_cast<unsigned char>(seq[i])));
    }
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Tallies the base composition of `seq`.
//
// For every character of `seq` exactly one counter is incremented:
//   a     - 'A' or 'a'
//   c     - 'C' or 'c'
//   g     - 'G' or 'g'
//   t     - 'T' or 't'
//   n     - 'N' or 'n'
//   other - any other character
//
// The counters are only incremented, never reset: callers must initialize
// them, and calling this repeatedly accumulates totals across sequences.
void getDnaContent(const std::string &seq, int &a, int &c, int &g, int &t, int &n, int &other)
{
    // classify each base, ignoring case
    for (std::string::const_iterator base = seq.begin(); base != seq.end(); ++base) {
        switch (*base) {
            case 'A':
            case 'a':
                a++;
                break;
            case 'C':
            case 'c':
                c++;
                break;
            case 'G':
            case 'g':
                g++;
                break;
            case 'T':
            case 't':
                t++;
                break;
            case 'N':
            case 'n':
                n++;
                break;
            default:
                other++;
                break;
        }
    }
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Counts the occurrences of `pattern` in `seq`.
//
// Matching is exact and case-sensitive. Overlapping occurrences are each
// counted, e.g. "AA" occurs 3 times in "AAAA". A pattern longer than the
// sequence yields 0.
//
// Returns the number of start positions in `seq` at which `pattern` matches.
int countPattern(const std::string &seq, const std::string &pattern)
{
    // Legacy behavior kept for backward compatibility: an empty pattern is
    // reported as matching once at every position of the sequence.
    if (pattern.empty()) {
        return static_cast<int>(seq.length());
    }

    int patternCount = 0;
    // Resume the search one character past each hit so that overlapping
    // matches are found; find() avoids building a temporary substring at
    // every position.
    for (std::string::size_type pos = seq.find(pattern);
         pos != std::string::npos;
         pos = seq.find(pattern, pos + 1)) {
        patternCount++;
    }
    return patternCount;
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126