Mercurial > repos > mvdbeek > mismatch_frequencies
comparison mismatch_frequencies.py @ 7:270681625775
Add help text and fix for skipping the beginning/end of reads aligned in reverse orientation.
author | mvdbeek |
---|---|
date | Wed, 28 Jan 2015 13:12:56 +0100 |
parents | e8ebe5132737 |
children |
comparison
equal
deleted
inserted
replaced
6:a7bf987b8cc4 | 7:270681625775 |
---|---|
29 len_dict={} | 29 len_dict={} |
30 for i in range(minimal_readlength, maximal_readlength+1): | 30 for i in range(minimal_readlength, maximal_readlength+1): |
31 len_dict[i]=mismatch_dict.copy() | 31 len_dict[i]=mismatch_dict.copy() |
32 for alignedread in pysam_alignment: | 32 for alignedread in pysam_alignment: |
33 if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength): | 33 if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength): |
34 len_dict[int(alignedread.rlen)]['total_mapped'] += 1 | 34 len_dict[int(alignedread.rlen)]['total valid reads'] += 1 |
35 MD=alignedread.opt('MD') | 35 MD=alignedread.opt('MD') |
36 if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches): | 36 if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches): |
37 (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse) | 37 (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse) |
38 if ref_base == None: | 38 if ref_base == None: |
39 continue | 39 continue |
131 reference_base+=MD[result.end()-1] | 131 reference_base+=MD[result.end()-1] |
132 mismatched_base+=readseq[mismatch_position-1] | 132 mismatched_base+=readseq[mismatch_position-1] |
133 if is_reverse: | 133 if is_reverse: |
134 reference_base=reverseComplement(reference_base) | 134 reference_base=reverseComplement(reference_base) |
135 mismatched_base=reverseComplement(mismatched_base) | 135 mismatched_base=reverseComplement(mismatched_base) |
136 mismatch_position=len(readseq)-mismatch_position-1 | |
136 if mismatched_base=='N': | 137 if mismatched_base=='N': |
137 return (None, None) | 138 return (None, None) |
138 if self.mismatch_in_allowed_region(readseq, mismatch_position): | 139 if self.mismatch_in_allowed_region(readseq, mismatch_position): |
139 return (reference_base, mismatched_base) | 140 return (reference_base, mismatched_base) |
140 else: | 141 else: |
141 return (None, None) | 142 return (None, None) |
142 | |
143 | 143 |
144 def reverseComplement(sequence): | 144 def reverseComplement(sequence): |
145 '''do a reverse complement of DNA base. | 145 '''do a reverse complement of DNA base. |
146 >>> reverseComplement('ATGC')=='GCAT' | 146 >>> reverseComplement('ATGC')=='GCAT' |
147 True | 147 True |
152 return sequence.upper().translate(complement)[::-1] | 152 return sequence.upper().translate(complement)[::-1] |
153 | 153 |
154 def barplot(df, library, axes): | 154 def barplot(df, library, axes): |
155 df.plot(kind='bar', ax=axes, subplots=False,\ | 155 df.plot(kind='bar', ax=axes, subplots=False,\ |
156 stacked=False, legend='test',\ | 156 stacked=False, legend='test',\ |
157 title='Mismatches in TE small RNAs from {0}'.format(library)) | 157 title='Mismatch frequencies for {0}'.format(library)) |
158 | 158 |
159 def result_dict_to_df(result_dict): | 159 def result_dict_to_df(result_dict): |
160 mismatches = [] | 160 mismatches = [] |
161 libraries = [] | 161 libraries = [] |
162 for mismatch, library in result_dict.iteritems(): | 162 for mismatch, library in result_dict.iteritems(): |
176 for i,library in enumerate (names): | 176 for i,library in enumerate (names): |
177 axes=fig.add_subplot(nrows,2,i+1) | 177 axes=fig.add_subplot(nrows,2,i+1) |
178 library_dict=result_dict[library] | 178 library_dict=result_dict[library] |
179 for length in library_dict.keys(): | 179 for length in library_dict.keys(): |
180 for mismatch in library_dict[length]: | 180 for mismatch in library_dict[length]: |
181 if mismatch == 'total_mapped': | 181 if mismatch == 'total valid reads': |
182 continue | 182 continue |
183 library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total_mapped'])*100 | 183 library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total valid reads'])*100 |
184 del library_dict[length]['total_mapped'] | 184 del library_dict[length]['total valid reads'] |
185 df=pd.DataFrame(library_dict) | 185 df=pd.DataFrame(library_dict) |
186 barplot(df, library, axes), | 186 barplot(df, library, axes), |
187 axes.set_ylabel('Percent of mapped reads with mismatches') | 187 axes.set_ylabel('Mismatch count / all valid reads * 100') |
188 fig.savefig(args.output_pdf, format='pdf') | 188 fig.savefig(args.output_pdf, format='pdf') |
189 | 189 |
190 def setup_MismatchFrequencies(args): | 190 def setup_MismatchFrequencies(args): |
191 resultDict=OrderedDict() | 191 resultDict=OrderedDict() |
192 kw_list=[{'result_dict' : resultDict, | 192 kw_list=[{'result_dict' : resultDict, |