mismatch_frequencies: mismatch_frequencies.py comparison

comparison mismatch_frequencies.py @ 7:270681625775

Add help text, and fix how positions at the beginning/end of reads are skipped when the read is aligned in reverse orientation.

author	mvdbeek
date	Wed, 28 Jan 2015 13:12:56 +0100
parents	e8ebe5132737
children

comparison

equal deleted inserted replaced

-:a7bf987b8cc4
+:270681625775
 len_dict={}
 for i in range(minimal_readlength, maximal_readlength+1):
 len_dict[i]=mismatch_dict.copy()
 for alignedread in pysam_alignment:
 if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength):
-len_dict[int(alignedread.rlen)]['total_mapped'] += 1
+len_dict[int(alignedread.rlen)]['total valid reads'] += 1
 MD=alignedread.opt('MD')
 if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches):
 (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse)
 if ref_base == None:
 continue
 reference_base+=MD[result.end()-1]
 mismatched_base+=readseq[mismatch_position-1]
 if is_reverse:
 reference_base=reverseComplement(reference_base)
 mismatched_base=reverseComplement(mismatched_base)
+mismatch_position=len(readseq)-mismatch_position-1
 if mismatched_base=='N':
 return (None, None)
 if self.mismatch_in_allowed_region(readseq, mismatch_position):
 return (reference_base, mismatched_base)
 else:
 return (None, None)
 def reverseComplement(sequence):
 '''do a reverse complement of DNA base.
 >>> reverseComplement('ATGC')=='GCAT'
 True
 return sequence.upper().translate(complement)[::-1]
 def barplot(df, library, axes):
 df.plot(kind='bar', ax=axes, subplots=False,\
 stacked=False, legend='test',\
-title='Mismatches in TE small RNAs from {0}'.format(library))
+title='Mismatch frequencies for {0}'.format(library))
 def result_dict_to_df(result_dict):
 mismatches = []
 libraries = []
 for mismatch, library in result_dict.iteritems():
 for i,library in enumerate (names):
 axes=fig.add_subplot(nrows,2,i+1)
 library_dict=result_dict[library]
 for length in library_dict.keys():
 for mismatch in library_dict[length]:
-if mismatch == 'total_mapped':
+if mismatch == 'total valid reads':
 continue
-library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total_mapped'])*100
+library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total valid reads'])*100
-del library_dict[length]['total_mapped']
+del library_dict[length]['total valid reads']
 df=pd.DataFrame(library_dict)
 barplot(df, library, axes),
-axes.set_ylabel('Percent of mapped reads with mismatches')
+axes.set_ylabel('Mismatch count / all valid reads * 100')
 fig.savefig(args.output_pdf, format='pdf')
 def setup_MismatchFrequencies(args):
 resultDict=OrderedDict()
 kw_list=[{'result_dict' : resultDict,

Mercurial > repos > mvdbeek > mismatch_frequencies

comparison mismatch_frequencies.py @ 7:270681625775