# HG changeset patch # User Marius van den Beek # Date 1432481613 -7200 # Node ID ca7b7890ed20b19583a90f9d1ca49a0434e41d5d # Parent 942464ea4211777675148e8a5886f20524bb8e90# Parent 2612bb9caf7182b9368e79f46204d039e5820c57 merge heads diff -r 2612bb9caf71 -r ca7b7890ed20 mismatch_frequencies.py --- a/mismatch_frequencies.py Wed Apr 01 14:22:11 2015 +0200 +++ b/mismatch_frequencies.py Sun May 24 17:33:33 2015 +0200 @@ -31,7 +31,7 @@ len_dict[i]=mismatch_dict.copy() for alignedread in pysam_alignment: if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength): - len_dict[int(alignedread.rlen)]['total valid reads'] += 1 + len_dict[int(alignedread.rlen)]['total_mapped'] += 1 MD=alignedread.opt('MD') if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches): (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse) @@ -133,7 +133,6 @@ if is_reverse: reference_base=reverseComplement(reference_base) mismatched_base=reverseComplement(mismatched_base) - mismatch_position=len(readseq)-mismatch_position-1 if mismatched_base=='N': return (None, None) if self.mismatch_in_allowed_region(readseq, mismatch_position): @@ -141,6 +140,7 @@ else: return (None, None) + def reverseComplement(sequence): '''do a reverse complement of DNA base. >>> reverseComplement('ATGC')=='GCAT' @@ -154,7 +154,7 @@ def barplot(df, library, axes): df.plot(kind='bar', ax=axes, subplots=False,\ stacked=False, legend='test',\ - title='Mismatch frequencies for {0}'.format(library)) + title='Mismatches in TE small RNAs from {0}'.format(library)) def result_dict_to_df(result_dict): mismatches = [] @@ -178,13 +178,13 @@ library_dict=result_dict[library] for length in library_dict.keys(): for mismatch in library_dict[length]: - if mismatch == 'total valid reads': + if mismatch == 'total_mapped': continue - library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total valid reads'])*100 - del library_dict[length]['total valid reads'] + library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total_mapped'])*100 + del library_dict[length]['total_mapped'] df=pd.DataFrame(library_dict) barplot(df, library, axes), - axes.set_ylabel('Mismatch count / all valid reads * 100') + axes.set_ylabel('Percent of mapped reads with mismatches') fig.savefig(args.output_pdf, format='pdf') def setup_MismatchFrequencies(args): diff -r 2612bb9caf71 -r ca7b7890ed20 mismatch_frequencies.xml --- a/mismatch_frequencies.xml Wed Apr 01 14:22:11 2015 +0200 +++ b/mismatch_frequencies.xml Sun May 24 17:33:33 2015 +0200 @@ -1,11 +1,11 @@ - diff -r 2612bb9caf71 -r ca7b7890ed20 test-data/3mismatches_ago2ip_ovary.bam Binary file test-data/3mismatches_ago2ip_ovary.bam has changed diff -r 2612bb9caf71 -r ca7b7890ed20 test-data/3mismatches_ago2ip_s2.bam Binary file test-data/3mismatches_ago2ip_s2.bam has changed diff -r 2612bb9caf71 -r ca7b7890ed20 test-data/mismatch.pdf Binary file test-data/mismatch.pdf has changed diff -r 2612bb9caf71 -r ca7b7890ed20 tool_dependencies.xml --- a/tool_dependencies.xml Wed Apr 01 14:22:11 2015 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - - - - - - - - - - - -