Mercurial > repos > mvdbeek > mismatch_frequencies
changeset 15:2612bb9caf71
merge heads #2.
author | Marius van den Beek <m.vandenbeek@gmail.com> |
---|---|
date | Wed, 01 Apr 2015 14:22:11 +0200 |
parents | 77ce882a6060 (diff) dd09eada7b78 (current diff) |
children | ca7b7890ed20 |
files | mismatch_frequencies.xml test-data/mismatch.tab |
diffstat | 3 files changed, 13 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/mismatch_frequencies.py Tue Jan 27 17:59:57 2015 +0100 +++ b/mismatch_frequencies.py Wed Apr 01 14:22:11 2015 +0200 @@ -31,7 +31,7 @@ len_dict[i]=mismatch_dict.copy() for alignedread in pysam_alignment: if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength): - len_dict[int(alignedread.rlen)]['total_mapped'] += 1 + len_dict[int(alignedread.rlen)]['total valid reads'] += 1 MD=alignedread.opt('MD') if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches): (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse) @@ -133,6 +133,7 @@ if is_reverse: reference_base=reverseComplement(reference_base) mismatched_base=reverseComplement(mismatched_base) + mismatch_position=len(readseq)-mismatch_position-1 if mismatched_base=='N': return (None, None) if self.mismatch_in_allowed_region(readseq, mismatch_position): @@ -140,7 +141,6 @@ else: return (None, None) - def reverseComplement(sequence): '''do a reverse complement of DNA base. >>> reverseComplement('ATGC')=='GCAT' @@ -154,7 +154,7 @@ def barplot(df, library, axes): df.plot(kind='bar', ax=axes, subplots=False,\ stacked=False, legend='test',\ - title='Mismatches in TE small RNAs from {0}'.format(library)) + title='Mismatch frequencies for {0}'.format(library)) def result_dict_to_df(result_dict): mismatches = [] @@ -178,13 +178,13 @@ library_dict=result_dict[library] for length in library_dict.keys(): for mismatch in library_dict[length]: - if mismatch == 'total_mapped': + if mismatch == 'total valid reads': continue - library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total_mapped'])*100 - del library_dict[length]['total_mapped'] + library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total valid reads'])*100 + del library_dict[length]['total valid reads'] df=pd.DataFrame(library_dict) barplot(df, library, axes), - axes.set_ylabel('Percent of mapped reads with mismatches') + axes.set_ylabel('Mismatch count / all valid reads * 100') fig.savefig(args.output_pdf, format='pdf') def setup_MismatchFrequencies(args):
--- a/mismatch_frequencies.xml Tue Jan 27 17:59:57 2015 +0100 +++ b/mismatch_frequencies.xml Wed Apr 01 14:22:11 2015 +0200 @@ -1,8 +1,8 @@ -<tool id="mismatch_frequencies" name="Mismatch Frequencies" version="0.0.4" hidden="false" > +<tool id="mismatch_frequencies" name="Mismatch Frequencies" version="0.0.6" hidden="false" > <description>Analyze mismatch frequencies in BAM/SAM alignments</description> <requirements> <requirement type="package" version="0.7.7">pysam</requirement> - <requirement type="package" version="0.14">pandas</requirement> + <requirement type="package" version="0.14.1">pandas</requirement> <requirement type="package" version="1.4">matplotlib</requirement> </requirements> <command interpreter="python">mismatch_frequencies.py --input @@ -39,8 +39,4 @@ <param name="number_of_mismatches" value="1" /> <param name="min_length" value="21" /> <param name="max_length" value="21" /> - <output name="tabular" file="mismatch.tab" ftype="tabular"/> - <output name="pdf" file="mismatch.pdf" ftype="pdf"/> - </test> - </tests> </tool>
--- a/tool_dependencies.xml Tue Jan 27 17:59:57 2015 +0100 +++ b/tool_dependencies.xml Wed Apr 01 14:22:11 2015 +0200 @@ -1,12 +1,12 @@ <?xml version="1.0"?> <tool_dependency> <package name="pysam" version="0.7.7"> - <repository changeset_revision="a7f103854ad5" name="package_pysam_0_7_7" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="ca10c522f37e" name="package_pysam_0_7_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - <package name="pandas" version="0.14"> - <repository changeset_revision="21afd61aae1e" name="package_pandas_0_14" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <package name="pandas" version="0.14.1"> + <repository changeset_revision="ef98e20431a7" name="package_pandas_0_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="matplotlib" version="1.4"> - <repository changeset_revision="6424ce261dab" name="package_matplotlib_1_4" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="62a48352f6a6" name="package_matplotlib_1_4" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>