changeset 15:2612bb9caf71

merge heads #2.
author Marius van den Beek <m.vandenbeek@gmail.com>
date Wed, 01 Apr 2015 14:22:11 +0200
parents 77ce882a6060 (diff) dd09eada7b78 (current diff)
children ca7b7890ed20
files mismatch_frequencies.xml test-data/mismatch.tab
diffstat 3 files changed, 13 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/mismatch_frequencies.py	Tue Jan 27 17:59:57 2015 +0100
+++ b/mismatch_frequencies.py	Wed Apr 01 14:22:11 2015 +0200
@@ -31,7 +31,7 @@
             len_dict[i]=mismatch_dict.copy()
         for alignedread in pysam_alignment:
             if self.read_is_valid(alignedread, minimal_readlength, maximal_readlength):
-                len_dict[int(alignedread.rlen)]['total_mapped'] += 1
+                len_dict[int(alignedread.rlen)]['total valid reads'] += 1
                 MD=alignedread.opt('MD')
                 if self.read_has_mismatch(alignedread, self.number_of_allowed_mismatches):
                     (ref_base, mismatch_base)=self.read_to_reference_mismatch(MD, alignedread.seq, alignedread.is_reverse)
@@ -133,6 +133,7 @@
         if is_reverse:
             reference_base=reverseComplement(reference_base)
             mismatched_base=reverseComplement(mismatched_base)
+            mismatch_position=len(readseq)-mismatch_position-1
         if mismatched_base=='N':
             return (None, None)
         if self.mismatch_in_allowed_region(readseq, mismatch_position):
@@ -140,7 +141,6 @@
         else:
             return (None, None)
 
-
 def reverseComplement(sequence):
     '''do a reverse complement of DNA base.
     >>> reverseComplement('ATGC')=='GCAT'
@@ -154,7 +154,7 @@
 def barplot(df, library, axes):
     df.plot(kind='bar', ax=axes, subplots=False,\
             stacked=False, legend='test',\
-            title='Mismatches in TE small RNAs from {0}'.format(library))
+            title='Mismatch frequencies for {0}'.format(library))
   
 def result_dict_to_df(result_dict):
     mismatches = []
@@ -178,13 +178,13 @@
         library_dict=result_dict[library]
         for length in library_dict.keys():
             for mismatch in library_dict[length]:
-                if mismatch == 'total_mapped':
+                if mismatch == 'total valid reads':
                     continue
-                library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total_mapped'])*100
-            del library_dict[length]['total_mapped']
+                library_dict[length][mismatch]=library_dict[length][mismatch]/float(library_dict[length]['total valid reads'])*100
+            del library_dict[length]['total valid reads']
         df=pd.DataFrame(library_dict)
         barplot(df, library, axes),
-        axes.set_ylabel('Percent of mapped reads with mismatches')
+        axes.set_ylabel('Mismatch count / all valid reads * 100')
     fig.savefig(args.output_pdf, format='pdf')    
 
 def setup_MismatchFrequencies(args):
--- a/mismatch_frequencies.xml	Tue Jan 27 17:59:57 2015 +0100
+++ b/mismatch_frequencies.xml	Wed Apr 01 14:22:11 2015 +0200
@@ -1,8 +1,8 @@
-<tool id="mismatch_frequencies" name="Mismatch Frequencies" version="0.0.4" hidden="false" >
+<tool id="mismatch_frequencies" name="Mismatch Frequencies" version="0.0.6" hidden="false" >
   <description>Analyze mismatch frequencies in BAM/SAM alignments</description>
   <requirements>
     <requirement type="package" version="0.7.7">pysam</requirement>
-    <requirement type="package" version="0.14">pandas</requirement>
+    <requirement type="package" version="0.14.1">pandas</requirement>
     <requirement type="package" version="1.4">matplotlib</requirement>
   </requirements>
   <command interpreter="python">mismatch_frequencies.py --input 
@@ -39,8 +39,4 @@
       <param name="number_of_mismatches" value="1" />
       <param name="min_length" value="21" />
       <param name="max_length" value="21" />
-      <output name="tabular" file="mismatch.tab" ftype="tabular"/>
-      <output name="pdf" file="mismatch.pdf" ftype="pdf"/>
-    </test>
-  </tests>
 </tool>
--- a/tool_dependencies.xml	Tue Jan 27 17:59:57 2015 +0100
+++ b/tool_dependencies.xml	Wed Apr 01 14:22:11 2015 +0200
@@ -1,12 +1,12 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="pysam" version="0.7.7">
-        <repository changeset_revision="a7f103854ad5" name="package_pysam_0_7_7" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="ca10c522f37e" name="package_pysam_0_7_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
-    <package name="pandas" version="0.14">
-        <repository changeset_revision="21afd61aae1e" name="package_pandas_0_14" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    <package name="pandas" version="0.14.1">
+        <repository changeset_revision="ef98e20431a7" name="package_pandas_0_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="matplotlib" version="1.4">
-        <repository changeset_revision="6424ce261dab" name="package_matplotlib_1_4" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="62a48352f6a6" name="package_matplotlib_1_4" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>