diff r-zerone.py @ 0:7fbff19b4485 draft default tip

planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author mvdbeek
date Fri, 14 Sep 2018 04:52:55 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/r-zerone.py	Fri Sep 14 04:52:55 2018 -0400
@@ -0,0 +1,51 @@
+from collections import OrderedDict
+import click
+import pandas as pd
+from rpy2.robjects import pandas2ri
+from rpy2.robjects.packages import importr
+
+
+pandas2ri.activate()  # turn on rpy2's pandas <-> R conversion before any R call is made
+zerone = importr('zerone')  # handle to the R package 'zerone', loaded when this module is imported
+
+
+def generate_dataframe(controls, treatments):
+    """Return a frame with columns: chrom, Control (sum of all controls), one per treatment path.
+
+    Every file is read as header-less, tab-separated text; the integer column at index 3 is
+    used. Rows are matched by position; chrom is column 0 of the first treatment file.
+    """
+    def read_counts(path):
+        return pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3]
+
+    control_counts = OrderedDict((path, read_counts(path)) for path in controls)
+    treatment_counts = OrderedDict((path, read_counts(path)) for path in treatments)
+    # Controls are pooled row-wise into one column; each treatment keeps its own column.
+    control_df = pd.DataFrame(pd.DataFrame(control_counts).sum(axis=1))
+    control_df.columns = ['Control']
+    chroms = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
+    chroms.columns = ['chrom']
+    counts = control_df.merge(pd.DataFrame(treatment_counts), left_index=True, right_index=True)
+    return chroms.merge(counts, left_index=True, right_index=True)
+
+
+def discretize(df):
+    """Run R zerone on df (returnall=True); return a boolean Series, True where 'path' equals 2."""
+    return pandas2ri.ri2py_dataframe(zerone.zerone(df, returnall=True).rx('path'))['path'] == 2
+
+
+@click.command()
+@click.option('--control_files', type=click.Path(exists=True), multiple=True, required=True)
+@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
+@click.option('--output', type=click.Path(exists=False), required=True)
+def main(control_files, fusion_files, output):
+    """Run zerone discretization for control and fusion files"""
+    # --control_files must be required: control_files[0] below supplies the output's first 3 columns.
+    calls = discretize(generate_dataframe(controls=control_files, treatments=fusion_files))
+    template = pd.read_csv(control_files[0], usecols=[0, 1, 2], sep='\t', header=None)
+    template['result'] = calls.astype(int)  # boolean call per row written as 0/1
+    template.to_csv(output, header=None, sep='\t', index=None)
+
+
+if __name__ == '__main__':
+    main()  # called without arguments: the click command fills them in from the command line