comparison r-zerone.py @ 0:7fbff19b4485 draft default tip

planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author mvdbeek
date Fri, 14 Sep 2018 04:52:55 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7fbff19b4485
from collections import OrderedDict

import click
import pandas as pd
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
6
7
# Turn on rpy2's pandas conversion layer so DataFrames can be handed to R calls.
pandas2ri.activate()
# Handle to the R 'zerone' package; used by discretize() below.
zerone = importr('zerone')
10
11
def _read_count_column(path):
    """Return column 3 of the header-less, tab-separated file *path* as ints."""
    return pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3]


def generate_dataframe(controls, treatments):
    """Assemble one table from per-sample count files.

    Every file is read as header-less, tab-separated text. Column 0 of the
    first treatment file supplies the ``chrom`` column and column 3 of each
    file supplies the integer counts. Rows are matched across files by their
    position (row index), not by coordinates.

    Args:
        controls: Iterable of control file paths. Their counts are summed
            row-wise into a single ``Control`` column. A path given more than
            once is only counted once because files are keyed by path.
        treatments: Non-empty sequence of treatment file paths. Each file
            becomes its own column, named after its path.

    Returns:
        A ``pandas.DataFrame`` with the columns ``chrom``, ``Control`` and then
        one column per treatment file, in the order given.

    Raises:
        IndexError: If ``treatments`` is empty.
    """
    control_counts = OrderedDict(
        (path, _read_count_column(path)) for path in controls
    )
    treatment_counts = OrderedDict(
        (path, _read_count_column(path)) for path in treatments
    )

    # Collapse all control replicates into one summed column.
    control_df = pd.DataFrame(control_counts).sum(axis=1).to_frame('Control')
    treatment_df = pd.DataFrame(treatment_counts)

    chroms = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
    chroms.columns = ['chrom']

    # Index-on-index merges (inner join by default) keep only rows present in
    # every input.
    counts = control_df.merge(treatment_df, left_index=True, right_index=True)
    return chroms.merge(counts, left_index=True, right_index=True)
30
31
def discretize(df):
    """Run zerone on *df* and flag the rows whose reported path value is 2.

    Args:
        df: Table handed to the R function ``zerone::zerone``; presumably the
            frame built by ``generate_dataframe`` (verify against caller).

    Returns:
        A boolean pandas Series that is True where the ``path`` element of the
        zerone result equals 2.
    """
    # returnall=True is passed so the result can be queried for its 'path'
    # element below.
    result = zerone.zerone(df, returnall=True)
    path = pandas2ri.ri2py_dataframe(result.rx('path'))['path']
    # NOTE(review): 2 is presumably zerone's "enriched" state - confirm
    # against the zerone documentation.
    return path == 2
35
36
# --control_files is required: the body indexes control_files[0], so running
# without any control file could never succeed. Letting click reject the call
# gives a clear usage error instead of a late IndexError.
@click.command()
@click.option('--control_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--output', type=click.Path(exists=False), required=True)
def main(control_files, fusion_files, output):
    """Run zerone discretization for control and fusion files"""
    # The docstring above doubles as the click help text, so it is kept as is.
    df = generate_dataframe(controls=control_files, treatments=fusion_files)
    enriched = discretize(df)
    # The first three columns of the first control file form the output rows;
    # the 0/1 call is appended as a fourth column.
    template = pd.read_csv(control_files[0], usecols=[0, 1, 2], sep='\t', header=None)
    template['result'] = enriched.astype(int)
    # Written without header line and without the index column.
    template.to_csv(output, header=False, sep='\t', index=False)
48
49
# Script entry point: hand control to the click command.
if __name__ == '__main__':
    main()