annotate r-zerone.py @ 0:7fbff19b4485 draft default tip

planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author mvdbeek
date Fri, 14 Sep 2018 04:52:55 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
1 from collections import OrderedDict
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
2 import click
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
3 import pandas as pd
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
4 from rpy2.robjects import pandas2ri
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
5 from rpy2.robjects.packages import importr
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
6
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
7
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
# Turn on rpy2's pandas conversion layer before any R function is called.
pandas2ri.activate()
# Handle to the R package "zerone"; used by discretize() below.
zerone = importr('zerone')
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
10
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
11
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
def _read_counts(path):
    """Return column 3 (the fourth tab-separated field) of ``path`` as integers."""
    return pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3]


def generate_dataframe(controls, treatments):
    """Build the combined table that is handed to zerone.

    Each input is a header-less, tab-separated file. Column 0 of the first
    treatment file supplies the ``chrom`` column, and column 3 of every file
    is read as an integer.

    :param controls: sequence of paths to control files; their column-3 values
        are summed row-wise into a single ``Control`` column.
    :param treatments: non-empty sequence of paths to treatment files; each one
        contributes a column named after its path.
    :return: DataFrame with columns ``chrom``, ``Control`` and one column per
        treatment path, joined on the row index.
    :raises IndexError: if ``treatments`` is empty.
    """
    # OrderedDict keeps the columns in the order the files were passed in.
    control_counts = OrderedDict((path, _read_counts(path)) for path in controls)
    treatment_counts = OrderedDict((path, _read_counts(path)) for path in treatments)

    # All controls collapse into one column by summing across files per row.
    control_df = pd.DataFrame(control_counts).sum(axis=1).to_frame('Control')
    treatment_df = pd.DataFrame(treatment_counts)

    chroms = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
    chroms.columns = ['chrom']

    # Both merges are inner joins on the positional row index.
    counts = control_df.merge(treatment_df, left_index=True, right_index=True)
    return chroms.merge(counts, left_index=True, right_index=True)
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
30
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
31
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
def discretize(df):
    """Run zerone on ``df`` and flag the rows whose ``path`` value equals 2.

    :param df: table as produced by ``generate_dataframe``.
    :return: boolean pandas Series, True where zerone's ``path`` is 2.
    """
    # returnall=True is requested so the result carries the 'path' element.
    result = zerone.zerone(df, returnall=True)
    path = pandas2ri.ri2py_dataframe(result.rx('path'))['path']
    # NOTE(review): 2 is presumably zerone's "enriched" state; confirm
    # against the zerone documentation.
    return path == 2
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
35
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
36
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
@click.command()
@click.option('--control_files', type=click.Path(exists=True), multiple=True)
@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--output', type=click.Path(exists=False), required=True)
def main(control_files, fusion_files, output):
    """Run zerone discretization for control and fusion files"""
    # The docstring above doubles as click's --help text, so it stays short.
    df = generate_dataframe(controls=control_files, treatments=fusion_files)
    enriched = discretize(df)
    # Take the first three columns from the first fusion file rather than the
    # first control file: --fusion_files is required while --control_files is
    # optional, so indexing control_files[0] could raise IndexError. It is
    # also the file generate_dataframe reads the chromosome column from.
    template = pd.read_csv(fusion_files[0], usecols=[0, 1, 2], sep='\t', header=None)
    # Store the boolean call as 0/1 in a fourth column.
    template['result'] = enriched.astype(int)
    template.to_csv(output, header=False, sep='\t', index=False)
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
48
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
49
7fbff19b4485 planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff changeset
# Invoke the click command only when the file is executed as a script.
if __name__ == '__main__':
    main()