Mercurial > repos > mvdbeek > zerone_damid
annotate r-zerone.py @ 0:7fbff19b4485 draft default tip
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author | mvdbeek |
---|---|
date | Fri, 14 Sep 2018 04:52:55 -0400 |
parents | |
children |
rev | line source |
---|---|
0
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
1 from collections import OrderedDict |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
2 import click |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
3 import pandas as pd |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
4 from rpy2.robjects import pandas2ri |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
5 from rpy2.robjects.packages import importr |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
6 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
7 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
# Turn on rpy2's pandas conversion layer so that pandas objects can be handed
# directly to R functions (discretize() passes a DataFrame to zerone as-is).
pandas2ri.activate()
# Import the R package "zerone" through rpy2 at module load time; the
# resulting handle is used by discretize().
# NOTE(review): this presumably fails at import if the R package is not
# installed in the active R library -- confirm in the deployment environment.
zerone = importr('zerone')
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
10 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
11 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
def generate_dataframe(controls, treatments):
    """Build the per-row count table that is passed on to zerone.

    Every input path is parsed as a headerless, tab-separated file whose
    fourth column (index 3) holds integer values.

    Args:
        controls: Iterable of control file paths. Their fourth columns are
            added up row by row into a single ``Control`` column.
        treatments: Sequence of treatment file paths. Each contributes one
            column named after its path; the first file additionally supplies
            the ``chrom`` column (its first column).

    Returns:
        A DataFrame with columns ``chrom``, ``Control`` and one column per
        treatment file, assembled with index-aligned merges.
    """
    def read_counts(paths):
        # Map each path to the integer values of its fourth column, keeping
        # the order in which the paths were supplied.
        return OrderedDict(
            (path, pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3])
            for path in paths
        )

    control_counts = read_counts(controls)
    treatment_counts = read_counts(treatments)

    # Collapse all controls into one column by summing across files.
    control_frame = pd.DataFrame(control_counts).sum(axis=1).to_frame('Control')
    treatment_frame = pd.DataFrame(treatment_counts)

    # Row labels (first column) are taken from the first treatment file only.
    chrom_frame = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
    chrom_frame.columns = ['chrom']

    counts = control_frame.merge(treatment_frame, left_index=True, right_index=True)
    return chrom_frame.merge(counts, left_index=True, right_index=True)
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
30 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
31 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
def discretize(df):
    """Run zerone on *df* and flag the rows whose ``path`` value equals 2.

    The R function ``zerone`` is called with ``returnall=True``; the ``path``
    element of its result is converted to a pandas DataFrame and compared
    against 2.

    Args:
        df: The table produced by ``generate_dataframe``.

    Returns:
        A boolean pandas Series, True where ``path`` is 2.
    """
    fit = zerone.zerone(df, returnall=True)
    path_frame = pandas2ri.ri2py_dataframe(fit.rx('path'))
    # NOTE(review): state 2 is presumably zerone's "enriched" state --
    # confirm against the zerone package documentation.
    return path_frame['path'] == 2
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
35 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
36 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
# --control_files is required: main() reads control_files[0] unconditionally
# and generate_dataframe() yields an empty table without any control, so
# omitting it could only ever end in a traceback. Declaring it required makes
# click report a normal usage error instead.
@click.command()
@click.option('--control_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--output', type=click.Path(exists=False), required=True)
def main(control_files, fusion_files, output):
    """Run zerone discretization for control and fusion files"""
    # NOTE: the docstring above is emitted by click as the --help text, so it
    # is kept verbatim; implementation notes live in comments instead.
    #
    # control_files / fusion_files: tuples of existing paths (options may be
    # repeated on the command line). output: path of the table to write.
    df = generate_dataframe(controls=control_files, treatments=fusion_files)
    enriched = discretize(df)

    # The first three columns of the first control file form the output
    # template (presumably chrom/start/end -- confirm against the tool's
    # input format); the 0/1 discretization result is appended as a fourth.
    template = pd.read_csv(control_files[0], usecols=[0, 1, 2], sep='\t', header=None)
    # Assignment aligns on the row index, so `enriched` must share the
    # template's index.
    template['result'] = enriched.astype(int)

    # Write a headerless, index-free, tab-separated table.
    template.to_csv(output, header=False, sep='\t', index=False)
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
48 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
49 |
7fbff19b4485
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
mvdbeek
parents:
diff
changeset
|
# Invoke the click command only when the file is executed as a script, so
# importing the module does not trigger argument parsing.
if __name__ == '__main__':
    main()