Mercurial > repos > mvdbeek > zerone_damid
comparison r-zerone.py @ 0:7fbff19b4485 draft default tip
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author | mvdbeek |
---|---|
date | Fri, 14 Sep 2018 04:52:55 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7fbff19b4485 |
---|---|
1 from collections import OrderedDict | |
2 import click | |
3 import pandas as pd | |
4 from rpy2.robjects import pandas2ri | |
5 from rpy2.robjects.packages import importr | |
6 | |
7 | |
# Turn on rpy2's pandas conversion layer before any R function is called.
pandas2ri.activate()

# Handle on the R package `zerone`; its functions are reached as attributes.
zerone = importr('zerone')
10 | |
11 | |
def _read_counts(path):
    """Return the fourth column (index 3) of a headerless TSV file as ints."""
    return pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3]


def generate_dataframe(controls, treatments):
    """Build the table that is handed to zerone.

    Every input is read as a headerless, tab-separated file.  Column index 3
    of each file supplies the per-row values; column index 0 of the first
    treatment file supplies the ``chrom`` labels.

    Args:
        controls: Sequence of paths to control files.  Their values are
            summed row-wise into a single ``Control`` column.  A path given
            more than once is counted once per occurrence.
        treatments: Non-empty sequence of paths to treatment files.  Each
            one becomes a column named after its path (a path repeated here
            still yields a single column, because columns are keyed by path).

    Returns:
        A ``pandas.DataFrame`` with the columns ``chrom``, ``Control`` and one
        column per treatment path, in that order.  Frames are joined on the
        row index with inner joins, so only row positions present in all of
        them are kept.

    Raises:
        IndexError: If ``treatments`` is empty.
    """
    # Key the controls by position rather than by path: keying by path would
    # silently drop a control that is listed twice and under-count the sum.
    control_counts = OrderedDict(
        (position, _read_counts(path)) for position, path in enumerate(controls)
    )
    treatment_counts = OrderedDict(
        (path, _read_counts(path)) for path in treatments
    )

    control_total = pd.DataFrame(control_counts).sum(axis=1)
    control_df = pd.DataFrame({'Control': control_total})
    treatment_df = pd.DataFrame(treatment_counts)

    chroms = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
    chroms.columns = ['chrom']

    counts = control_df.merge(treatment_df, left_index=True, right_index=True)
    return chroms.merge(counts, left_index=True, right_index=True)
30 | |
31 | |
def discretize(df):
    """Run zerone on ``df`` and return a boolean mask of rows in state 2.

    ``df`` is passed to the R function ``zerone`` with ``returnall=True``.
    The ``path`` element of the result is converted to a pandas data frame
    and compared against 2.

    NOTE(review): presumably ``path`` is the Viterbi path and state 2 is the
    enriched state; confirm against the zerone documentation.

    Returns:
        A boolean ``pandas.Series``, True where ``path`` equals 2.
    """
    fit = zerone.zerone(df, returnall=True)
    states = pandas2ri.ri2py_dataframe(fit.rx('path'))
    return states['path'] == 2
35 | |
36 | |
@click.command()
@click.option('--control_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--output', type=click.Path(exists=False), required=True)
def main(control_files, fusion_files, output):
    """Run zerone discretization for control and fusion files"""
    # --control_files is required: the first control file provides the
    # coordinate columns of the output, so running without one could only
    # end in an IndexError further down.  Marking the option as required lets
    # click report the problem as a usage error before any work is done.
    df = generate_dataframe(controls=control_files, treatments=fusion_files)
    enriched = discretize(df)

    # The first three columns of the first control file are copied to the
    # output unchanged; the discretized call is appended as a 0/1 column.
    template = pd.read_csv(control_files[0], usecols=[0, 1, 2], sep='\t', header=None)
    template['result'] = enriched.astype(int)

    # Headerless, index-free TSV, mirroring the layout of the input files.
    template.to_csv(output, header=False, sep='\t', index=False)
48 | |
49 | |
if __name__ == '__main__':
    # Invoke the click command only when the file is executed as a script.
    main()