Mercurial > repos > mvdbeek > zerone_damid
diff r-zerone.py @ 0:7fbff19b4485 draft default tip
planemo upload for repository https://github.com/galaxyproject/mvdbeek/tree/master/tools/zerone commit 35b6256bc4fe6138f03228795e88da46068ac14e
author | mvdbeek |
---|---|
date | Fri, 14 Sep 2018 04:52:55 -0400 |
parents | |
children |
line wrap: on
line diff
"""Run zerone discretization on control and fusion read-count files."""
from collections import OrderedDict

import click
import pandas as pd
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr


# Enable automatic pandas <-> R conversion and load the R package once at import.
pandas2ri.activate()
zerone = importr('zerone')


def _read_count_columns(paths):
    """Return an OrderedDict mapping each path to the integer column 3 of that file.

    Files are read as header-less, tab-separated tables. Insertion order follows
    *paths*, so the resulting DataFrame columns keep the command-line order.
    """
    counts = OrderedDict()
    for path in paths:
        counts[path] = pd.read_csv(path, usecols=[3], sep='\t', header=None, dtype=int)[3]
    return counts


def generate_dataframe(controls, treatments):
    """Assemble the table handed to zerone.

    Parameters:
        controls: sequence of paths to tab-separated control files.
        treatments: non-empty sequence of paths to tab-separated treatment files.

    Returns:
        A DataFrame with a ``chrom`` column (column 0 of the first treatment
        file), a ``Control`` column (row-wise sum of column 3 over all control
        files) and one column per treatment file (its column 3), joined on the
        row index.
    """
    # All controls are collapsed into a single summed column.
    control_df = pd.DataFrame(_read_count_columns(controls)).sum(axis=1).to_frame('Control')
    treatment_df = pd.DataFrame(_read_count_columns(treatments))

    chroms = pd.read_csv(treatments[0], usecols=[0], sep='\t', header=None)
    chroms.columns = ['chrom']

    # Rows are matched purely by position (index); assumes every input file
    # lists the same bins in the same order -- TODO confirm with tool wrapper.
    counts = control_df.merge(treatment_df, left_index=True, right_index=True)
    return chroms.merge(counts, left_index=True, right_index=True)


def discretize(df):
    """Run zerone on *df* and return a boolean Series, True where ``path == 2``.

    NOTE(review): state 2 is presumably zerone's "enriched" state, and
    ``ri2py_dataframe`` appears to exist only in rpy2 2.x -- confirm against
    the pinned zerone / rpy2 versions.
    """
    r = zerone.zerone(df, returnall=True)
    return pandas2ri.ri2py_dataframe(r.rx('path'))['path'] == 2


@click.command()
# Controls are mandatory: the first control file supplies the output
# coordinates below, and without any control the merged table is empty.
@click.option('--control_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--fusion_files', type=click.Path(exists=True), multiple=True, required=True)
@click.option('--output', type=click.Path(exists=False), required=True)
def main(control_files, fusion_files, output):
    """Run zerone discretization for control and fusion files"""
    df = generate_dataframe(controls=control_files, treatments=fusion_files)
    s = discretize(df)
    # Reuse the first three columns of the first control file as the output
    # rows and append the 0/1 call as a fourth column.
    template = pd.read_csv(control_files[0], usecols=[0, 1, 2], sep='\t', header=None)
    template['result'] = s.astype(int)
    template.to_csv(output, header=None, sep='\t', index=None)


if __name__ == '__main__':
    main()