Mercurial > repos > bebatut > normalize_dataset
changeset 0:2ac4623ac41c draft
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/normalize_dataset commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
author | bebatut |
---|---|
date | Tue, 12 Apr 2016 03:08:08 -0400 |
parents | |
children | de82a5a1469b |
files | normalize_dataset.py normalize_dataset.xml |
diffstat | 2 files changed, 128 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Normalize the numeric cells of a tab-separated table by row or column sum.

Cells that cannot be parsed as a float (headers, row labels, ...) are copied
to the output unchanged and do not contribute to any sum.
"""

import sys
import os
import argparse
import re


def isfloat(value):
    """Return True if ``float(value)`` succeeds, False on ValueError."""
    try:
        float(value)
        return True
    except ValueError:
        return False


def _split_line(line):
    """Split one input line into its tab-separated cells.

    Only the line terminator is stripped, so a final line that lacks a
    trailing newline keeps its last character.
    """
    return line.rstrip('\r\n').split('\t')


def _normalize_cell(cell, total, factor):
    """Return the text to write for *cell*.

    Non-numeric cells are returned unchanged. Numeric cells are rendered as
    ``factor * value / total``; when *total* is zero the share is undefined,
    so ``0.0`` is written instead of raising ZeroDivisionError.
    """
    if not isfloat(cell):
        return cell
    if total == 0:
        return str(0.0)
    return str(factor * float(cell) / total)


def normalize_dataset(args):
    """Write a normalized copy of ``args.input_file`` to ``args.output_file``.

    Reads these attributes from *args*:

    - ``input_file``: path of the tab-separated input table.
    - ``output_file``: path of the table to write.
    - ``normalization``: ``'row'`` divides each numeric cell by the sum of
      the numeric cells of its row; ``'column'`` divides by the sum of the
      numeric cells of its column.
    - ``format``: ``'percentage'`` multiplies every ratio by 100; any other
      value leaves the ratio as a proportion.

    Any other ``normalization`` value produces an empty output file.
    """
    factor = 100 if args.format == 'percentage' else 1

    # The whole table is read up front: column sums need a full pass over
    # the data before the first output line can be written.
    with open(args.input_file, 'r') as input_file:
        rows = [_split_line(line) for line in input_file]

    column_sum = []
    if args.normalization == 'column':
        for row in rows:
            # Grow on demand so ragged rows (wider than the first line) and
            # empty files do not raise IndexError.
            missing = len(row) - len(column_sum)
            if missing > 0:
                column_sum.extend([0] * missing)
            for i, cell in enumerate(row):
                if isfloat(cell):
                    column_sum[i] += float(cell)

    with open(args.output_file, 'w') as output_file:
        for row in rows:
            if args.normalization == 'row':
                row_sum = sum(float(cell) for cell in row if isfloat(cell))
                totals = [row_sum] * len(row)
            elif args.normalization == 'column':
                totals = column_sum
            else:
                # Unknown mode: nothing is written for this row.
                continue

            output_file.write(
                '\t'.join(
                    _normalize_cell(cell, total, factor)
                    for cell, total in zip(row, totals)
                ) + '\n'
            )


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', required=True)
    parser.add_argument('--output_file', required=True)
    parser.add_argument('--normalization', required=True,
                        choices=['column', 'row'])
    parser.add_argument('--format', required=True,
                        choices=['proportion', 'percentage'])
    args = parser.parse_args()
    normalize_dataset(args)
<!-- Galaxy wrapper for normalize_dataset.py: divides the numeric cells of a
     tab-separated table by their row or column sum. -->
<tool id="normalize_dataset" name="Normalize a dataset by" version="0.1.0">
    <description>row or column sum</description>

    <requirements>
    </requirements>

    <stdio>
        <exit_code range="1:" />
        <exit_code range=":-1" />
    </stdio>

    <version_command></version_command>

    <!-- Every substituted value is single-quoted so that paths containing
         spaces or shell metacharacters reach the script as one argument. -->
    <command><![CDATA[
        python '$__tool_directory__/normalize_dataset.py'
            --input_file '$input_file'
            --output_file '$output_file'
            --normalization '$normalization'
            --format '$format'
    ]]></command>

    <inputs>
        <!-- NOTE(review): normalize_dataset.py splits cells on tab characters
             only, so a comma-separated "csv" input would be treated as a
             single column; confirm whether "csv" should remain accepted. -->
        <param name="input_file" type="data" format="tabular,tsv,csv" label="Input file" help="File in tabular format with tab-separated columns and header in first line (--input_file)"/>

        <param name="normalization" label="Normalization on" type="select" help="(--normalization)">
            <option value="column" selected="True">Column</option>
            <option value="row">Row</option>
        </param>

        <param name="format" label="Output format" type="select" help="(--format)">
            <option value="proportion" selected="True">Proportion</option>
            <option value="percentage">Percentage</option>
        </param>
    </inputs>

    <outputs>
        <data name="output_file" format="tabular"
            label="${tool.name} on ${on_string}: Normalized dataset" />
    </outputs>

    <tests>
    </tests>

    <help><![CDATA[
        **What it does**

        This tool normalizes each row or column of a dataset by the row or column sum.

    ]]></help>

    <citations>
    </citations>
</tool>