Mercurial > repos > recetox > matchms_formatter
comparison formatter.py @ 9:4ca9807c56e6 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 38a5028a7abe99794086e9b1374ab4bb8bfa68de
| author | recetox |
|---|---|
| date | Wed, 21 Sep 2022 15:28:13 +0000 |
| parents | 364976b9aba6 |
| children | 5c0e5344edf3 |
comparison
equal
deleted
inserted
replaced
| 8:77a407856b32 | 9:4ca9807c56e6 |
|---|---|
| 1 import click | 1 import click |
| 2 from pandas import DataFrame, read_csv | 2 from pandas import DataFrame, read_csv, to_numeric |
| 3 | 3 |
| 4 | 4 |
| 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: | 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: |
| 6 """Convert the table from compact into long format. | 6 """Convert the table from compact into long format. |
| 7 See DataFrame.melt(...). | 7 See DataFrame.melt(...). |
| 71 matches_filename (str): Path to matches table. | 71 matches_filename (str): Path to matches table. |
| 72 | 72 |
| 73 Returns: | 73 Returns: |
| 74 DataFrame: Joined dataframe on compounds containing scores and matches in long format. | 74 DataFrame: Joined dataframe on compounds containing scores and matches in long format. |
| 75 """ | 75 """ |
| 76 matches = read_csv(matches_filename, sep=None, index_col=0) | 76 matches = read_csv(matches_filename, sep="\t", index_col=0, header=0).apply(to_numeric) |
| 77 scores = read_csv(scores_filename, sep=None, index_col=0) | 77 scores = read_csv(scores_filename, sep="\t", index_col=0, header=0).apply(to_numeric) |
| 78 | 78 |
| 79 scores_long = create_long_table(scores, 'score') | 79 scores_long = create_long_table(scores, 'score') |
| 80 matches_long = create_long_table(matches, 'matches') | 80 matches_long = create_long_table(matches, 'matches') |
| 81 | 81 |
| 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') | 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') |
| 111 return result | 111 return result |
| 112 | 112 |
| 113 | 113 |
| 114 @cli.resultcallback() | 114 @cli.resultcallback() |
| 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): | 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): |
| 116 input_file = read_csv(scores_filename, sep=None, iterator=True) | |
| 117 sep = input_file._engine.data.dialect.delimiter | |
| 118 | |
| 119 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) | 116 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) |
| 120 result.to_csv(output_filename, sep=sep, index=False) | 117 result.to_csv(output_filename, sep="\t", index=False) |
| 121 | 118 |
| 122 | 119 |
| 123 if __name__ == '__main__': | 120 if __name__ == '__main__': |
| 124 cli(obj={}) | 121 cli(obj={}) |
