Mercurial > repos > recetox > matchms_formatter
annotate formatter.py @ 10:5c0e5344edf3 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
author | recetox |
---|---|
date | Tue, 18 Oct 2022 10:59:57 +0000 |
parents | 4ca9807c56e6 |
children | 2f0545b02020 |
rev | line source |
---|---|
0
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
1 import click |
10
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
2 from matchms.importing import scores_from_json |
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
3 from pandas import DataFrame |
0
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
4 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
5 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
def create_long_table(data: DataFrame, value_id: str) -> DataFrame:
    """Convert the table from compact (wide) into long format.

    The table is transposed first, so the labels of the original columns
    become the index of the result and the labels of the original rows end
    up in the 'compound' column. See DataFrame.melt(...).

    Args:
        data (DataFrame): The data table to convert.
        value_id (str): The name to assign to the value column added through conversion to long format.

    Returns:
        DataFrame: Table in long format with the columns 'compound' and `value_id`.
    """
    transposed = data.transpose()
    # ignore_index=False keeps the transposed index on every melted row
    # instead of replacing it with a fresh RangeIndex.
    return transposed.melt(ignore_index=False, var_name='compound', value_name=value_id)
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
18 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
19 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
def join_df(x: DataFrame, y: DataFrame, on=None, how="inner") -> DataFrame:
    """Shortcut function to join two dataframes on their index plus the given columns.

    Args:
        x (DataFrame): Table X
        y (DataFrame): Table Y
        on (list, optional): Columns on which to join in addition to the index.
            A single column name or any iterable of names is accepted.
            Defaults to None (join on the index only).
        how (str, optional): Join method, see DataFrame.join(...). Defaults to "inner".

    Returns:
        DataFrame: Joined dataframe, indexed by the original index followed by the `on` columns.
    """
    # None replaces the former mutable [] default; normalising to a list
    # also lets callers pass a tuple or a single column name.
    if on is None:
        columns = []
    elif isinstance(on, str):
        columns = [on]
    else:
        columns = list(on)

    # Both tables get the same (index, *columns) key so that DataFrame.join,
    # which aligns on the index, matches rows on all of them.
    df_x = x.set_index([x.index] + columns)
    df_y = y.set_index([y.index] + columns)
    return df_x.join(df_y, how=how)
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
36 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
37 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
def get_top_k_matches(data: DataFrame, k: int) -> DataFrame:
    """Get the top k matches per group from a dataframe with scores.

    Rows are grouped by the first level of the index and, within each group,
    the k rows with the largest 'score' are kept (see DataFrame.nlargest).

    Args:
        data (DataFrame): A table with a 'score' column.
        k (int): Number of top scores to retrieve per group.

    Returns:
        DataFrame: Table containing only the top k best matches for each group.
    """
    # group_keys=False keeps the original index on the result instead of
    # prepending the group key as an additional index level.
    grouped = data.groupby(level=0, group_keys=False)
    return grouped.apply(DataFrame.nlargest, n=k, columns=['score'])
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
49 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
50 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
def filter_thresholds(data: DataFrame, t_score: float, t_matches: float) -> DataFrame:
    """Filter a dataframe with scores and matches to only contain values above specified thresholds.

    Both comparisons are strict: a row is kept only if its 'score' is greater
    than `t_score` and its 'matches' is greater than `t_matches`.

    Args:
        data (DataFrame): Table to filter; must have 'score' and 'matches' columns.
        t_score (float): Score threshold.
        t_matches (float): Matches threshold.

    Returns:
        DataFrame: Filtered dataframe.
    """
    # One combined mask selects the same rows as filtering twice in a row.
    keep = (data['score'] > t_score) & (data['matches'] > t_matches)
    return data[keep]
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
65 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
66 |
10
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
def scores_to_dataframes(scores):
    """Unpack a scores object into two dataframes of scores and matches.

    Both tables use the reference compound names as index and the query
    compound names as columns.

    Args:
        scores: Object exposing `queries`, `references` (spectra whose
            `metadata` holds a 'compound_name') and `scores` (an iterable of
            rows that can be indexed by "score" and "matches").
            Presumably a matchms Scores object - verify against the caller.

    Returns:
        tuple: (DataFrame of scores, DataFrame of matches).
    """
    query_names = [spectrum.metadata['compound_name'] for spectrum in scores.queries]
    reference_names = [spectrum.metadata['compound_name'] for spectrum in scores.references]

    def field_table(field):
        # Pull one named field out of every row of scores.scores.
        rows = [entry[field] for entry in scores.scores]
        return DataFrame(data=rows, index=reference_names, columns=query_names)

    return field_table("score"), field_table("matches")
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
84 |
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
85 |
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
def load_data(scores_filename: str) -> DataFrame:
    """Load serialized scores and build one long-format table of scores and matches.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Joined dataframe on compounds containing scores and matches in long format.
    """
    loaded_scores = scores_from_json(scores_filename)
    # Separate names keep the loaded object and the derived tables apart.
    scores_table, matches_table = scores_to_dataframes(loaded_scores)

    scores_long = create_long_table(scores_table, 'score')
    matches_long = create_long_table(matches_table, 'matches')

    # Join on the index plus the 'compound' column so that every row of
    # matches is paired with the score of the same pair of spectra.
    return join_df(matches_long, scores_long, on=['compound'], how='inner')
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
103 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
104 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
@click.group()
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
@click.pass_context
def cli(ctx, scores_filename, output_filename):
    """Load the scores file and share the resulting table with the subcommands."""
    # output_filename is not used here; it is declared on the group so it is
    # available to the group's result callback.
    ctx.ensure_object(dict)
    ctx.obj['data'] = load_data(scores_filename)
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
113 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
114 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
@cli.command()
@click.option('--st', 'scores_threshold', type=float, required=True)
@click.option('--mt', 'matches_threshold', type=float, required=True)
@click.pass_context
def get_thresholded_data(ctx, scores_threshold, matches_threshold):
    """Keep only rows whose score and matches exceed the given thresholds."""
    # The table is read from the shared context object; the filtered table is
    # returned as the command's result.
    return filter_thresholds(ctx.obj['data'], scores_threshold, matches_threshold)
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
122 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
123 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
@cli.command()
@click.option('--k', 'k', type=int, required=True)
@click.pass_context
def get_top_k_data(ctx, k):
    """Keep only the k best-scoring rows per group."""
    # The table is read from the shared context object; the reduced table is
    # returned as the command's result.
    return get_top_k_matches(ctx.obj['data'], k)
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
130 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
131 |
10
5c0e5344edf3
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
9
diff
changeset
|
@cli.result_callback()
def write_output(result: DataFrame, scores_filename, output_filename):
    """Write the table returned by a subcommand to a tab-separated file.

    Args:
        result (DataFrame): Table returned by the invoked subcommand.
        scores_filename: Unused here; accepted because it is declared as an
            option on the `cli` group alongside `output_filename`.
        output_filename: Path of the tab-separated file to write.
    """
    # Turn the index levels into ordinary columns and give them readable
    # names: 'level_0' becomes 'query' and 'compound' becomes 'reference'.
    table = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'})
    table.to_csv(output_filename, sep="\t", index=False)
0
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
136 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
137 |
0a08bed94964
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff
changeset
|
if __name__ == '__main__':
    # Start the click group with an empty dict as the context object.
    cli(obj={})