annotate formatter.py @ 10:5c0e5344edf3 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
author recetox
date Tue, 18 Oct 2022 10:59:57 +0000
parents 4ca9807c56e6
children 2f0545b02020
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
1 import click
10
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
2 from matchms.importing import scores_from_json
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
3 from pandas import DataFrame
0
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
4
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
5
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
def create_long_table(data: DataFrame, value_id: str) -> DataFrame:
    """Reshape a compact (wide) table into long format.

    The table is transposed first and then unpivoted with DataFrame.melt(...),
    keeping the index of the transposed table on every resulting row.

    Args:
        data (DataFrame): The wide table to reshape.
        value_id (str): Name of the column that receives the cell values.

    Returns:
        DataFrame: Long table with a 'compound' column (the labels of the
            input's index) and a column named `value_id`.
    """
    # After transposing, the input's row labels become the column labels,
    # which melt() then moves into the 'compound' column.
    transposed = data.T
    long_table = transposed.melt(
        var_name='compound',
        value_name=value_id,
        ignore_index=False,
    )
    return long_table
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
18
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
19
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
def join_df(x: DataFrame, y: DataFrame, on=None, how="inner") -> DataFrame:
    """Shortcut function to join two dataframes on their index plus columns.

    Both tables are re-indexed by their existing index combined with the
    columns listed in `on`, and then joined on that combined index.

    Args:
        x (DataFrame): Table X (left side of the join).
        y (DataFrame): Table Y (right side of the join).
        on (list, optional): Columns on which to join in addition to the
            index. Defaults to None, which means "index only".
        how (str, optional): Join method, see DataFrame.join(...).
            Defaults to "inner".

    Returns:
        DataFrame: Joined dataframe.
    """
    # A None default avoids sharing one mutable list object between calls;
    # list(...) also lets callers pass any iterable of column labels.
    keys = [] if on is None else list(on)

    df_x = x.set_index([x.index] + keys)
    df_y = y.set_index([y.index] + keys)
    return df_x.join(df_y, how=how)
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
36
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
37
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
def get_top_k_matches(data: DataFrame, k: int) -> DataFrame:
    """Keep only the k highest-scoring rows within each group.

    Rows are grouped by the first index level and, inside every group, the
    k rows with the largest 'score' values are retained.

    Args:
        data (DataFrame): A table with a 'score' column.
        k (int): Number of top scores to retrieve per group.

    Returns:
        DataFrame: Table containing only the top k rows of each group.
    """
    def _best_rows(group: DataFrame) -> DataFrame:
        return group.nlargest(n=k, columns=['score'])

    # group_keys=False keeps the original index instead of prepending the
    # group label as an additional index level.
    grouped = data.groupby(level=0, group_keys=False)
    return grouped.apply(_best_rows)
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
49
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
50
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
def filter_thresholds(data: DataFrame, t_score: float, t_matches: float) -> DataFrame:
    """Keep only the rows whose score and matches both exceed their thresholds.

    Both comparisons are strict: a row whose value equals a threshold is dropped.

    Args:
        data (DataFrame): Table to filter; needs 'score' and 'matches' columns.
        t_score (float): Score threshold.
        t_matches (float): Matches threshold.

    Returns:
        DataFrame: Filtered dataframe.
    """
    score_ok = data['score'] > t_score
    matches_ok = data['matches'] > t_matches
    return data[score_ok & matches_ok]
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
65
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
66
10
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
def scores_to_dataframes(scores):
    """Split a matchms scores object into a scores table and a matches table.

    Both tables have one row per reference and one column per query, labelled
    with the 'compound_name' metadata entry of the respective spectra.

    Args:
        scores (matchms.scores): matchms.scores object.

    Returns:
        DataFrame: Scores
        DataFrame: Matches
    """
    def _compound_names(spectra):
        return [spectrum.metadata['compound_name'] for spectrum in spectra]

    query_names = _compound_names(scores.queries)
    reference_names = _compound_names(scores.references)

    def _as_table(field):
        # Each entry of scores.scores is read as one table row for `field`.
        rows = [entry[field] for entry in scores.scores]
        return DataFrame(data=rows, index=reference_names, columns=query_names)

    dataframe_scores = _as_table("score")
    dataframe_matches = _as_table("matches")
    return dataframe_scores, dataframe_matches
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
84
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
85
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized scores and build one long table of scores and matches.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Joined dataframe on compounds containing scores and matches in long format.
    """
    parsed_scores = scores_from_json(scores_filename)
    score_table, match_table = scores_to_dataframes(parsed_scores)

    long_scores = create_long_table(score_table, 'score')
    long_matches = create_long_table(match_table, 'matches')

    # Matches are the left side of the join, so 'matches' precedes 'score'
    # in the resulting columns.
    return join_df(long_matches, long_scores, on=['compound'], how='inner')
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
103
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
104
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
# Command group entry point: loads the scores file once and stores the
# resulting table in the click context under 'data' for the subcommands.
# `output_filename` is not used here; it is declared on the group so that
# click hands it to the group's result callback.
# (A '#' comment is used instead of a docstring so the --help text stays unchanged.)
@click.group()
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
@click.pass_context
def cli(ctx, scores_filename, output_filename):
    ctx.ensure_object(dict)
    ctx.obj['data'] = load_data(scores_filename)
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
113
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
114
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
# Subcommand: returns the rows of the loaded table whose score and matches
# exceed the given thresholds; the returned table goes to the group's
# result callback.
@cli.command()
@click.option('--st', 'scores_threshold', type=float, required=True)
@click.option('--mt', 'matches_threshold', type=float, required=True)
@click.pass_context
def get_thresholded_data(ctx, scores_threshold, matches_threshold):
    loaded = ctx.obj['data']
    return filter_thresholds(loaded, scores_threshold, matches_threshold)
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
122
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
123
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
# Subcommand: returns the k best-scoring rows per group of the loaded table;
# the returned table goes to the group's result callback.
@cli.command()
@click.option('--k', 'k', type=int, required=True)
@click.pass_context
def get_top_k_data(ctx, k):
    loaded = ctx.obj['data']
    return get_top_k_matches(loaded, k)
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
130
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
131
10
5c0e5344edf3 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents: 9
diff changeset
# Result callback of the command group: receives the table returned by the
# executed subcommand together with the group's own options, and writes the
# table as tab-separated text. `scores_filename` is accepted but not used.
@cli.result_callback()
def write_output(result: DataFrame, scores_filename, output_filename):
    # Turn the index levels into regular columns and give them their
    # user-facing names before writing.
    column_names = {'level_0': 'query', 'compound': 'reference'}
    table = result.reset_index()
    table = table.rename(columns=column_names)
    table.to_csv(output_filename, sep="\t", index=False)
0
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
136
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
137
0a08bed94964 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 85f60c94ccb3cb7706694cbb7ff6d59dcb41c0c9"
recetox
parents:
diff changeset
if __name__ == '__main__':
    # Start the click group with an empty dict as context object; the group
    # callback stores the loaded table in it for the subcommands.
    cli(obj={})