Mercurial > repos > recetox > matchms
annotate matchms_wrapper.py @ 1:4aecfd6b319b draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit d110cb008c3703945fe3718465de36278fa34652"
| author | recetox | 
|---|---|
| date | Wed, 17 Mar 2021 11:40:17 +0000 | 
| parents | 6a736abe431f | 
| children | a7c9fc186f8c | 
| rev | line source | 
|---|---|
| 
0
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
1 import argparse | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
2 import sys | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
3 | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
4 from matchms import calculate_scores | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
5 from matchms.importing import load_from_msp | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
6 from matchms.similarity import ( | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
7 CosineGreedy, | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
8 CosineHungarian, | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
9 FingerprintSimilarity, | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
10 IntersectMz, | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
11 ModifiedCosine, | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
12 ParentMassMatch | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
13 ) | 
| 
1
 
4aecfd6b319b
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit d110cb008c3703945fe3718465de36278fa34652"
 
recetox 
parents: 
0 
diff
changeset
 | 
14 from pandas import DataFrame | 
| 
0
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
15 | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
16 | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
def main(argv):
    """Compute similarity scores between reference and query MSP spectra.

    Parses the command line, loads both spectra collections, scores every
    reference against every query with the selected similarity metric and
    writes two semicolon-separated tables (scores and number of matches).

    Args:
        argv: Command-line arguments without the program name, in the order
            references_filename, queries_filename, similarity_metric,
            output_filename_scores, output_filename_matches. Passing None
            makes argparse read sys.argv[1:].

    Returns:
        0 once both output files have been written.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "references_filename", type=str, help="Path to reference MSP library."
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")

    # Parse the arguments handed to this function; previously argv was
    # ignored and sys.argv was always used.
    args = parser.parse_args(argv)

    metric_classes = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'FingerprintSimilarity': FingerprintSimilarity,
        'IntersectMz': IntersectMz,
        'ModifiedCosine': ModifiedCosine,
        'ParentMassMatch': ParentMassMatch,
    }
    # Any unrecognised metric name falls back to ParentMassMatch, which is
    # what the original if/elif chain did in its else branch.
    similarity_metric = metric_classes.get(args.similarity_metric, ParentMassMatch)()

    reference_spectra = list(load_from_msp(args.references_filename))
    queries_spectra = list(load_from_msp(args.queries_filename))

    scores = calculate_scores(
        references=reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
    )

    query_names = [spectra.metadata['name'] for spectra in scores.queries]
    reference_names = [spectra.metadata['name'] for spectra in scores.references]

    # NOTE(review): the two tables below read the "score" and "matches" fields
    # of every entry in scores.scores; presumably not every metric produces
    # both fields - confirm against the matchms version in use.

    # Write scores to dataframe (rows: references, columns: queries)
    dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
    dataframe_scores.to_csv(args.output_filename_scores, sep=';')

    # Write number of matches to dataframe (rows: references, columns: queries)
    dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names)
    dataframe_matches.to_csv(args.output_filename_matches, sep=';')
    return 0
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
64 | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
65 | 
| 
 
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
 
recetox 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # Forward main's return code to the shell as the process exit status.
    sys.exit(main(argv=sys.argv[1:]))
