comparison matchms_wrapper.py @ 0:6a736abe431f draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
author recetox
date Mon, 07 Dec 2020 20:12:13 +0000
parents
children 4aecfd6b319b
comparison
equal deleted inserted replaced
-1:000000000000 0:6a736abe431f
1 import argparse
2 import sys
3
4 import pandas
5 from matchms import calculate_scores
6 from matchms.importing import load_from_msp
7 from matchms.similarity import (
8 CosineGreedy,
9 CosineHungarian,
10 FingerprintSimilarity,
11 IntersectMz,
12 ModifiedCosine,
13 ParentMassMatch
14 )
15
16
def main(argv):
    """Score query spectra against reference spectra and write the result as CSV.

    Both inputs are read with ``load_from_msp``; every reference is compared
    with every query through ``calculate_scores``. The output table has one
    row per reference and one column per query, both labelled with the
    spectrum's ``name`` metadata entry, and is written with ``;`` as separator.

    Args:
        argv: Command-line arguments without the program name, in order:
            ``references_filename``, ``queries_filename``,
            ``output_filename``, ``similarity_metric``. Passing ``None``
            makes argparse fall back to ``sys.argv[1:]``.

    Returns:
        ``0`` once the score table has been written.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "references_filename", type=str, help="Path to reference MSP library."
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("output_filename", type=str, help="Path where to store the output .csv.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')

    # Parse the arguments this function was given; previously ``argv`` was
    # ignored and the process-wide ``sys.argv`` was always used instead.
    args = parser.parse_args(argv)

    # Built after parsing so that ``--help`` / usage errors exit before any
    # similarity class is touched.
    metric_classes = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'FingerprintSimilarity': FingerprintSimilarity,
        'IntersectMz': IntersectMz,
        'ModifiedCosine': ModifiedCosine,
        'ParentMassMatch': ParentMassMatch,
    }
    # Any unrecognised metric name keeps falling back to ParentMassMatch,
    # matching the historical behaviour of this wrapper.
    metric_class = metric_classes.get(args.similarity_metric, ParentMassMatch)
    similarity_metric = metric_class()

    # Materialise both inputs as lists before scoring.
    reference_spectra = list(load_from_msp(args.references_filename))
    queries_spectra = list(load_from_msp(args.queries_filename))

    scores = calculate_scores(
        references=reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
    )

    query_names = [spectrum.metadata['name'] for spectrum in scores.queries]
    reference_names = [spectrum.metadata['name'] for spectrum in scores.references]

    # Rows are references, columns are queries.
    dataframe = pandas.DataFrame(
        data=scores.scores, index=reference_names, columns=query_names
    )
    dataframe.to_csv(args.output_filename, sep=';')
    return 0
57
58
if __name__ == "__main__":
    # Hand main()'s integer result to the interpreter as the process exit
    # status instead of discarding it.
    sys.exit(main(argv=sys.argv[1:]))