comparison matchms_wrapper.py @ 2:a7c9fc186f8c draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 557e6558b93e63fd0b70443164d2d624cc05c319"
author recetox
date Mon, 19 Apr 2021 08:31:42 +0000
parents 4aecfd6b319b
children 57959596262d
comparison
equal deleted inserted replaced
1:4aecfd6b319b 2:a7c9fc186f8c
1 import argparse 1 import argparse
2 import sys 2 import sys
3 3
4 from matchms import calculate_scores 4 from matchms import calculate_scores
5 from matchms.filtering import add_precursor_mz
5 from matchms.importing import load_from_msp 6 from matchms.importing import load_from_msp
6 from matchms.similarity import ( 7 from matchms.similarity import (
7 CosineGreedy, 8 CosineGreedy,
8 CosineHungarian, 9 CosineHungarian,
9 FingerprintSimilarity,
10 IntersectMz,
11 ModifiedCosine, 10 ModifiedCosine,
12 ParentMassMatch
13 ) 11 )
14 from pandas import DataFrame 12 from pandas import DataFrame
15 13
16 14
17 def main(argv): 15 def main(argv):
21 ) 19 )
22 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") 20 parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
23 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') 21 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
24 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.") 22 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
25 parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.") 23 parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
24 parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
25 parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
26 parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
26 27
27 args = parser.parse_args() 28 args = parser.parse_args()
28 29
30 reference_spectra = load_from_msp(args.references_filename)
31 queries_spectra = load_from_msp(args.queries_filename)
32
29 if args.similarity_metric == 'CosineGreedy': 33 if args.similarity_metric == 'CosineGreedy':
30 similarity_metric = CosineGreedy() 34 similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power)
31 elif args.similarity_metric == 'CosineHungarian': 35 elif args.similarity_metric == 'CosineHungarian':
32 similarity_metric = CosineHungarian() 36 similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power)
33 elif args.similarity_metric == 'FingerprintSimilarity':
34 similarity_metric = FingerprintSimilarity()
35 elif args.similarity_metric == 'IntersectMz':
36 similarity_metric = IntersectMz()
37 elif args.similarity_metric == 'ModifiedCosine': 37 elif args.similarity_metric == 'ModifiedCosine':
38 similarity_metric = ModifiedCosine() 38 similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power)
39 reference_spectra = map(add_precursor_mz, reference_spectra)
40 queries_spectra = map(add_precursor_mz, queries_spectra)
39 else: 41 else:
40 similarity_metric = ParentMassMatch() 42 return -1
41
42 reference_spectra = [
43 spectrum for spectrum in load_from_msp(args.references_filename)
44 ]
45 queries_spectra = [spectrum for spectrum in load_from_msp(args.queries_filename)]
46 43
47 scores = calculate_scores( 44 scores = calculate_scores(
48 references=reference_spectra, 45 references=list(reference_spectra),
49 queries=queries_spectra, 46 queries=list(queries_spectra),
50 similarity_function=similarity_metric, 47 similarity_function=similarity_metric,
51 ) 48 )
52 49
53 query_names = [spectra.metadata['name'] for spectra in scores.queries] 50 query_names = [spectra.metadata['name'] for spectra in scores.queries]
54 reference_names = [spectra.metadata['name'] for spectra in scores.references] 51 reference_names = [spectra.metadata['name'] for spectra in scores.references]