comparison matchms_wrapper.py @ 7:4571641de47a draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author recetox
date Tue, 26 Oct 2021 14:24:58 +0000
parents 672c22d7f004
children f06923bdd2f2
comparison
equal deleted inserted replaced
6:5158423e32c5 7:4571641de47a
1 import argparse 1 import argparse
2 import sys 2 import sys
3 3
4 from matchms import calculate_scores 4 from matchms import calculate_scores
5 from matchms.filtering import add_precursor_mz 5 from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities
6 from matchms.importing import load_from_msp 6 from matchms.importing import load_from_msp
7 from matchms.similarity import ( 7 from matchms.similarity import (
8 CosineGreedy, 8 CosineGreedy,
9 CosineHungarian, 9 CosineHungarian,
10 ModifiedCosine, 10 ModifiedCosine,
12 from pandas import DataFrame 12 from pandas import DataFrame
13 13
14 14
def main(argv):
    """Compute similarity scores between MSP spectra and write them to disk.

    Parses the command line, loads the query spectra (and, unless the
    computation is symmetric, the reference spectra), optionally applies the
    matchms default filters and intensity normalization, scores every
    reference against every query with the selected metric, and hands the
    result to ``write_outputs``.

    Args:
        argv: Command-line arguments without the program name
            (e.g. ``sys.argv[1:]``).

    Returns:
        0 on success, -1 if ``similarity_metric`` is not one of
        ``CosineGreedy``, ``CosineHungarian`` or ``ModifiedCosine``.

    Raises:
        SystemExit: Raised by argparse for malformed arguments, including a
            missing ``--ref`` when ``-s`` is not given.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters")
    parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.")
    parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
    parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.")
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
    parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
    parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
    parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
    # Parse the arguments that were passed in instead of implicitly
    # re-reading sys.argv, so the ``argv`` parameter is actually honoured.
    args = parser.parse_args(argv)

    # A non-symmetric run needs a reference library; report that as a usage
    # error rather than failing later while trying to open ``None``.
    if not args.symmetric and not args.references_filename:
        parser.error("--ref is required unless the computation is symmetric (-s).")

    metric_classes = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'ModifiedCosine': ModifiedCosine,
    }
    # Reject an unknown metric before any spectra are loaded or filtered.
    if args.similarity_metric not in metric_classes:
        print(
            "Unknown similarity metric '{}'; expected one of: {}.".format(
                args.similarity_metric, ", ".join(metric_classes)
            ),
            file=sys.stderr,
        )
        return -1

    queries_spectra = list(load_from_msp(args.queries_filename))
    # In symmetric mode the queries double as references (see the
    # calculate_scores call below), so no reference file is read.
    if args.symmetric:
        reference_spectra = []
    else:
        reference_spectra = list(load_from_msp(args.references_filename))

    if args.default_filters:
        print("Applying default filters...")
        queries_spectra = list(map(default_filters, queries_spectra))
        reference_spectra = list(map(default_filters, reference_spectra))

    if args.normalize_intensities:
        print("Normalizing intensities...")
        queries_spectra = list(map(normalize_intensities, queries_spectra))
        reference_spectra = list(map(normalize_intensities, reference_spectra))

    similarity_metric = metric_classes[args.similarity_metric](
        args.tolerance, args.mz_power, args.intensity_power
    )
    if args.similarity_metric == 'ModifiedCosine':
        # Only the ModifiedCosine branch runs add_precursor_mz on the spectra.
        reference_spectra = list(map(add_precursor_mz, reference_spectra))
        queries_spectra = list(map(add_precursor_mz, queries_spectra))

    print("Calculating scores...")
    scores = calculate_scores(
        references=queries_spectra if args.symmetric else reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
        is_symmetric=args.symmetric
    )

    write_outputs(args, scores)
    return 0
67
68
def write_outputs(args, scores):
    """Store the computed scores and match counts as tab-separated tables.

    Both tables are labelled with the ``name`` metadata entry of the spectra:
    rows follow ``scores.references`` and columns follow ``scores.queries``.

    Args:
        args: Parsed arguments; ``output_filename_scores`` and
            ``output_filename_matches`` are read as the destination paths.
        scores: Result object exposing ``queries``, ``references`` and a
            ``scores`` sequence whose entries can be indexed by ``"score"``
            and ``"matches"``.
    """
    print("Storing outputs...")
    column_labels = [item.metadata['name'] for item in scores.queries]
    row_labels = [item.metadata['name'] for item in scores.references]

    # Each (field, destination attribute) pair yields one table: first the
    # similarity scores, then the number of matches.
    targets = (
        ("score", "output_filename_scores"),
        ("matches", "output_filename_matches"),
    )
    for field, destination_attr in targets:
        table = DataFrame(
            data=[row[field] for row in scores.scores],
            index=row_labels,
            columns=column_labels,
        )
        table.to_csv(getattr(args, destination_attr), sep='\t')
67 81
68 82
if __name__ == "__main__":
    # Hand main()'s status (0 on success, -1 for an unknown metric) to the
    # calling process instead of discarding it and always exiting with 0.
    sys.exit(main(argv=sys.argv[1:]))