Mercurial > repos > recetox > matchms
comparison matchms_wrapper.py @ 7:4571641de47a draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author | recetox |
---|---|
date | Tue, 26 Oct 2021 14:24:58 +0000 |
parents | 672c22d7f004 |
children | f06923bdd2f2 |
comparison
equal
deleted
inserted
replaced
6:5158423e32c5 | 7:4571641de47a |
---|---|
1 import argparse | 1 import argparse |
2 import sys | 2 import sys |
3 | 3 |
4 from matchms import calculate_scores | 4 from matchms import calculate_scores |
5 from matchms.filtering import add_precursor_mz | 5 from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities |
6 from matchms.importing import load_from_msp | 6 from matchms.importing import load_from_msp |
7 from matchms.similarity import ( | 7 from matchms.similarity import ( |
8 CosineGreedy, | 8 CosineGreedy, |
9 CosineHungarian, | 9 CosineHungarian, |
10 ModifiedCosine, | 10 ModifiedCosine, |
12 from pandas import DataFrame | 12 from pandas import DataFrame |
13 | 13 |
14 | 14 |
15 def main(argv): | 15 def main(argv): |
16 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") | 16 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") |
17 parser.add_argument( | 17 parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters") |
18 "--ref", type=str, dest="references_filename", help="Path to reference MSP library." | 18 parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.") |
19 ) | 19 parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") |
20 parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.") | |
20 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") | 21 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") |
21 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') | 22 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') |
22 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.") | |
23 parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.") | |
24 parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.") | 23 parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.") |
25 parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.") | 24 parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.") |
26 parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.") | 25 parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.") |
27 | 26 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.") |
27 parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.") | |
28 args = parser.parse_args() | 28 args = parser.parse_args() |
29 | 29 |
30 queries_spectra = list(load_from_msp(args.queries_filename)) | 30 queries_spectra = list(load_from_msp(args.queries_filename)) |
31 if(args.references_filename): | 31 if args.symmetric: |
32 reference_spectra = [] | |
33 else: | |
32 reference_spectra = list(load_from_msp(args.references_filename)) | 34 reference_spectra = list(load_from_msp(args.references_filename)) |
33 symmetric = False | 35 |
34 else: | 36 if args.default_filters is True: |
35 reference_spectra = queries_spectra.copy() | 37 print("Applying default filters...") |
36 symmetric = True | 38 queries_spectra = list(map(default_filters, queries_spectra)) |
39 reference_spectra = list(map(default_filters, reference_spectra)) | |
40 | |
41 if args.normalize_intensities is True: | |
42 print("Normalizing intensities...") | |
43 queries_spectra = list(map(normalize_intensities, queries_spectra)) | |
44 reference_spectra = list(map(normalize_intensities, reference_spectra)) | |
37 | 45 |
38 if args.similarity_metric == 'CosineGreedy': | 46 if args.similarity_metric == 'CosineGreedy': |
39 similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power) | 47 similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power) |
40 elif args.similarity_metric == 'CosineHungarian': | 48 elif args.similarity_metric == 'CosineHungarian': |
41 similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power) | 49 similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power) |
42 elif args.similarity_metric == 'ModifiedCosine': | 50 elif args.similarity_metric == 'ModifiedCosine': |
43 similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power) | 51 similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power) |
44 reference_spectra = map(add_precursor_mz, reference_spectra) | 52 reference_spectra = list(map(add_precursor_mz, reference_spectra)) |
45 queries_spectra = map(add_precursor_mz, queries_spectra) | 53 queries_spectra = list(map(add_precursor_mz, queries_spectra)) |
46 else: | 54 else: |
47 return -1 | 55 return -1 |
48 | 56 |
57 print("Calculating scores...") | |
49 scores = calculate_scores( | 58 scores = calculate_scores( |
50 references=list(reference_spectra), | 59 references=queries_spectra if args.symmetric else reference_spectra, |
51 queries=list(queries_spectra), | 60 queries=queries_spectra, |
52 similarity_function=similarity_metric, | 61 similarity_function=similarity_metric, |
53 is_symmetric=symmetric | 62 is_symmetric=args.symmetric |
54 ) | 63 ) |
55 | 64 |
65 write_outputs(args, scores) | |
66 return 0 | |
67 | |
68 | |
69 def write_outputs(args, scores): | |
70 print("Storing outputs...") | |
56 query_names = [spectra.metadata['name'] for spectra in scores.queries] | 71 query_names = [spectra.metadata['name'] for spectra in scores.queries] |
57 reference_names = [spectra.metadata['name'] for spectra in scores.references] | 72 reference_names = [spectra.metadata['name'] for spectra in scores.references] |
58 | 73 |
59 # Write scores to dataframe | 74 # Write scores to dataframe |
60 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names) | 75 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names) |
61 dataframe_scores.to_csv(args.output_filename_scores, sep='\t') | 76 dataframe_scores.to_csv(args.output_filename_scores, sep='\t') |
62 | 77 |
63 # Write number of matches to dataframe | 78 # Write number of matches to dataframe |
64 dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names) | 79 dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names) |
65 dataframe_matches.to_csv(args.output_filename_matches, sep='\t') | 80 dataframe_matches.to_csv(args.output_filename_matches, sep='\t') |
66 return 0 | |
67 | 81 |
68 | 82 |
69 if __name__ == "__main__": | 83 if __name__ == "__main__": |
70 main(argv=sys.argv[1:]) | 84 main(argv=sys.argv[1:]) |
71 pass | 85 pass |