annotate matchms_wrapper.py @ 7:4571641de47a draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author recetox
date Tue, 26 Oct 2021 14:24:58 +0000
parents 672c22d7f004
children f06923bdd2f2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
1 import argparse
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
2 import sys
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
3
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
4 from matchms import calculate_scores
7
4571641de47a "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents: 5
diff changeset
5 from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities
0
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
6 from matchms.importing import load_from_msp
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
7 from matchms.similarity import (
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
8 CosineGreedy,
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
9 CosineHungarian,
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
10 ModifiedCosine,
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
11 )
1
4aecfd6b319b "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit d110cb008c3703945fe3718465de36278fa34652"
recetox
parents: 0
diff changeset
12 from pandas import DataFrame
0
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
13
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
14
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
def main(argv):
    """Compute spectral similarity scores between MSP spectra and store them.

    Parses the command line, loads the query spectra (and the reference
    spectra unless ``-s`` is given), optionally applies the default filters
    and intensity normalization, computes the scores with the selected
    similarity metric and passes them to ``write_outputs``.

    Args:
        argv: Command-line arguments without the program name. ``None`` makes
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        0 on success.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid, e.g. an
            unknown similarity metric or a missing ``--ref`` in non-symmetric
            mode.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters")
    parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.")
    parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
    parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.")
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    # Restricting the choices rejects an unsupported metric before any file is
    # loaded, instead of silently returning an error code afterwards.
    parser.add_argument(
        "similarity_metric",
        type=str,
        choices=("CosineGreedy", "CosineHungarian", "ModifiedCosine"),
        help='Metric to use for matching.',
    )
    parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
    parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
    parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
    # Parse the arguments handed to this function rather than the global sys.argv.
    args = parser.parse_args(argv)

    if not args.symmetric and args.references_filename is None:
        parser.error("--ref is required unless -s (symmetric computation) is given.")

    queries_spectra = list(load_from_msp(args.queries_filename))
    # In symmetric mode the queries double as references (see calculate_scores
    # below), so no separate reference library is loaded.
    reference_spectra = [] if args.symmetric else list(load_from_msp(args.references_filename))

    if args.default_filters:
        print("Applying default filters...")
        queries_spectra = list(map(default_filters, queries_spectra))
        reference_spectra = list(map(default_filters, reference_spectra))

    if args.normalize_intensities:
        print("Normalizing intensities...")
        queries_spectra = list(map(normalize_intensities, queries_spectra))
        reference_spectra = list(map(normalize_intensities, reference_spectra))

    metric_classes = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'ModifiedCosine': ModifiedCosine,
    }
    similarity_metric = metric_classes[args.similarity_metric](args.tolerance, args.mz_power, args.intensity_power)
    if args.similarity_metric == 'ModifiedCosine':
        # Only the ModifiedCosine branch runs add_precursor_mz on the spectra.
        reference_spectra = list(map(add_precursor_mz, reference_spectra))
        queries_spectra = list(map(add_precursor_mz, queries_spectra))

    print("Calculating scores...")
    scores = calculate_scores(
        references=queries_spectra if args.symmetric else reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
        is_symmetric=args.symmetric
    )

    write_outputs(args, scores)
    return 0
4571641de47a "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents: 5
diff changeset
67
4571641de47a "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents: 5
diff changeset
68
4571641de47a "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents: 5
diff changeset
def write_outputs(args, scores):
    """Store the similarity scores and match counts as tab-separated tables.

    Rows are labelled with the reference spectrum names and columns with the
    query spectrum names, both read from each spectrum's ``metadata['name']``.

    Args:
        args: Object providing ``output_filename_scores`` and
            ``output_filename_matches``, the destinations of the two tables.
        scores: Object providing ``queries``, ``references`` and ``scores``;
            every entry of ``scores.scores`` is indexed with ``"score"`` and
            ``"matches"`` to obtain one table row.
    """
    print("Storing outputs...")
    column_labels = [query.metadata['name'] for query in scores.queries]
    row_labels = [reference.metadata['name'] for reference in scores.references]

    def _dump(field, destination):
        # Build one table from the requested field and write it with tab separators.
        rows = [row[field] for row in scores.scores]
        table = DataFrame(data=rows, index=row_labels, columns=column_labels)
        table.to_csv(destination, sep='\t')

    # Similarity scores first, then the number of matches.
    _dump("score", args.output_filename_scores)
    _dump("matches", args.output_filename_matches)
0
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
81
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
82
6a736abe431f "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff changeset
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status,
    # so a non-zero result is visible to the calling shell or workflow engine.
    sys.exit(main(argv=sys.argv[1:]))