Mercurial > repos > recetox > matchms
annotate matchms_wrapper.py @ 7:4571641de47a draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author | recetox |
---|---|
date | Tue, 26 Oct 2021 14:24:58 +0000 |
parents | 672c22d7f004 |
children | f06923bdd2f2 |
rev | line source |
---|---|
0
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
1 import argparse |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
2 import sys |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
3 |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
4 from matchms import calculate_scores |
7
4571641de47a
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents:
5
diff
changeset
|
5 from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities |
0
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
6 from matchms.importing import load_from_msp |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
7 from matchms.similarity import ( |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
8 CosineGreedy, |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
9 CosineHungarian, |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
10 ModifiedCosine, |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
11 ) |
1
4aecfd6b319b
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit d110cb008c3703945fe3718465de36278fa34652"
recetox
parents:
0
diff
changeset
|
12 from pandas import DataFrame |
0
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
13 |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
14 |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
def main(argv):
    """Compute similarity scores between MSP spectra and write them to disk.

    Args:
        argv: Command-line arguments, without the program name.

    Returns:
        0 on success, -1 if the requested similarity metric is not supported.

    Raises:
        SystemExit: Raised by argparse on malformed arguments, or when no
            reference library is given for a non-symmetric computation.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters")
    parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.")
    parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
    parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.")
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
    parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
    parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
    parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
    # Parse the arguments handed to this function; calling parse_args() with
    # no argument would silently fall back to sys.argv and ignore `argv`.
    args = parser.parse_args(argv)

    # Without -s the reference library is mandatory; report it as a usage
    # error instead of failing later while trying to open `None`.
    if not args.symmetric and args.references_filename is None:
        parser.error("--ref is required unless the computation is symmetric (-s).")

    # Resolve the metric before any file is read so that an unsupported name
    # fails fast and with a diagnostic.
    needs_precursor_mz = False
    if args.similarity_metric == 'CosineGreedy':
        similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power)
    elif args.similarity_metric == 'CosineHungarian':
        similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power)
    elif args.similarity_metric == 'ModifiedCosine':
        similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power)
        needs_precursor_mz = True
    else:
        print("Unsupported similarity metric: {}".format(args.similarity_metric), file=sys.stderr)
        return -1

    queries_spectra = list(load_from_msp(args.queries_filename))
    if args.symmetric:
        # In symmetric mode the queries are scored against themselves.
        reference_spectra = []
    else:
        reference_spectra = list(load_from_msp(args.references_filename))

    if args.default_filters:
        print("Applying default filters...")
        queries_spectra = list(map(default_filters, queries_spectra))
        reference_spectra = list(map(default_filters, reference_spectra))

    if args.normalize_intensities:
        print("Normalizing intensities...")
        queries_spectra = list(map(normalize_intensities, queries_spectra))
        reference_spectra = list(map(normalize_intensities, reference_spectra))

    if needs_precursor_mz:
        # Applied after the optional filters, in the same order as before.
        reference_spectra = list(map(add_precursor_mz, reference_spectra))
        queries_spectra = list(map(add_precursor_mz, queries_spectra))

    print("Calculating scores...")
    scores = calculate_scores(
        references=queries_spectra if args.symmetric else reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
        is_symmetric=args.symmetric
    )

    write_outputs(args, scores)
    return 0
4571641de47a
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents:
5
diff
changeset
|
67 |
4571641de47a
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents:
5
diff
changeset
|
68 |
4571641de47a
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
recetox
parents:
5
diff
changeset
|
def write_outputs(args, scores):
    """Store the score and match-count tables as tab-separated files.

    Args:
        args: Object providing ``output_filename_scores`` and
            ``output_filename_matches`` destination paths.
        scores: Object providing ``queries``, ``references`` and ``scores``;
            each row of ``scores.scores`` is indexed by the field names
            "score" and "matches".
    """
    print("Storing outputs...")
    column_labels = [spectrum.metadata['name'] for spectrum in scores.queries]
    row_labels = [spectrum.metadata['name'] for spectrum in scores.references]

    # One table per field: similarity scores first, then number of matches.
    targets = (
        ("score", args.output_filename_scores),
        ("matches", args.output_filename_matches),
    )
    for field, destination in targets:
        rows = [row[field] for row in scores.scores]
        table = DataFrame(data=rows, index=row_labels, columns=column_labels)
        table.to_csv(destination, sep='\t')
0
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
81 |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
82 |
6a736abe431f
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status so that a
    # failure (e.g. an unsupported metric) is visible to the calling tool.
    sys.exit(main(argv=sys.argv[1:]))