Mercurial > repos > recetox > matchms
comparison matchms_similarity_wrapper.py @ 11:ba9410f612bc draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c32f579c38aef4c2c5d088e6c1c9e522bc0a1a12"
author | recetox |
---|---|
date | Thu, 17 Mar 2022 12:26:24 +0000 |
parents | c3dd958cc4a5 |
children | cfa8b66fb106 |
comparison
equal
deleted
inserted
replaced
10:c3dd958cc4a5 | 11:ba9410f612bc |
---|---|
1 import argparse | 1 import argparse |
2 import sys | 2 import sys |
3 | 3 |
| 4 import numpy as np |
4 from matchms import calculate_scores | 5 from matchms import calculate_scores |
5 from matchms.importing import load_from_mgf, load_from_msp | 6 from matchms.importing import load_from_mgf, load_from_msp |
6 from matchms.similarity import ( | 7 from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine |
7 CosineGreedy, | |
8 CosineHungarian, | |
9 ModifiedCosine, | |
10 ) | |
11 from pandas import DataFrame | 8 from pandas import DataFrame |
12 | 9 |
13 | 10 |
14 def convert_precursor_mz(spectrum): | 11 def convert_precursor_mz(spectrum): |
15 """ | 12 """ |
26 raise ValueError("Precursor_mz missing. Apply 'add_precursor_mz' filter first.") | 23 raise ValueError("Precursor_mz missing. Apply 'add_precursor_mz' filter first.") |
27 | 24 |
28 | 25 |
29 def main(argv): | 26 def main(argv): |
30 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") | 27 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") |
| 28 parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.") |
31 parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") | 29 parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") |
32 parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.") | 30 parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.") |
33 parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.") | 31 parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.") |
34 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") | 32 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") |
35 parser.add_argument("queries_format", type=str, help="Query spectra file format.") | 33 parser.add_argument("queries_format", type=str, help="Query spectra file format.") |
75 queries=queries_spectra, | 73 queries=queries_spectra, |
76 similarity_function=similarity_metric, | 74 similarity_function=similarity_metric, |
77 is_symmetric=args.symmetric | 75 is_symmetric=args.symmetric |
78 ) | 76 ) |
79 | 77 |
| 78 if args.ri_tolerance is not None: |
| 79 print("RI filtering with tolerance ", args.ri_tolerance) |
| 80 ri_matches = calculate_scores(reference_spectra, queries_spectra, MetadataMatch("retention_index", "difference", args.ri_tolerance)).scores |
| 81 scores.scores["score"] = np.where(ri_matches, scores.scores["score"], 0.0) |
| 82 |
80 write_outputs(args, scores) | 83 write_outputs(args, scores) |
81 return 0 | 84 return 0 |
82 | 85 |
83 | 86 |
84 def write_outputs(args, scores): | 87 def write_outputs(args, scores): |
85 print("Storing outputs...") | 88 print("Storing outputs...") |
86 query_names = [spectra.metadata['name'] for spectra in scores.queries] | 89 query_names = [spectra.metadata['compound_name'] for spectra in scores.queries] |
87 reference_names = [spectra.metadata['name'] for spectra in scores.references] | 90 reference_names = [spectra.metadata['compound_name'] for spectra in scores.references] |
88 | 91 |
89 # Write scores to dataframe | 92 # Write scores to dataframe |
90 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names) | 93 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names) |
91 dataframe_scores.to_csv(args.output_filename_scores, sep='\t') | 94 dataframe_scores.to_csv(args.output_filename_scores, sep='\t') |
92 | 95 |