comparison matchms_similarity_wrapper.py @ 11:ba9410f612bc draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c32f579c38aef4c2c5d088e6c1c9e522bc0a1a12"
author recetox
date Thu, 17 Mar 2022 12:26:24 +0000
parents c3dd958cc4a5
children cfa8b66fb106
comparison
equal deleted inserted replaced
10:c3dd958cc4a5 11:ba9410f612bc
1 import argparse 1 import argparse
2 import sys 2 import sys
3 3
4 import numpy as np
4 from matchms import calculate_scores 5 from matchms import calculate_scores
5 from matchms.importing import load_from_mgf, load_from_msp 6 from matchms.importing import load_from_mgf, load_from_msp
6 from matchms.similarity import ( 7 from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine
7 CosineGreedy,
8 CosineHungarian,
9 ModifiedCosine,
10 )
11 from pandas import DataFrame 8 from pandas import DataFrame
12 9
13 10
14 def convert_precursor_mz(spectrum): 11 def convert_precursor_mz(spectrum):
15 """ 12 """
26 raise ValueError("Precursor_mz missing. Apply 'add_precursor_mz' filter first.") 23 raise ValueError("Precursor_mz missing. Apply 'add_precursor_mz' filter first.")
27 24
28 25
29 def main(argv): 26 def main(argv):
30 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") 27 parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
28 parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.")
31 parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") 29 parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
32 parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.") 30 parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.")
33 parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.") 31 parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.")
34 parser.add_argument("queries_filename", type=str, help="Path to query spectra.") 32 parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
35 parser.add_argument("queries_format", type=str, help="Query spectra file format.") 33 parser.add_argument("queries_format", type=str, help="Query spectra file format.")
75 queries=queries_spectra, 73 queries=queries_spectra,
76 similarity_function=similarity_metric, 74 similarity_function=similarity_metric,
77 is_symmetric=args.symmetric 75 is_symmetric=args.symmetric
78 ) 76 )
79 77
78 if args.ri_tolerance is not None:
79 print("RI filtering with tolerance ", args.ri_tolerance)
80 ri_matches = calculate_scores(reference_spectra, queries_spectra, MetadataMatch("retention_index", "difference", args.ri_tolerance)).scores
81 scores.scores["score"] = np.where(ri_matches, scores.scores["score"], 0.0)
82
80 write_outputs(args, scores) 83 write_outputs(args, scores)
81 return 0 84 return 0
82 85
83 86
84 def write_outputs(args, scores): 87 def write_outputs(args, scores):
85 print("Storing outputs...") 88 print("Storing outputs...")
86 query_names = [spectra.metadata['name'] for spectra in scores.queries] 89 query_names = [spectra.metadata['compound_name'] for spectra in scores.queries]
87 reference_names = [spectra.metadata['name'] for spectra in scores.references] 90 reference_names = [spectra.metadata['compound_name'] for spectra in scores.references]
88 91
89 # Write scores to dataframe 92 # Write scores to dataframe
90 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names) 93 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
91 dataframe_scores.to_csv(args.output_filename_scores, sep='\t') 94 dataframe_scores.to_csv(args.output_filename_scores, sep='\t')
92 95