diff matchms_similarity_wrapper.py @ 11:ba9410f612bc draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c32f579c38aef4c2c5d088e6c1c9e522bc0a1a12"
author recetox
date Thu, 17 Mar 2022 12:26:24 +0000
parents c3dd958cc4a5
children cfa8b66fb106
line wrap: on
line diff
--- a/matchms_similarity_wrapper.py	Fri Jan 28 16:22:06 2022 +0000
+++ b/matchms_similarity_wrapper.py	Thu Mar 17 12:26:24 2022 +0000
@@ -1,13 +1,10 @@
 import argparse
 import sys
 
+import numpy as np
 from matchms import calculate_scores
 from matchms.importing import load_from_mgf, load_from_msp
-from matchms.similarity import (
-    CosineGreedy,
-    CosineHungarian,
-    ModifiedCosine,
-)
+from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine
 from pandas import DataFrame
 
 
@@ -28,6 +25,7 @@
 
 def main(argv):
     parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
+    parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.")
     parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
     parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.")
     parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.")
@@ -77,14 +75,19 @@
         is_symmetric=args.symmetric
     )
 
+    if args.ri_tolerance is not None:
+        print("RI filtering with tolerance ", args.ri_tolerance)
+        ri_matches = calculate_scores(reference_spectra, queries_spectra, MetadataMatch("retention_index", "difference", args.ri_tolerance)).scores
+        scores.scores["score"] = np.where(ri_matches, scores.scores["score"], 0.0)
+
     write_outputs(args, scores)
     return 0
 
 
 def write_outputs(args, scores):
     print("Storing outputs...")
-    query_names = [spectra.metadata['name'] for spectra in scores.queries]
-    reference_names = [spectra.metadata['name'] for spectra in scores.references]
+    query_names = [spectra.metadata['compound_name'] for spectra in scores.queries]
+    reference_names = [spectra.metadata['compound_name'] for spectra in scores.references]
 
     # Write scores to dataframe
     dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)