diff matchms_wrapper.py @ 7:4571641de47a draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author recetox
date Tue, 26 Oct 2021 14:24:58 +0000
parents 672c22d7f004
children f06923bdd2f2
line wrap: on
line diff
--- a/matchms_wrapper.py	Mon Aug 09 13:08:27 2021 +0000
+++ b/matchms_wrapper.py	Tue Oct 26 14:24:58 2021 +0000
@@ -2,7 +2,7 @@
 import sys
 
 from matchms import calculate_scores
-from matchms.filtering import add_precursor_mz
+from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities
 from matchms.importing import load_from_msp
 from matchms.similarity import (
     CosineGreedy,
@@ -14,26 +14,34 @@
 
 def main(argv):
     parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
-    parser.add_argument(
-        "--ref", type=str, dest="references_filename", help="Path to reference MSP library."
-    )
+    parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters")
+    parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.")
+    parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
+    parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.")
     parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
     parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
-    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
-    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
     parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
     parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
     parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
-
+    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
+    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
     args = parser.parse_args()
 
     queries_spectra = list(load_from_msp(args.queries_filename))
-    if(args.references_filename):
+    if args.symmetric:
+        reference_spectra = []
+    else:
         reference_spectra = list(load_from_msp(args.references_filename))
-        symmetric = False
-    else:
-        reference_spectra = queries_spectra.copy()
-        symmetric = True
+
+    if args.default_filters is True:
+        print("Applying default filters...")
+        queries_spectra = list(map(default_filters, queries_spectra))
+        reference_spectra = list(map(default_filters, reference_spectra))
+
+    if args.normalize_intensities is True:
+        print("Normalizing intensities...")
+        queries_spectra = list(map(normalize_intensities, queries_spectra))
+        reference_spectra = list(map(normalize_intensities, reference_spectra))
 
     if args.similarity_metric == 'CosineGreedy':
         similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power)
@@ -41,18 +49,25 @@
         similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power)
     elif args.similarity_metric == 'ModifiedCosine':
         similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power)
-        reference_spectra = map(add_precursor_mz, reference_spectra)
-        queries_spectra = map(add_precursor_mz, queries_spectra)
+        reference_spectra = list(map(add_precursor_mz, reference_spectra))
+        queries_spectra = list(map(add_precursor_mz, queries_spectra))
     else:
         return -1
 
+    print("Calculating scores...")
     scores = calculate_scores(
-        references=list(reference_spectra),
-        queries=list(queries_spectra),
+        references=queries_spectra if args.symmetric else reference_spectra,
+        queries=queries_spectra,
         similarity_function=similarity_metric,
-        is_symmetric=symmetric
+        is_symmetric=args.symmetric
     )
 
+    write_outputs(args, scores)
+    return 0
+
+
+def write_outputs(args, scores):
+    print("Storing outputs...")
     query_names = [spectra.metadata['name'] for spectra in scores.queries]
     reference_names = [spectra.metadata['name'] for spectra in scores.references]
 
@@ -63,7 +78,6 @@
     # Write number of matches to dataframe
     dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names)
     dataframe_matches.to_csv(args.output_filename_matches, sep='\t')
-    return 0
 
 
 if __name__ == "__main__":