diff matchms_wrapper.py @ 1:4aecfd6b319b draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit d110cb008c3703945fe3718465de36278fa34652"
author recetox
date Wed, 17 Mar 2021 11:40:17 +0000
parents 6a736abe431f
children a7c9fc186f8c
line wrap: on
line diff
--- a/matchms_wrapper.py	Mon Dec 07 20:12:13 2020 +0000
+++ b/matchms_wrapper.py	Wed Mar 17 11:40:17 2021 +0000
@@ -1,7 +1,6 @@
 import argparse
 import sys
 
-import pandas
 from matchms import calculate_scores
 from matchms.importing import load_from_msp
 from matchms.similarity import (
@@ -12,6 +11,7 @@
     ModifiedCosine,
     ParentMassMatch
 )
+from pandas import DataFrame
 
 
 def main(argv):
@@ -20,8 +20,9 @@
         "references_filename", type=str, help="Path to reference MSP library."
     )
     parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
-    parser.add_argument("output_filename", type=str, help="Path where to store the output .csv.")
     parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
+    parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
+    parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
 
     args = parser.parse_args()
 
@@ -51,8 +52,14 @@
 
     query_names = [spectra.metadata['name'] for spectra in scores.queries]
     reference_names = [spectra.metadata['name'] for spectra in scores.references]
-    dataframe = pandas.DataFrame(data=scores.scores, index=reference_names, columns=query_names)
-    dataframe.to_csv(args.output_filename, sep=';')
+
+    # Collect the similarity scores in a dataframe and write them to CSV
+    dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
+    dataframe_scores.to_csv(args.output_filename_scores, sep=';')
+
+    # Collect the number of matches in a dataframe and write them to CSV
+    dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names)
+    dataframe_matches.to_csv(args.output_filename_matches, sep=';')
     return 0