Mercurial > repos > recetox > matchms
diff matchms_similarity_wrapper.py @ 11:ba9410f612bc draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c32f579c38aef4c2c5d088e6c1c9e522bc0a1a12"
author | recetox |
---|---|
date | Thu, 17 Mar 2022 12:26:24 +0000 |
parents | c3dd958cc4a5 |
children | cfa8b66fb106 |
line wrap: on
line diff
```diff
--- a/matchms_similarity_wrapper.py	Fri Jan 28 16:22:06 2022 +0000
+++ b/matchms_similarity_wrapper.py	Thu Mar 17 12:26:24 2022 +0000
@@ -1,13 +1,10 @@
 import argparse
 import sys
 
+import numpy as np
 from matchms import calculate_scores
 from matchms.importing import load_from_mgf, load_from_msp
-from matchms.similarity import (
-    CosineGreedy,
-    CosineHungarian,
-    ModifiedCosine,
-)
+from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine
 from pandas import DataFrame
 
 
@@ -28,6 +25,7 @@
 
 def main(argv):
     parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
+    parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.")
     parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
     parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.")
     parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.")
@@ -77,14 +75,19 @@
         is_symmetric=args.symmetric
     )
 
+    if args.ri_tolerance is not None:
+        print("RI filtering with tolerance ", args.ri_tolerance)
+        ri_matches = calculate_scores(reference_spectra, queries_spectra, MetadataMatch("retention_index", "difference", args.ri_tolerance)).scores
+        scores.scores["score"] = np.where(ri_matches, scores.scores["score"], 0.0)
+
     write_outputs(args, scores)
     return 0
 
 
 def write_outputs(args, scores):
     print("Storing outputs...")
-    query_names = [spectra.metadata['name'] for spectra in scores.queries]
-    reference_names = [spectra.metadata['name'] for spectra in scores.references]
+    query_names = [spectra.metadata['compound_name'] for spectra in scores.queries]
+    reference_names = [spectra.metadata['compound_name'] for spectra in scores.references]
 
     # Write scores to dataframe
     dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
```