Mercurial > repos > recetox > matchms
view matchms_wrapper.py @ 5:672c22d7f004 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit e4fdeb4c24f993c7f1a9ce851b9739ce9c0ff8e2"
author | recetox |
---|---|
date | Wed, 21 Jul 2021 07:43:45 +0000 |
parents | 57959596262d |
children | 4571641de47a |
line wrap: on
line source
import argparse
import sys

from matchms import calculate_scores
from matchms.filtering import add_precursor_mz
from matchms.importing import load_from_msp
from matchms.similarity import (
    CosineGreedy,
    CosineHungarian,
    ModifiedCosine,
)
from pandas import DataFrame


def main(argv):
    """Compute pairwise similarity scores between MSP spectra and write them out.

    Query spectra are always loaded from ``queries_filename``. When ``--ref``
    is given, the references are loaded from that file; otherwise the queries
    are compared against themselves and the computation is flagged symmetric.
    Two tab-separated tables (references as rows, queries as columns) are
    written: one with the scores and one with the number of matched peaks.

    Args:
        argv: Command-line arguments excluding the program name, e.g.
            ``sys.argv[1:]``. Passing ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``-1`` when ``similarity_metric`` is not one of
        ``CosineGreedy``, ``CosineHungarian`` or ``ModifiedCosine``.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "--ref",
        type=str,
        dest="references_filename",
        help="Path to reference MSP library.",
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("similarity_metric", type=str, help="Metric to use for matching.")
    parser.add_argument(
        "output_filename_scores",
        type=str,
        help="Path where to store the output .csv scores.",
    )
    parser.add_argument(
        "output_filename_matches",
        type=str,
        help="Path where to store the output .csv matches.",
    )
    parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
    parser.add_argument(
        "mz_power",
        type=float,
        help="The power to raise mz to in the cosine function.",
    )
    parser.add_argument(
        "intensity_power",
        type=float,
        help="The power to raise intensity to in the cosine function.",
    )
    # Parse the arguments handed to main() rather than implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    args = parser.parse_args(argv)

    queries_spectra = list(load_from_msp(args.queries_filename))
    if args.references_filename:
        reference_spectra = list(load_from_msp(args.references_filename))
        symmetric = False
    else:
        # No reference library given: score the queries against themselves.
        reference_spectra = queries_spectra.copy()
        symmetric = True

    if args.similarity_metric == "CosineGreedy":
        similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power)
    elif args.similarity_metric == "CosineHungarian":
        similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power)
    elif args.similarity_metric == "ModifiedCosine":
        similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power)
        # Only this metric gets the add_precursor_mz filter applied first
        # (presumably because it needs precursor m/z metadata - see matchms docs).
        reference_spectra = map(add_precursor_mz, reference_spectra)
        queries_spectra = map(add_precursor_mz, queries_spectra)
    else:
        print(
            f"Unsupported similarity metric: {args.similarity_metric!r}",
            file=sys.stderr,
        )
        return -1

    scores = calculate_scores(
        references=list(reference_spectra),
        queries=list(queries_spectra),
        similarity_function=similarity_metric,
        is_symmetric=symmetric,
    )

    query_names = [spectra.metadata["name"] for spectra in scores.queries]
    reference_names = [spectra.metadata["name"] for spectra in scores.references]

    # Write scores to dataframe
    dataframe_scores = DataFrame(
        data=[entry["score"] for entry in scores.scores],
        index=reference_names,
        columns=query_names,
    )
    dataframe_scores.to_csv(args.output_filename_scores, sep="\t")

    # Write number of matches to dataframe
    dataframe_matches = DataFrame(
        data=[entry["matches"] for entry in scores.scores],
        index=reference_names,
        columns=query_names,
    )
    dataframe_matches.to_csv(args.output_filename_matches, sep="\t")
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so an unsupported metric yields a non-zero exit
    # code instead of a silent "success" with no output files.
    sys.exit(main(argv=sys.argv[1:]))