Mercurial > repos > recetox > matchms
view matchms_wrapper.py @ 0:6a736abe431f draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
author | recetox |
---|---|
date | Mon, 07 Dec 2020 20:12:13 +0000 |
parents | |
children | 4aecfd6b319b |
line wrap: on
line source
import argparse
import sys

import pandas
from matchms import calculate_scores
from matchms.importing import load_from_msp
from matchms.similarity import (
    CosineGreedy,
    CosineHungarian,
    FingerprintSimilarity,
    IntersectMz,
    ModifiedCosine,
    ParentMassMatch,
)


def main(argv) -> int:
    """Compute pairwise similarity scores between two MSP files and save them.

    Args:
        argv: Command-line arguments without the program name, in the order
            ``references_filename queries_filename output_filename
            similarity_metric``.

    Returns:
        0 once the score table has been written.

    The output is a ``;``-separated CSV whose rows are labelled with the
    ``name`` metadata of the reference spectra and whose columns are labelled
    with the ``name`` metadata of the query spectra.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "references_filename", type=str, help="Path to reference MSP library."
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument(
        "output_filename", type=str, help="Path where to store the output .csv."
    )
    parser.add_argument(
        "similarity_metric", type=str, help='Metric to use for matching.'
    )
    # Parse the arguments handed to this function rather than implicitly
    # re-reading sys.argv, so that main() can be driven programmatically.
    args = parser.parse_args(argv)

    # Map metric names to their classes; only the selected one is instantiated.
    metrics = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'FingerprintSimilarity': FingerprintSimilarity,
        'IntersectMz': IntersectMz,
        'ModifiedCosine': ModifiedCosine,
    }
    # Any name not listed above (including 'ParentMassMatch' itself) selects
    # ParentMassMatch, matching the historical fall-through behaviour.
    similarity_metric = metrics.get(args.similarity_metric, ParentMassMatch)()

    reference_spectra = list(load_from_msp(args.references_filename))
    queries_spectra = list(load_from_msp(args.queries_filename))

    scores = calculate_scores(
        references=reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
    )

    query_names = [spectra.metadata['name'] for spectra in scores.queries]
    reference_names = [spectra.metadata['name'] for spectra in scores.references]

    dataframe = pandas.DataFrame(
        data=scores.scores, index=reference_names, columns=query_names
    )
    dataframe.to_csv(args.output_filename, sep=';')
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main(argv=sys.argv[1:]))