# view matchms_wrapper.py @ 0:6a736abe431f draft
#
# "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
# author recetox
# date Mon, 07 Dec 2020 20:12:13 +0000
# parents
# children 4aecfd6b319b
# line wrap: on
# line source

import argparse
import sys

import pandas
from matchms import calculate_scores
from matchms.importing import load_from_msp
from matchms.similarity import (
    CosineGreedy,
    CosineHungarian,
    FingerprintSimilarity,
    IntersectMz,
    ModifiedCosine,
    ParentMassMatch
)


def main(argv):
    """Score every reference spectrum against every query spectrum.

    Both MSP files are loaded, the selected similarity metric is applied
    to all reference/query pairs, and the resulting table is written as a
    semicolon-separated file with references as rows and queries as
    columns, labelled by each spectrum's ``name`` metadata entry.

    Args:
        argv: Command-line arguments without the program name, in order:
            references file, queries file, output file, similarity metric.
            Passing ``None`` makes argparse fall back to ``sys.argv[1:]``.

    Returns:
        0 once the output file has been written.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "references_filename", type=str, help="Path to reference MSP library."
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("output_filename", type=str, help="Path where to store the output .csv.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')

    # Parse the arguments supplied by the caller; a bare parse_args() would
    # ignore `argv` and always read sys.argv instead.
    args = parser.parse_args(argv)

    metric_classes = {
        'CosineGreedy': CosineGreedy,
        'CosineHungarian': CosineHungarian,
        'FingerprintSimilarity': FingerprintSimilarity,
        'IntersectMz': IntersectMz,
        'ModifiedCosine': ModifiedCosine,
    }
    # Any name not listed above falls back to ParentMassMatch.
    metric_class = metric_classes.get(args.similarity_metric, ParentMassMatch)
    similarity_metric = metric_class()

    reference_spectra = list(load_from_msp(args.references_filename))
    queries_spectra = list(load_from_msp(args.queries_filename))

    scores = calculate_scores(
        references=reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
    )

    query_names = [spectra.metadata['name'] for spectra in scores.queries]
    reference_names = [spectra.metadata['name'] for spectra in scores.references]
    dataframe = pandas.DataFrame(data=scores.scores, index=reference_names, columns=query_names)
    dataframe.to_csv(args.output_filename, sep=';')
    return 0


if __name__ == "__main__":
    # Hand main's return value to the interpreter as the process exit status
    # instead of discarding it.
    sys.exit(main(argv=sys.argv[1:]))