Mercurial > repos > recetox > matchms
diff matchms_wrapper.py @ 0:6a736abe431f draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 25fba33c37b26a6e9321f043c61b3c0dd392a53c"
author | recetox |
---|---|
date | Mon, 07 Dec 2020 20:12:13 +0000 |
parents | |
children | 4aecfd6b319b |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/matchms_wrapper.py Mon Dec 07 20:12:13 2020 +0000
"""Command-line wrapper that scores query spectra against a reference MSP library."""
import argparse
import sys

import pandas
from matchms import calculate_scores
from matchms.importing import load_from_msp
from matchms.similarity import (
    CosineGreedy,
    CosineHungarian,
    FingerprintSimilarity,
    IntersectMz,
    ModifiedCosine,
    ParentMassMatch
)

# Maps the command-line metric name to the similarity class to instantiate.
_SIMILARITY_METRICS = {
    'CosineGreedy': CosineGreedy,
    'CosineHungarian': CosineHungarian,
    'FingerprintSimilarity': FingerprintSimilarity,
    'IntersectMz': IntersectMz,
    'ModifiedCosine': ModifiedCosine,
    'ParentMassMatch': ParentMassMatch,
}


def main(argv):
    """Compute similarity scores between two MSP files and write them as CSV.

    Args:
        argv: Command-line arguments without the program name, in the order
            ``references_filename queries_filename output_filename
            similarity_metric``.

    Returns:
        0 on success.

    The output is a ``;``-separated table with one row per reference spectrum
    and one column per query spectrum, labelled with each spectrum's
    ``metadata['name']`` value.
    """
    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
    parser.add_argument(
        "references_filename", type=str, help="Path to reference MSP library."
    )
    parser.add_argument("queries_filename", type=str, help="Path to query spectra.")
    parser.add_argument("output_filename", type=str, help="Path where to store the output .csv.")
    parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')

    # Parse the arguments handed to main() rather than implicitly re-reading
    # sys.argv, so the function honours its own parameter.
    args = parser.parse_args(argv)

    metric_class = _SIMILARITY_METRICS.get(args.similarity_metric)
    if metric_class is None:
        # Historical behaviour: unknown names fall back to ParentMassMatch.
        # Keep that for compatibility, but make the substitution visible.
        print(
            f"Unknown similarity metric {args.similarity_metric!r}; "
            "falling back to ParentMassMatch.",
            file=sys.stderr,
        )
        metric_class = ParentMassMatch
    similarity_metric = metric_class()

    # Materialise both inputs as lists before scoring.
    reference_spectra = list(load_from_msp(args.references_filename))
    queries_spectra = list(load_from_msp(args.queries_filename))

    scores = calculate_scores(
        references=reference_spectra,
        queries=queries_spectra,
        similarity_function=similarity_metric,
    )

    query_names = [spectra.metadata['name'] for spectra in scores.queries]
    reference_names = [spectra.metadata['name'] for spectra in scores.references]
    dataframe = pandas.DataFrame(data=scores.scores, index=reference_names, columns=query_names)
    dataframe.to_csv(args.output_filename, sep=';')
    return 0


if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    sys.exit(main(argv=sys.argv[1:]))