Mercurial > repos > recetox > matchms
diff matchms_wrapper.py @ 7:4571641de47a draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 845bb7e13e793df5b61b42962ab2df2c6339ac8c"
author | recetox |
---|---|
date | Tue, 26 Oct 2021 14:24:58 +0000 |
parents | 672c22d7f004 |
children | f06923bdd2f2 |
line wrap: on
line diff
--- a/matchms_wrapper.py Mon Aug 09 13:08:27 2021 +0000 +++ b/matchms_wrapper.py Tue Oct 26 14:24:58 2021 +0000 @@ -2,7 +2,7 @@ import sys from matchms import calculate_scores -from matchms.filtering import add_precursor_mz +from matchms.filtering import add_precursor_mz, default_filters, normalize_intensities from matchms.importing import load_from_msp from matchms.similarity import ( CosineGreedy, @@ -14,26 +14,34 @@ def main(argv): parser = argparse.ArgumentParser(description="Compute MSP similarity scores") - parser.add_argument( - "--ref", type=str, dest="references_filename", help="Path to reference MSP library." - ) + parser.add_argument("-f", dest="default_filters", action='store_true', help="Apply default filters") + parser.add_argument("-n", dest="normalize_intensities", action='store_true', help="Normalize intensities.") + parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.") + parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference MSP library.") parser.add_argument("queries_filename", type=str, help="Path to query spectra.") parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') - parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.") - parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.") parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.") parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.") parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.") - + parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.") + parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.") args = parser.parse_args() queries_spectra = list(load_from_msp(args.queries_filename)) - if(args.references_filename): + if args.symmetric: + reference_spectra = [] + else: reference_spectra = list(load_from_msp(args.references_filename)) - symmetric = False - else: - reference_spectra = queries_spectra.copy() - symmetric = True + + if args.default_filters is True: + print("Applying default filters...") + queries_spectra = list(map(default_filters, queries_spectra)) + reference_spectra = list(map(default_filters, reference_spectra)) + + if args.normalize_intensities is True: + print("Normalizing intensities...") + queries_spectra = list(map(normalize_intensities, queries_spectra)) + reference_spectra = list(map(normalize_intensities, reference_spectra)) if args.similarity_metric == 'CosineGreedy': similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power) @@ -41,18 +49,25 @@ similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power) elif args.similarity_metric == 'ModifiedCosine': similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power) - reference_spectra = map(add_precursor_mz, reference_spectra) - queries_spectra = map(add_precursor_mz, queries_spectra) + reference_spectra = list(map(add_precursor_mz, reference_spectra)) + queries_spectra = list(map(add_precursor_mz, queries_spectra)) else: return -1 + print("Calculating scores...") scores = calculate_scores( - references=list(reference_spectra), - queries=list(queries_spectra), + references=queries_spectra if args.symmetric else reference_spectra, + queries=queries_spectra, similarity_function=similarity_metric, - is_symmetric=symmetric + is_symmetric=args.symmetric ) + write_outputs(args, scores) + return 0 + + +def write_outputs(args, scores): + print("Storing outputs...") query_names = [spectra.metadata['name'] for spectra in scores.queries] reference_names = [spectra.metadata['name'] for spectra in scores.references] @@ -63,7 +78,6 @@ # Write number of matches to dataframe dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names) dataframe_matches.to_csv(args.output_filename_matches, sep='\t') - return 0 if __name__ == "__main__":