Galaxy |

Changeset 2:a7c9fc186f8c (2021-04-19)

Previous changeset 1:4aecfd6b319b (2021-03-17) Next changeset 3:72adeb3ca264 (2021-05-19)

Commit message:
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 557e6558b93e63fd0b70443164d2d624cc05c319"

modified:
matchms.xml
matchms_wrapper.py

diff -r 4aecfd6b319b -r a7c9fc186f8c matchms.xml
--- a/matchms.xml Wed Mar 17 11:40:17 2021 +0000
+++ b/matchms.xml Mon Apr 19 08:31:42 2021 +0000

[

@@ -1,4 +1,4 @@
-<tool id="matchms" name="matchMS" version="0.8.2+galaxy2">
+<tool id="matchms" name="matchMS" version="0.8.2+galaxy3">
     <requirements>
         <requirement type="package" version="0.8.2">matchms</requirement>
         <requirement type="package" version="1.1.4">pandas</requirement>
@@ -9,8 +9,8 @@
     </environment_variables>

     <command detect_errors="exit_code"><![CDATA[
-        python3 ${__tool_directory__}/matchms_wrapper.py "$references" "$queries" "$similarity_metric" "$similarity_scores" "$similarity_matches"
-    ]]>    </command>
+        python3 ${__tool_directory__}/matchms_wrapper.py "$references" "$queries" "$similarity_metric" "$similarity_scores" "$similarity_matches" "$algorithm.tolerance" "$algorithm.mz_power" "$algorithm.intensity_power"
+    ]]> </command>

     <inputs>
         <param label="Reference spectra" name="references" type="data" format="msp" help="Reference mass spectra to match against as library." />
@@ -18,8 +18,13 @@
         <param label="Similarity metric" name="similarity_metric" type="select" display="radio" help="Similarity metric to use for score computation.">
             <option value="CosineGreedy" selected="true">CosineGreedy</option>
             <option value="CosineHungarian">CosineHungarian</option>
-            <option value="IntersectMz">IntersectMz</option>
         </param>
+
+        <section name="algorithm" title="Algorithm Parameters" expanded="true">
+            <param label="tolerance" name="tolerance" type="float" value="0.1" help="Peaks will be considered a match when less than tolerance apart. Absolute m/z value, not in ppm." />
+            <param label="mz_power" name="mz_power" type="float" value="0.0" help="The power to raise mz to in the cosine function." />
+            <param label="intensity_power" name="intensity_power" type="float" value="1.0" help="The power to raise intensity to in the cosine function." />
+        </section>
     </inputs>

     <outputs>
@@ -64,8 +69,6 @@
         | RAMClustR | Mass spectra  | msp    | queries   |
         +-----------+---------------+--------+-----------+

-        RAMClustR outputs a collection of **msp** files which can be matched to a library (.msp) using a similarity score computed in matchMS.
-
     Downstream Tools
         The **output** is a csv containing the similarity scores and a second csv containing the number of matched peaks.
     ]]></help>

diff -r 4aecfd6b319b -r a7c9fc186f8c matchms_wrapper.py
--- a/matchms_wrapper.py Wed Mar 17 11:40:17 2021 +0000
+++ b/matchms_wrapper.py Mon Apr 19 08:31:42 2021 +0000

[

@@ -2,14 +2,12 @@
import sys

from matchms import calculate_scores
+from matchms.filtering import add_precursor_mz
from matchms.importing import load_from_msp
from matchms.similarity import (
     CosineGreedy,
     CosineHungarian,
-    FingerprintSimilarity,
-    IntersectMz,
     ModifiedCosine,
-    ParentMassMatch
)
from pandas import DataFrame

@@ -23,30 +21,29 @@
     parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
     parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .csv scores.")
     parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .csv matches.")
+    parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
+    parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
+    parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")

     args = parser.parse_args()

+    reference_spectra = load_from_msp(args.references_filename)
+    queries_spectra = load_from_msp(args.queries_filename)
+
     if args.similarity_metric == 'CosineGreedy':
-        similarity_metric = CosineGreedy()
+        similarity_metric = CosineGreedy(args.tolerance, args.mz_power, args.intensity_power)
     elif args.similarity_metric == 'CosineHungarian':
-        similarity_metric = CosineHungarian()
-    elif args.similarity_metric == 'FingerprintSimilarity':
-        similarity_metric = FingerprintSimilarity()
-    elif args.similarity_metric == 'IntersectMz':
-        similarity_metric = IntersectMz()
+        similarity_metric = CosineHungarian(args.tolerance, args.mz_power, args.intensity_power)
     elif args.similarity_metric == 'ModifiedCosine':
-        similarity_metric = ModifiedCosine()
+        similarity_metric = ModifiedCosine(args.tolerance, args.mz_power, args.intensity_power)
+        reference_spectra = map(add_precursor_mz, reference_spectra)
+        queries_spectra = map(add_precursor_mz, queries_spectra)
     else:
-        similarity_metric = ParentMassMatch()
-
-    reference_spectra = [
-        spectrum for spectrum in load_from_msp(args.references_filename)
-    ]
-    queries_spectra = [spectrum for spectrum in load_from_msp(args.queries_filename)]
+        return -1

     scores = calculate_scores(
-        references=reference_spectra,
-        queries=queries_spectra,
+        references=list(reference_spectra),
+        queries=list(queries_spectra),
         similarity_function=similarity_metric,
     )