Repository 'matchms'
hg clone https://eddie.galaxyproject.org/repos/recetox/matchms

Changeset 11:ba9410f612bc (2022-03-17)
Previous changeset 10:c3dd958cc4a5 (2022-01-28) Next changeset 12:eedbc8f7267f (2022-03-22)
Commit message:
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c32f579c38aef4c2c5d088e6c1c9e522bc0a1a12"
modified:
macros.xml
matchms_similarity.xml
matchms_similarity_wrapper.py
test-data/filtering/clean_metadata.msp
test-data/filtering/default_filters.msp
test-data/filtering/input.msp
test-data/filtering/mz_range.msp
test-data/filtering/normalise_intensities.msp
test-data/filtering/relative_intensity.msp
added:
test-data/similarity/fill2.msp
test-data/similarity/matches_test6_out.tsv
test-data/similarity/scores_test6_out.tsv
removed:
test-data/similarity/recetox_gc-ei_ms_20201028_with_precursor_mz.msp
b
diff -r c3dd958cc4a5 -r ba9410f612bc macros.xml
--- a/macros.xml Fri Jan 28 16:22:06 2022 +0000
+++ b/macros.xml Thu Mar 17 12:26:24 2022 +0000
b
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.11.0</token>
+    <token name="@TOOL_VERSION@">0.14.0</token>
 
     <xml name="creator">
         <creator>
b
diff -r c3dd958cc4a5 -r ba9410f612bc matchms_similarity.xml
--- a/matchms_similarity.xml Fri Jan 28 16:22:06 2022 +0000
+++ b/matchms_similarity.xml Thu Mar 17 12:26:24 2022 +0000
b
@@ -1,4 +1,4 @@
-<tool id="matchms" name="matchMS similarity" version="@TOOL_VERSION@+galaxy1">
+<tool id="matchms" name="matchMS similarity" version="@TOOL_VERSION@+galaxy0" python_template_version="3.8">
     <description>calculate the similarity score and matched peaks</description>
 
     <macros>
@@ -7,8 +7,9 @@
     <expand macro="creator"/>
 
     <requirements>
+        <requirement type="package" version="1.1.4">pandas</requirement>
+        <requirement type="package" version="0.55.1">numba</requirement>
         <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
-        <requirement type="package" version="1.1.4">pandas</requirement>
     </requirements>
 
     <environment_variables>
@@ -22,6 +23,9 @@
     <configfiles>
         <configfile name="matchms_python_cli">
             python3 ${__tool_directory__}/matchms_similarity_wrapper.py \
+            #if $ri_filtering.is_true
+            -r $ri_filtering.tolerance \
+            #end if 
             #if $symmetric.is_symmetric
             -s \
             #else
@@ -65,6 +69,15 @@
             <param label="intensity_power" name="intensity_power" type="float" value="1.0"
                    help="The power to raise intensity to in the cosine function."/>
         </section>
+
+        <conditional name="ri_filtering">
+            <param name="is_true" label="Apply RI filtering" type="boolean" truevalue="TRUE" falsevalue="FALSE"
+                   checked="false"/>
+            <when value="TRUE">
+            <param label="tolerance" name="tolerance" type="float" value="60"
+                   help="Peaks will be considered a match when less than tolerance apart."/>
+            </when>
+        </conditional>
     </inputs>
 
     <outputs>
@@ -101,8 +114,8 @@
                     checksum="md5$28dc16ce45105234437e53d59e240046"/>
         </test>
         <test>
-            <param name="references" value="similarity/recetox_gc-ei_ms_20201028_with_precursor_mz.msp" ftype="msp"/>
-            <param name="queries" value="similarity/recetox_gc-ei_ms_20201028_with_precursor_mz.msp" ftype="msp"/>
+            <param name="references" value="similarity/recetox_gc-ei_ms_20201028.msp" ftype="msp"/>
+            <param name="queries" value="similarity/recetox_gc-ei_ms_20201028.msp" ftype="msp"/>
             <param name="similarity_metric" value="ModifiedCosine"/>
             <output name="similarity_scores" file="similarity/scores_test4_out.tsv" ftype="tsv"/>
             <output name="similarity_matches" file="similarity/matches_test4_out.tsv" ftype="tsv"/>
@@ -114,6 +127,15 @@
             <output name="similarity_scores" file="similarity/scores_test5_out.tsv" ftype="tsv"/>
             <output name="similarity_matches" file="similarity/matches_test5_out.tsv" ftype="tsv"/>
         </test>
+        <test>
+            <param name="references" value="similarity/recetox_gc-ei_ms_20201028.msp" ftype="msp"/>
+            <param name="queries" value="similarity/fill2.msp" ftype="msp"/>
+            <param name="ri_filtering.is_true" value="TRUE" />
+            <param name="ri_filtering.tolerance" value="60.0" />
+            <param name="similarity_metric" value="CosineGreedy"/>
+            <output name="similarity_scores" file="similarity/scores_test6_out.tsv" ftype="tsv"/>
+            <output name="similarity_matches" file="similarity/matches_test6_out.tsv" ftype="tsv"/>       
+        </test>
     </tests>
 
     <help>
b
diff -r c3dd958cc4a5 -r ba9410f612bc matchms_similarity_wrapper.py
--- a/matchms_similarity_wrapper.py Fri Jan 28 16:22:06 2022 +0000
+++ b/matchms_similarity_wrapper.py Thu Mar 17 12:26:24 2022 +0000
[
@@ -1,13 +1,10 @@
 import argparse
 import sys
 
+import numpy as np
 from matchms import calculate_scores
 from matchms.importing import load_from_mgf, load_from_msp
-from matchms.similarity import (
-    CosineGreedy,
-    CosineHungarian,
-    ModifiedCosine,
-)
+from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine
 from pandas import DataFrame
 
 
@@ -28,6 +25,7 @@
 
 def main(argv):
     parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
+    parser.add_argument("-r", dest="ri_tolerance", type=float, help="Use RI filtering with given tolerance.")
     parser.add_argument("-s", dest="symmetric", action='store_true', help="Computation is symmetric.")
     parser.add_argument("--ref", dest="references_filename", type=str, help="Path to reference spectra library.")
     parser.add_argument("--ref_format", dest="references_format", type=str, help="Reference spectra library file format.")
@@ -77,14 +75,19 @@
         is_symmetric=args.symmetric
     )
 
+    if args.ri_tolerance is not None:
+        print("RI filtering with tolerance ", args.ri_tolerance)
+        ri_matches = calculate_scores(reference_spectra, queries_spectra, MetadataMatch("retention_index", "difference", args.ri_tolerance)).scores
+        scores.scores["score"] = np.where(ri_matches, scores.scores["score"], 0.0)
+
     write_outputs(args, scores)
     return 0
 
 
 def write_outputs(args, scores):
     print("Storing outputs...")
-    query_names = [spectra.metadata['name'] for spectra in scores.queries]
-    reference_names = [spectra.metadata['name'] for spectra in scores.references]
+    query_names = [spectra.metadata['compound_name'] for spectra in scores.queries]
+    reference_names = [spectra.metadata['compound_name'] for spectra in scores.references]
 
     # Write scores to dataframe
     dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/clean_metadata.msp
--- a/test-data/filtering/clean_metadata.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/clean_metadata.msp Thu Mar 17 12:26:24 2022 +0000
b
b'@@ -1,212 +1,197 @@\n-NAME: C001\n-IONMODE: Negative\n-RETENTIONTIME: 38.74\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C001\n+RETENTION_TIME: 38.74\n RETENTION_INDEX: None\n-RETENTION_TIME: 38.74\n NUM PEAKS: 57\n-138.9121\t10186226.0\n-148.9337\t1008656.0\n-175.0641\t26780143.0\n-186.1095\t2675456.0\n-196.8658\t21390430.0\n-198.8647\t21688594.0\n-200.8848\t7742528.0\n-206.9034\t26130980.0\n-216.9205\t32607700.0\n-234.0134\t2550129.0\n-254.8252\t23747536.0\n-256.8215\t31377637.0\n-258.8237\t15532799.0\n-266.8652\t9805546.0\n-268.8537\t3090354.0\n-306.9914\t3169316.0\n-312.7841\t10051801.0\n-316.7777\t10734168.0\n-322.8157\t6317648.0\n-324.9549\t8619910.0\n-334.849\t4178412.0\n-342.8093\t3285552.0\n-349.9455\t2050695.0\n-350.9875\t6150799.0\n-351.941\t1965882.0\n-366.8281\t3253770.0\n-370.7418\t9765463.0\n-372.7383\t19374863.0\n-382.8218\t12815572.0\n-384.8177\t8311500.0\n-392.7685\t10913351.0\n-413.2664\t3965867.0\n-426.7772\t5431633.0\n-428.7834\t8554675.0\n-434.7287\t9943329.0\n-436.8161\t3705247.0\n-440.7322\t10603010.0\n-442.7401\t8271752.0\n-450.7016\t8762673.0\n-460.7076\t4528973.0\n-462.7862\t2123666.0\n-484.7242\t4273989.0\n-486.7743\t4886062.0\n-488.6825\t12267966.0\n-492.744\t7662344.0\n-494.8953\t7188793.0\n-498.8794\t6811405.0\n-500.8484\t6520691.0\n-502.7832\t3567833.0\n-510.763\t4989757.0\n-518.7415\t4243468.0\n-546.6093\t7177067.0\n-550.6949\t6104789.0\n-566.5977\t5171811.0\n-612.6927\t2005587.0\n-676.6436\t1982714.0\n-800.4451\t2792137.0\n+138.9121    10186226.0\n+148.9337    1008656.0\n+175.0641    26780143.0\n+186.1095    2675456.0\n+196.8658    21390430.0\n+198.8647    21688594.0\n+200.8848    7742528.0\n+206.9034    26130980.0\n+216.9205    32607700.0\n+234.0134    2550129.0\n+254.8252    23747536.0\n+256.8215    31377637.0\n+258.8237    15532799.0\n+266.8652    9805546.0\n+268.8537    3090354.0\n+306.9914    3169316.0\n+312.7841    10051801.0\n+316.7777    10734168.0\n+322.8157    6317648.0\n+324.9549    8619910.0\n+334.849     4178412.0\n+342.8093    3285552.0\n+349.9455    2050695.0\n+350.9875    6150799.0\n+351.941     1965882.0\n+366.8281    3253770.0\n+370.7418    9765463.0\n+372.7383    19374863.0\n+382.8218    12815572.0\n+384.8177    8311500.0\n+392.7685    10913351.0\n+413.2664    3965867.0\n+426.7772    5431633.0\n+428.7834    8554675.0\n+434.7287    9943329.0\n+436.8161    3705247.0\n+440.7322    10603010.0\n+442.7401    8271752.0\n+450.7016    8762673.0\n+460.7076    4528973.0\n+462.7862    2123666.0\n+484.7242    4273989.0\n+486.7743    4886062.0\n+488.6825    12267966.0\n+492.744     7662344.0\n+494.8953    7188793.0\n+498.8794    6811405.0\n+500.8484    6520691.0\n+502.7832    3567833.0\n+510.763     4989757.0\n+518.7415    4243468.0\n+546.6093    7177067.0\n+550.6949    6104789.0\n+566.5977    5171811.0\n+612.6927    2005587.0\n+676.6436    1982714.0\n+800.4451    2792137.0\n \n-NAME: C002\n-IONMODE: Negative\n-RETENTIONTIME: 520.25\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C002\n-RETENTION_INDEX: None\n RETENTION_TIME: 520.25\n+RETENTION_INDEX: 1234.5\n NUM PEAKS: 35\n-131.1733\t1971789.0\n-267.2688\t6103973.0\n-279.0196\t1946255.0\n-289.6491\t46498377.0\n-301.1565\t15185412.0\n-309.1649\t18045974.0\n-310.1623\t295359836.0\n-311.1658\t13124727.0\n-312.0296\t38757284.0\n-330.6757\t12666597.0\n-525.375\t1073323842.0\n-526.3783\t181668883.0\n-527.3812\t23642795.0\n-551.3321\t111616808.0\n-552.3348\t28340614.0\n-553.3314\t2609936.0\n-562.3269\t7538206.0\n-578.2905\t7578406.0\n-619.3008\t4742103.0\n-624.296\t11790213.0\n-813.5403\t25060147.0\n-814.5336\t5865975.0\n-955.1171\t2322927.0\n-1047.7378\t150394804.0\n-1048.7399\t90978863.0\n-1049.7432\t29946438.0\n-1050.7453\t6807767.0\n-1069.7158\t5074652.0\n-1074.1979\t3402288.0\n-1075.1968\t33352763.0\n-1076.2004\t10417953.0\n-1101.6535\t2023916.0\n-1206.3127\t3738816.0\n-1216.8041\t4439324.0\n-1217.807\t3565334.0\n+131.1733    1971789.0\n+267.2688    6103973.0\n+279.0196    1946255.0\n+289.6491    46498377.0\n+301.1565    15185412.0\n+309.1649    18045974.0\n+310.1623    295359836.0\n+311.1658    13124727.0\n+312.0296    38757284.0\n+330.6757    12666597.0\n+525.375     1073323842.0\n+526.3783    181668883.0\n+527.3812    23642795.0\n+551.3321'..b'767.0\n+1069.7158   5074652.0\n+1074.1979   3402288.0\n+1075.1968   33352763.0\n+1076.2004   10417953.0\n+1101.6535   2023916.0\n+1206.3127   3738816.0\n+1216.8041   4439324.0\n+1217.807    3565334.0\n \n-NAME: C003\n-IONMODE: Negative\n-RETENTIONTIME: 483.67\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C003\n+RETENTION_TIME: 483.67\n RETENTION_INDEX: None\n-RETENTION_TIME: 483.67\n NUM PEAKS: 26\n-265.2529\t11366224.0\n-266.2564\t1420444.0\n-279.6362\t29849749.0\n-280.6546\t8848921.0\n-288.6414\t202172046.0\n-378.2093\t15309961.0\n-379.1966\t2902366.0\n-522.3565\t4089569222.0\n-523.354\t1201714423.0\n-549.3267\t63300808.0\n-576.2749\t7386007.0\n-577.3074\t2354251.0\n-617.2778\t2323470.0\n-625.4543\t4040374.0\n-796.9808\t13576738.0\n-797.9841\t6368973.0\n-809.9883\t12596682.0\n-810.9916\t6601055.0\n-1043.7028\t144351468.0\n-1044.7068\t83271854.0\n-1045.706\t27998321.0\n-1046.7131\t6505178.0\n-1058.1594\t20718345.0\n-1059.1626\t6608764.0\n-1071.1639\t15461047.0\n-1072.1671\t5096642.0\n+265.2529    11366224.0\n+266.2564    1420444.0\n+279.6362    29849749.0\n+280.6546    8848921.0\n+288.6414    202172046.0\n+378.2093    15309961.0\n+379.1966    2902366.0\n+522.3565    4089569222.0\n+523.354     1201714423.0\n+549.3267    63300808.0\n+576.2749    7386007.0\n+577.3074    2354251.0\n+617.2778    2323470.0\n+625.4543    4040374.0\n+796.9808    13576738.0\n+797.9841    6368973.0\n+809.9883    12596682.0\n+810.9916    6601055.0\n+1043.7028   144351468.0\n+1044.7068   83271854.0\n+1045.706    27998321.0\n+1046.7131   6505178.0\n+1058.1594   20718345.0\n+1059.1626   6608764.0\n+1071.1639   15461047.0\n+1072.1671   5096642.0\n \n-NAME: C004\n-IONMODE: Negative\n-RETENTIONTIME: 473.48\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C004\n+RETENTION_TIME: 473.48\n RETENTION_INDEX: None\n-RETENTION_TIME: 473.48\n NUM PEAKS: 24\n-124.1405\t6517662.0\n-170.2437\t1237313.0\n-275.6336\t28001849.0\n-296.147\t190395687.0\n-482.3247\t145772322.0\n-483.3283\t36245876.0\n-496.34\t12577588056.0\n-497.3442\t3337125302.0\n-498.3462\t532285213.0\n-499.3493\t68176083.0\n-770.964\t49250157.0\n-771.9675\t22666873.0\n-783.9721\t9839299.0\n-784.9749\t3622908.0\n-949.6233\t8009033.0\n-950.6274\t3674694.0\n-991.6726\t1420557258.0\n-992.6749\t763118028.0\n-993.6787\t239161906.0\n-994.6801\t53549573.0\n-1017.6897\t168186952.0\n-1018.6656\t120599518.0\n-1019.6555\t57647644.0\n-1020.6591\t12469103.0\n+124.1405    6517662.0\n+170.2437    1237313.0\n+275.6336    28001849.0\n+296.147     190395687.0\n+482.3247    145772322.0\n+483.3283    36245876.0\n+496.34      12577588056.0\n+497.3442    3337125302.0\n+498.3462    532285213.0\n+499.3493    68176083.0\n+770.964     49250157.0\n+771.9675    22666873.0\n+783.9721    9839299.0\n+784.9749    3622908.0\n+949.6233    8009033.0\n+950.6274    3674694.0\n+991.6726    1420557258.0\n+992.6749    763118028.0\n+993.6787    239161906.0\n+994.6801    53549573.0\n+1017.6897   168186952.0\n+1018.6656   120599518.0\n+1019.6555   57647644.0\n+1020.6591   12469103.0\n \n-NAME: C005\n-IONMODE: Negative\n-RETENTIONTIME: 41.72\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C005\n+RETENTION_TIME: 41.72\n RETENTION_INDEX: None\n-RETENTION_TIME: 41.72\n NUM PEAKS: 20\n-218.1386\t14009249.0\n-337.0623\t88672453.0\n-338.0654\t8770055.0\n-353.0361\t37061354.0\n-359.0443\t48435582.0\n-360.0459\t5025128.0\n-375.018\t29159485.0\n-376.0216\t2740193.0\n-381.0261\t13522755.0\n-396.9999\t10317665.0\n-417.0027\t13822994.0\n-418.9966\t4386311.0\n-432.9764\t9779399.0\n-438.9851\t11307111.0\n-440.9796\t3364168.0\n-454.9592\t9820452.0\n-456.9603\t3774845.0\n-470.9263\t3632486.0\n-512.8989\t4072570.0\n-572.871\t3485486.0\n+218.1386    14009249.0\n+337.0623    88672453.0\n+338.0654    8770055.0\n+353.0361    37061354.0\n+359.0443    48435582.0\n+360.0459    5025128.0\n+375.018     29159485.0\n+376.0216    2740193.0\n+381.0261    13522755.0\n+396.9999    10317665.0\n+417.0027    13822994.0\n+418.9966    4386311.0\n+432.9764    9779399.0\n+438.9851    11307111.0\n+440.9796    3364168.0\n+454.9592    9820452.0\n+456.9603    3774845.0\n+470.9263    3632486.0\n+512.8989    4072570.0\n+572.871     3485486.0\n \n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/default_filters.msp
--- a/test-data/filtering/default_filters.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/default_filters.msp Thu Mar 17 12:26:24 2022 +0000
b
b'@@ -1,207 +1,199 @@\n-NAME: C001\n IONMODE: negative\n-RETENTIONTIME: 38.74\n-RETENTIONINDEX: \n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C001\n+RETENTION_TIME: 38.74\n+RETENTION_INDEX: None\n CHARGE: -1\n NUM PEAKS: 57\n-138.9121\t10186226.0\n-148.9337\t1008656.0\n-175.0641\t26780143.0\n-186.1095\t2675456.0\n-196.8658\t21390430.0\n-198.8647\t21688594.0\n-200.8848\t7742528.0\n-206.9034\t26130980.0\n-216.9205\t32607700.0\n-234.0134\t2550129.0\n-254.8252\t23747536.0\n-256.8215\t31377637.0\n-258.8237\t15532799.0\n-266.8652\t9805546.0\n-268.8537\t3090354.0\n-306.9914\t3169316.0\n-312.7841\t10051801.0\n-316.7777\t10734168.0\n-322.8157\t6317648.0\n-324.9549\t8619910.0\n-334.849\t4178412.0\n-342.8093\t3285552.0\n-349.9455\t2050695.0\n-350.9875\t6150799.0\n-351.941\t1965882.0\n-366.8281\t3253770.0\n-370.7418\t9765463.0\n-372.7383\t19374863.0\n-382.8218\t12815572.0\n-384.8177\t8311500.0\n-392.7685\t10913351.0\n-413.2664\t3965867.0\n-426.7772\t5431633.0\n-428.7834\t8554675.0\n-434.7287\t9943329.0\n-436.8161\t3705247.0\n-440.7322\t10603010.0\n-442.7401\t8271752.0\n-450.7016\t8762673.0\n-460.7076\t4528973.0\n-462.7862\t2123666.0\n-484.7242\t4273989.0\n-486.7743\t4886062.0\n-488.6825\t12267966.0\n-492.744\t7662344.0\n-494.8953\t7188793.0\n-498.8794\t6811405.0\n-500.8484\t6520691.0\n-502.7832\t3567833.0\n-510.763\t4989757.0\n-518.7415\t4243468.0\n-546.6093\t7177067.0\n-550.6949\t6104789.0\n-566.5977\t5171811.0\n-612.6927\t2005587.0\n-676.6436\t1982714.0\n-800.4451\t2792137.0\n+138.9121    10186226.0\n+148.9337    1008656.0\n+175.0641    26780143.0\n+186.1095    2675456.0\n+196.8658    21390430.0\n+198.8647    21688594.0\n+200.8848    7742528.0\n+206.9034    26130980.0\n+216.9205    32607700.0\n+234.0134    2550129.0\n+254.8252    23747536.0\n+256.8215    31377637.0\n+258.8237    15532799.0\n+266.8652    9805546.0\n+268.8537    3090354.0\n+306.9914    3169316.0\n+312.7841    10051801.0\n+316.7777    10734168.0\n+322.8157    6317648.0\n+324.9549    8619910.0\n+334.849     4178412.0\n+342.8093    3285552.0\n+349.9455    2050695.0\n+350.9875    6150799.0\n+351.941     1965882.0\n+366.8281    3253770.0\n+370.7418    9765463.0\n+372.7383    19374863.0\n+382.8218    12815572.0\n+384.8177    8311500.0\n+392.7685    10913351.0\n+413.2664    3965867.0\n+426.7772    5431633.0\n+428.7834    8554675.0\n+434.7287    9943329.0\n+436.8161    3705247.0\n+440.7322    10603010.0\n+442.7401    8271752.0\n+450.7016    8762673.0\n+460.7076    4528973.0\n+462.7862    2123666.0\n+484.7242    4273989.0\n+486.7743    4886062.0\n+488.6825    12267966.0\n+492.744     7662344.0\n+494.8953    7188793.0\n+498.8794    6811405.0\n+500.8484    6520691.0\n+502.7832    3567833.0\n+510.763     4989757.0\n+518.7415    4243468.0\n+546.6093    7177067.0\n+550.6949    6104789.0\n+566.5977    5171811.0\n+612.6927    2005587.0\n+676.6436    1982714.0\n+800.4451    2792137.0\n \n-NAME: C002\n IONMODE: negative\n-RETENTIONTIME: 520.25\n-RETENTIONINDEX: \n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C002\n+RETENTION_TIME: 520.25\n+RETENTION_INDEX: 1234.5\n CHARGE: -1\n NUM PEAKS: 35\n-131.1733\t1971789.0\n-267.2688\t6103973.0\n-279.0196\t1946255.0\n-289.6491\t46498377.0\n-301.1565\t15185412.0\n-309.1649\t18045974.0\n-310.1623\t295359836.0\n-311.1658\t13124727.0\n-312.0296\t38757284.0\n-330.6757\t12666597.0\n-525.375\t1073323842.0\n-526.3783\t181668883.0\n-527.3812\t23642795.0\n-551.3321\t111616808.0\n-552.3348\t28340614.0\n-553.3314\t2609936.0\n-562.3269\t7538206.0\n-578.2905\t7578406.0\n-619.3008\t4742103.0\n-624.296\t11790213.0\n-813.5403\t25060147.0\n-814.5336\t5865975.0\n-955.1171\t2322927.0\n-1047.7378\t150394804.0\n-1048.7399\t90978863.0\n-1049.7432\t29946438.0\n-1050.7453\t6807767.0\n-1069.7158\t5074652.0\n-1074.1979\t3402288.0\n-1075.1968\t33352763.0\n-1076.2004\t10417953.0\n-1101.6535\t2023916.0\n-1206.3127\t3738816.0\n-1216.8041\t4439324.0\n-1217.807\t3565334.0\n+131.1733    1971789.0\n+267.2688    6103973.0\n+279.0196    1946255.0\n+289.6491    46498377.0\n+301.1565    15185412.0\n+309.1649    18045974.0\n+310.1623    295359836.0\n+311.1658    13124727.0\n+312.0296    38757284.0\n+330.6757    12666597.0\n+525.375     1073323842.0\n+526.3783    181668883.0\n+527.3812    23642795.0\n+551.3321    111616808.0\n+552.3348    28340614.0\n+553.3314    2609936'..b'\n+813.5403    25060147.0\n+814.5336    5865975.0\n+955.1171    2322927.0\n+1047.7378   150394804.0\n+1048.7399   90978863.0\n+1049.7432   29946438.0\n+1050.7453   6807767.0\n+1069.7158   5074652.0\n+1074.1979   3402288.0\n+1075.1968   33352763.0\n+1076.2004   10417953.0\n+1101.6535   2023916.0\n+1206.3127   3738816.0\n+1216.8041   4439324.0\n+1217.807    3565334.0\n \n-NAME: C003\n IONMODE: negative\n-RETENTIONTIME: 483.67\n-RETENTIONINDEX: \n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C003\n+RETENTION_TIME: 483.67\n CHARGE: -1\n NUM PEAKS: 26\n-265.2529\t11366224.0\n-266.2564\t1420444.0\n-279.6362\t29849749.0\n-280.6546\t8848921.0\n-288.6414\t202172046.0\n-378.2093\t15309961.0\n-379.1966\t2902366.0\n-522.3565\t4089569222.0\n-523.354\t1201714423.0\n-549.3267\t63300808.0\n-576.2749\t7386007.0\n-577.3074\t2354251.0\n-617.2778\t2323470.0\n-625.4543\t4040374.0\n-796.9808\t13576738.0\n-797.9841\t6368973.0\n-809.9883\t12596682.0\n-810.9916\t6601055.0\n-1043.7028\t144351468.0\n-1044.7068\t83271854.0\n-1045.706\t27998321.0\n-1046.7131\t6505178.0\n-1058.1594\t20718345.0\n-1059.1626\t6608764.0\n-1071.1639\t15461047.0\n-1072.1671\t5096642.0\n+265.2529    11366224.0\n+266.2564    1420444.0\n+279.6362    29849749.0\n+280.6546    8848921.0\n+288.6414    202172046.0\n+378.2093    15309961.0\n+379.1966    2902366.0\n+522.3565    4089569222.0\n+523.354     1201714423.0\n+549.3267    63300808.0\n+576.2749    7386007.0\n+577.3074    2354251.0\n+617.2778    2323470.0\n+625.4543    4040374.0\n+796.9808    13576738.0\n+797.9841    6368973.0\n+809.9883    12596682.0\n+810.9916    6601055.0\n+1043.7028   144351468.0\n+1044.7068   83271854.0\n+1045.706    27998321.0\n+1046.7131   6505178.0\n+1058.1594   20718345.0\n+1059.1626   6608764.0\n+1071.1639   15461047.0\n+1072.1671   5096642.0\n \n-NAME: C004\n IONMODE: negative\n-RETENTIONTIME: 473.48\n-RETENTIONINDEX: \n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C004\n+RETENTION_TIME: 473.48\n CHARGE: -1\n NUM PEAKS: 24\n-124.1405\t6517662.0\n-170.2437\t1237313.0\n-275.6336\t28001849.0\n-296.147\t190395687.0\n-482.3247\t145772322.0\n-483.3283\t36245876.0\n-496.34\t12577588056.0\n-497.3442\t3337125302.0\n-498.3462\t532285213.0\n-499.3493\t68176083.0\n-770.964\t49250157.0\n-771.9675\t22666873.0\n-783.9721\t9839299.0\n-784.9749\t3622908.0\n-949.6233\t8009033.0\n-950.6274\t3674694.0\n-991.6726\t1420557258.0\n-992.6749\t763118028.0\n-993.6787\t239161906.0\n-994.6801\t53549573.0\n-1017.6897\t168186952.0\n-1018.6656\t120599518.0\n-1019.6555\t57647644.0\n-1020.6591\t12469103.0\n+124.1405    6517662.0\n+170.2437    1237313.0\n+275.6336    28001849.0\n+296.147     190395687.0\n+482.3247    145772322.0\n+483.3283    36245876.0\n+496.34      12577588056.0\n+497.3442    3337125302.0\n+498.3462    532285213.0\n+499.3493    68176083.0\n+770.964     49250157.0\n+771.9675    22666873.0\n+783.9721    9839299.0\n+784.9749    3622908.0\n+949.6233    8009033.0\n+950.6274    3674694.0\n+991.6726    1420557258.0\n+992.6749    763118028.0\n+993.6787    239161906.0\n+994.6801    53549573.0\n+1017.6897   168186952.0\n+1018.6656   120599518.0\n+1019.6555   57647644.0\n+1020.6591   12469103.0\n \n-NAME: C005\n IONMODE: negative\n-RETENTIONTIME: 41.72\n-RETENTIONINDEX: \n SPECTRUMTYPE: Centroid\n COMPOUND_NAME: C005\n+RETENTION_TIME: 41.72\n CHARGE: -1\n NUM PEAKS: 20\n-218.1386\t14009249.0\n-337.0623\t88672453.0\n-338.0654\t8770055.0\n-353.0361\t37061354.0\n-359.0443\t48435582.0\n-360.0459\t5025128.0\n-375.018\t29159485.0\n-376.0216\t2740193.0\n-381.0261\t13522755.0\n-396.9999\t10317665.0\n-417.0027\t13822994.0\n-418.9966\t4386311.0\n-432.9764\t9779399.0\n-438.9851\t11307111.0\n-440.9796\t3364168.0\n-454.9592\t9820452.0\n-456.9603\t3774845.0\n-470.9263\t3632486.0\n-512.8989\t4072570.0\n-572.871\t3485486.0\n+218.1386    14009249.0\n+337.0623    88672453.0\n+338.0654    8770055.0\n+353.0361    37061354.0\n+359.0443    48435582.0\n+360.0459    5025128.0\n+375.018     29159485.0\n+376.0216    2740193.0\n+381.0261    13522755.0\n+396.9999    10317665.0\n+417.0027    13822994.0\n+418.9966    4386311.0\n+432.9764    9779399.0\n+438.9851    11307111.0\n+440.9796    3364168.0\n+454.9592    9820452.0\n+456.9603    3774845.0\n+470.9263    3632486.0\n+512.8989    4072570.0\n+572.871     3485486.0\n \n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/input.msp
--- a/test-data/filtering/input.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/input.msp Thu Mar 17 12:26:24 2022 +0000
b
@@ -1,7 +1,7 @@
 NAME:C001
 IONMODE:Negative
 RETENTIONTIME:38.74
-RETENTIONINDEX:
+RETENTIONINDEX: -1
 SPECTRUMTYPE:Centroid
 Num Peaks:57
 216.9205 32607700
@@ -65,7 +65,7 @@
 NAME:C002
 IONMODE:Negative
 RETENTIONTIME:520.25
-RETENTIONINDEX:
+RETENTIONINDEX: 1234.5
 SPECTRUMTYPE:Centroid
 Num Peaks:35
 525.375 1073323842
@@ -107,7 +107,6 @@
 NAME:C003
 IONMODE:Negative
 RETENTIONTIME:483.67
-RETENTIONINDEX:
 SPECTRUMTYPE:Centroid
 Num Peaks:26
 522.3565 4089569222
@@ -140,7 +139,6 @@
 NAME:C004
 IONMODE:Negative
 RETENTIONTIME:473.48
-RETENTIONINDEX:
 SPECTRUMTYPE:Centroid
 Num Peaks:24
 496.34 12577588056
@@ -171,7 +169,6 @@
 NAME:C005
 IONMODE:Negative
 RETENTIONTIME:41.72
-RETENTIONINDEX:
 SPECTRUMTYPE:Centroid
 Num Peaks:20
 337.0623 88672453
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/mz_range.msp
--- a/test-data/filtering/mz_range.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/mz_range.msp Thu Mar 17 12:26:24 2022 +0000
b
@@ -1,132 +1,129 @@
-NAME: C001
-IONMODE: Negative
-RETENTIONTIME: 38.74
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
 NUM PEAKS: 41
-306.9914 3169316.0
-312.7841 10051801.0
-316.7777 10734168.0
-322.8157 6317648.0
-324.9549 8619910.0
-334.849 4178412.0
-342.8093 3285552.0
-349.9455 2050695.0
-350.9875 6150799.0
-351.941 1965882.0
-366.8281 3253770.0
-370.7418 9765463.0
-372.7383 19374863.0
-382.8218 12815572.0
-384.8177 8311500.0
-392.7685 10913351.0
-413.2664 3965867.0
-426.7772 5431633.0
-428.7834 8554675.0
-434.7287 9943329.0
-436.8161 3705247.0
-440.7322 10603010.0
-442.7401 8271752.0
-450.7016 8762673.0
-460.7076 4528973.0
-462.7862 2123666.0
-484.7242 4273989.0
-486.7743 4886062.0
-488.6825 12267966.0
-492.744 7662344.0
-494.8953 7188793.0
-498.8794 6811405.0
-500.8484 6520691.0
-502.7832 3567833.0
-510.763 4989757.0
-518.7415 4243468.0
-546.6093 7177067.0
-550.6949 6104789.0
-566.5977 5171811.0
-612.6927 2005587.0
-676.6436 1982714.0
+306.9914    3169316.0
+312.7841    10051801.0
+316.7777    10734168.0
+322.8157    6317648.0
+324.9549    8619910.0
+334.849     4178412.0
+342.8093    3285552.0
+349.9455    2050695.0
+350.9875    6150799.0
+351.941     1965882.0
+366.8281    3253770.0
+370.7418    9765463.0
+372.7383    19374863.0
+382.8218    12815572.0
+384.8177    8311500.0
+392.7685    10913351.0
+413.2664    3965867.0
+426.7772    5431633.0
+428.7834    8554675.0
+434.7287    9943329.0
+436.8161    3705247.0
+440.7322    10603010.0
+442.7401    8271752.0
+450.7016    8762673.0
+460.7076    4528973.0
+462.7862    2123666.0
+484.7242    4273989.0
+486.7743    4886062.0
+488.6825    12267966.0
+492.744     7662344.0
+494.8953    7188793.0
+498.8794    6811405.0
+500.8484    6520691.0
+502.7832    3567833.0
+510.763     4989757.0
+518.7415    4243468.0
+546.6093    7177067.0
+550.6949    6104789.0
+566.5977    5171811.0
+612.6927    2005587.0
+676.6436    1982714.0
 
-NAME: C002
-IONMODE: Negative
-RETENTIONTIME: 520.25
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
 NUM PEAKS: 16
-301.1565 15185412.0
-309.1649 18045974.0
-310.1623 295359836.0
-311.1658 13124727.0
-312.0296 38757284.0
-330.6757 12666597.0
-525.375 1073323842.0
-526.3783 181668883.0
-527.3812 23642795.0
-551.3321 111616808.0
-552.3348 28340614.0
-553.3314 2609936.0
-562.3269 7538206.0
-578.2905 7578406.0
-619.3008 4742103.0
-624.296 11790213.0
+301.1565    15185412.0
+309.1649    18045974.0
+310.1623    295359836.0
+311.1658    13124727.0
+312.0296    38757284.0
+330.6757    12666597.0
+525.375     1073323842.0
+526.3783    181668883.0
+527.3812    23642795.0
+551.3321    111616808.0
+552.3348    28340614.0
+553.3314    2609936.0
+562.3269    7538206.0
+578.2905    7578406.0
+619.3008    4742103.0
+624.296     11790213.0
 
-NAME: C003
-IONMODE: Negative
-RETENTIONTIME: 483.67
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
 NUM PEAKS: 11
-378.2093 15309961.0
-379.1966 2902366.0
-522.3565 4089569222.0
-523.354 1201714423.0
-549.3267 63300808.0
-576.2749 7386007.0
-577.3074 2354251.0
-617.2778 2323470.0
-625.4543 4040374.0
-796.9808 13576738.0
-797.9841 6368973.0
+378.2093    15309961.0
+379.1966    2902366.0
+522.3565    4089569222.0
+523.354     1201714423.0
+549.3267    63300808.0
+576.2749    7386007.0
+577.3074    2354251.0
+617.2778    2323470.0
+625.4543    4040374.0
+796.9808    13576738.0
+797.9841    6368973.0
 
-NAME: C004
-IONMODE: Negative
-RETENTIONTIME: 473.48
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
 NUM PEAKS: 10
-482.3247 145772322.0
-483.3283 36245876.0
-496.34 12577588056.0
-497.3442 3337125302.0
-498.3462 532285213.0
-499.3493 68176083.0
-770.964 49250157.0
-771.9675 22666873.0
-783.9721 9839299.0
-784.9749 3622908.0
+482.3247    145772322.0
+483.3283    36245876.0
+496.34      12577588056.0
+497.3442    3337125302.0
+498.3462    532285213.0
+499.3493    68176083.0
+770.964     49250157.0
+771.9675    22666873.0
+783.9721    9839299.0
+784.9749    3622908.0
 
-NAME: C005
-IONMODE: Negative
-RETENTIONTIME: 41.72
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
 NUM PEAKS: 19
-337.0623 88672453.0
-338.0654 8770055.0
-353.0361 37061354.0
-359.0443 48435582.0
-360.0459 5025128.0
-375.018 29159485.0
-376.0216 2740193.0
-381.0261 13522755.0
-396.9999 10317665.0
-417.0027 13822994.0
-418.9966 4386311.0
-432.9764 9779399.0
-438.9851 11307111.0
-440.9796 3364168.0
-454.9592 9820452.0
-456.9603 3774845.0
-470.9263 3632486.0
-512.8989 4072570.0
-572.871 3485486.0
+337.0623    88672453.0
+338.0654    8770055.0
+353.0361    37061354.0
+359.0443    48435582.0
+360.0459    5025128.0
+375.018     29159485.0
+376.0216    2740193.0
+381.0261    13522755.0
+396.9999    10317665.0
+417.0027    13822994.0
+418.9966    4386311.0
+432.9764    9779399.0
+438.9851    11307111.0
+440.9796    3364168.0
+454.9592    9820452.0
+456.9603    3774845.0
+470.9263    3632486.0
+512.8989    4072570.0
+572.871     3485486.0
 
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/normalise_intensities.msp
--- a/test-data/filtering/normalise_intensities.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/normalise_intensities.msp Thu Mar 17 12:26:24 2022 +0000
b
b'@@ -1,197 +1,194 @@\n-NAME: C001\n-IONMODE: Negative\n-RETENTIONTIME: 38.74\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C001\n+RETENTION_TIME: 38.74\n+RETENTION_INDEX: None\n NUM PEAKS: 57\n-138.9121\t0.31238713555387226\n-148.9337\t0.03093306182282099\n-175.0641\t0.8212827951680125\n-186.1095\t0.08204982258791635\n-196.8658\t0.6559932163262051\n-198.8647\t0.6651371915222478\n-200.8848\t0.23744477531380626\n-206.9034\t0.8013745219687375\n-216.9205\t1.0\n-234.0134\t0.0782063439003671\n-254.8252\t0.7282800074828951\n-256.8215\t0.9622769161885076\n-258.8237\t0.4763537140000675\n-266.8652\t0.30071259242448867\n-268.8537\t0.09477374975849262\n-306.9914\t0.09719532503059093\n-312.7841\t0.30826464301376666\n-316.7777\t0.3291912033047409\n-322.8157\t0.1937471210787636\n-324.9549\t0.2643519782137348\n-334.849\t0.12814188059875428\n-342.8093\t0.10076000453880525\n-349.9455\t0.06288990023828728\n-350.9875\t0.18863026217733847\n-351.941\t0.06028888882073866\n-366.8281\t0.09978532677864431\n-370.7418\t0.29948334289140294\n-372.7383\t0.5941806076478868\n-382.8218\t0.3930228749651156\n-384.8177\t0.2548937827568335\n-392.7685\t0.33468631642219476\n-413.2664\t0.1216236349083192\n-426.7772\t0.1665751647616974\n-428.7834\t0.262351377128715\n-434.7287\t0.3049380667756389\n-436.8161\t0.11363104420121628\n-440.7322\t0.32516890182380237\n-442.7401\t0.25367480687076976\n-450.7016\t0.268730177228078\n-460.7076\t0.13889274619185038\n-462.7862\t0.06512774590050817\n-484.7242\t0.13107299809554185\n-486.7743\t0.14984380989766222\n-488.6825\t0.376229111528872\n-492.744\t0.23498572423077985\n-494.8953\t0.22046305013846423\n-498.8794\t0.20888946475832396\n-500.8484\t0.19997396320500985\n-502.7832\t0.10941688619559184\n-510.763\t0.15302388699601627\n-518.7415\t0.13013699218282798\n-546.6093\t0.2201034418250904\n-550.6949\t0.18721924576097057\n-566.5977\t0.15860704680182902\n-612.6927\t0.061506545999871196\n-676.6436\t0.06080508591528995\n-800.4451\t0.08562814917948829\n+138.9121    0.31238713555387226\n+148.9337    0.03093306182282099\n+175.0641    0.8212827951680125\n+186.1095    0.08204982258791635\n+196.8658    0.6559932163262051\n+198.8647    0.6651371915222478\n+200.8848    0.23744477531380626\n+206.9034    0.8013745219687375\n+216.9205    1.0\n+234.0134    0.0782063439003671\n+254.8252    0.7282800074828951\n+256.8215    0.9622769161885076\n+258.8237    0.4763537140000675\n+266.8652    0.30071259242448867\n+268.8537    0.09477374975849262\n+306.9914    0.09719532503059093\n+312.7841    0.30826464301376666\n+316.7777    0.3291912033047409\n+322.8157    0.1937471210787636\n+324.9549    0.2643519782137348\n+334.849     0.12814188059875428\n+342.8093    0.10076000453880525\n+349.9455    0.06288990023828728\n+350.9875    0.18863026217733847\n+351.941     0.06028888882073866\n+366.8281    0.09978532677864431\n+370.7418    0.29948334289140294\n+372.7383    0.5941806076478868\n+382.8218    0.3930228749651156\n+384.8177    0.2548937827568335\n+392.7685    0.33468631642219476\n+413.2664    0.1216236349083192\n+426.7772    0.1665751647616974\n+428.7834    0.262351377128715\n+434.7287    0.3049380667756389\n+436.8161    0.11363104420121628\n+440.7322    0.32516890182380237\n+442.7401    0.25367480687076976\n+450.7016    0.268730177228078\n+460.7076    0.13889274619185038\n+462.7862    0.06512774590050817\n+484.7242    0.13107299809554185\n+486.7743    0.14984380989766222\n+488.6825    0.376229111528872\n+492.744     0.23498572423077985\n+494.8953    0.22046305013846423\n+498.8794    0.20888946475832396\n+500.8484    0.19997396320500985\n+502.7832    0.10941688619559184\n+510.763     0.15302388699601627\n+518.7415    0.13013699218282798\n+546.6093    0.2201034418250904\n+550.6949    0.18721924576097057\n+566.5977    0.15860704680182902\n+612.6927    0.061506545999871196\n+676.6436    0.06080508591528995\n+800.4451    0.08562814917948829\n \n-NAME: C002\n-IONMODE: Negative\n-RETENTIONTIME: 520.25\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C002\n+RETENTION_TIME: 520.25\n+RETENTION_INDEX: 1234.5\n NUM PEAKS: 35\n-131.1733\t0.0018370867419900284\n-267.2688\t0.005686981655625982\n-279.0196'..b'539997079428\n+265.2529    0.0027793206039538215\n+266.2564    0.00034733340430054716\n+279.6362    0.0072989959038771346\n+280.6546    0.002163778266032735\n+288.6414    0.049436024927126176\n+378.2093    0.0037436610481220017\n+379.1966    0.0007096996877780199\n+522.3565    1.0\n+523.354     0.29384865685493955\n+549.3267    0.015478600450010918\n+576.2749    0.0018060599048590942\n+577.3074    0.0005756721239330571\n+617.2778    0.0005681454143142512\n+625.4543    0.0009879705613649104\n+796.9808    0.0033198455052339984\n+797.9841    0.0015573701420036753\n+809.9883    0.0030801977705220513\n+810.9916    0.0016141198844341264\n+1043.7028   0.035297475152995465\n+1044.7068   0.020362011126266247\n+1045.706    0.0068462763386867055\n+1046.7131   0.0015906756058816994\n+1058.1594   0.00506614361447774\n+1059.1626   0.0016160049240511426\n+1071.1639   0.0037806052815603864\n+1072.1671   0.0012462539997079428\n \n-NAME: C004\n-IONMODE: Negative\n-RETENTIONTIME: 473.48\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C004\n+RETENTION_TIME: 473.48\n NUM PEAKS: 24\n-124.1405\t0.0005181964913289414\n-170.2437\t9.837442556482469e-05\n-275.6336\t0.002226328996889195\n-296.147\t0.015137694616192635\n-482.3247\t0.011589847063758851\n-483.3283\t0.00288178272643532\n-496.34\t1.0\n-497.3442\t0.2653231515567137\n-498.3462\t0.04232013408533278\n-499.3493\t0.005420441717160338\n-770.964\t0.003915707588825487\n-771.9675\t0.0018021637295703144\n-783.9721\t0.0007822882222085712\n-784.9749\t0.0002880447335267696\n-949.6233\t0.0006367701791743273\n-950.6274\t0.00029216205711611197\n-991.6726\t0.11294353509394345\n-992.6749\t0.06067284320350776\n-993.6787\t0.01901492598860482\n-994.6801\t0.004257539105397458\n-1017.6897\t0.013371955835345416\n-1018.6656\t0.00958844553208827\n-1019.6555\t0.004583362385803359\n-1020.6591\t0.0009913747329363162\n+124.1405    0.0005181964913289414\n+170.2437    9.837442556482469e-05\n+275.6336    0.002226328996889195\n+296.147     0.015137694616192635\n+482.3247    0.011589847063758851\n+483.3283    0.00288178272643532\n+496.34      1.0\n+497.3442    0.2653231515567137\n+498.3462    0.04232013408533278\n+499.3493    0.005420441717160338\n+770.964     0.003915707588825487\n+771.9675    0.0018021637295703144\n+783.9721    0.0007822882222085712\n+784.9749    0.0002880447335267696\n+949.6233    0.0006367701791743273\n+950.6274    0.00029216205711611197\n+991.6726    0.11294353509394345\n+992.6749    0.06067284320350776\n+993.6787    0.01901492598860482\n+994.6801    0.004257539105397458\n+1017.6897   0.013371955835345416\n+1018.6656   0.00958844553208827\n+1019.6555   0.004583362385803359\n+1020.6591   0.0009913747329363162\n \n-NAME: C005\n-IONMODE: Negative\n-RETENTIONTIME: 41.72\n-RETENTIONINDEX: \n+IONMODE: negative\n SPECTRUMTYPE: Centroid\n+COMPOUND_NAME: C005\n+RETENTION_TIME: 41.72\n NUM PEAKS: 20\n-218.1386\t0.15798873862212878\n-337.0623\t1.0\n-338.0654\t0.09890394032518758\n-353.0361\t0.4179579198062785\n-359.0443\t0.5462303157441691\n-360.0459\t0.0566706776455141\n-375.018\t0.3288449119592981\n-376.0216\t0.03090241565776916\n-381.0261\t0.15250232222627247\n-396.9999\t0.1163570494660839\n-417.0027\t0.1558882553976487\n-418.9966\t0.049466444781898614\n-432.9764\t0.11028677643551825\n-438.9851\t0.12751548668671656\n-440.9796\t0.037939268467062706\n-454.9592\t0.11074974998154162\n-456.9603\t0.042570661713847026\n-470.9263\t0.04096521385282981\n-512.8989\t0.04592824335196862\n-572.871\t0.03930742730214083\n+218.1386    0.15798873862212878\n+337.0623    1.0\n+338.0654    0.09890394032518758\n+353.0361    0.4179579198062785\n+359.0443    0.5462303157441691\n+360.0459    0.0566706776455141\n+375.018     0.3288449119592981\n+376.0216    0.03090241565776916\n+381.0261    0.15250232222627247\n+396.9999    0.1163570494660839\n+417.0027    0.1558882553976487\n+418.9966    0.049466444781898614\n+432.9764    0.11028677643551825\n+438.9851    0.12751548668671656\n+440.9796    0.037939268467062706\n+454.9592    0.11074974998154162\n+456.9603    0.042570661713847026\n+470.9263    0.04096521385282981\n+512.8989    0.04592824335196862\n+572.871     0.03930742730214083\n \n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/filtering/relative_intensity.msp
--- a/test-data/filtering/relative_intensity.msp Fri Jan 28 16:22:06 2022 +0000
+++ b/test-data/filtering/relative_intensity.msp Thu Mar 17 12:26:24 2022 +0000
b
@@ -1,54 +1,51 @@
-NAME: C001
-IONMODE: Negative
-RETENTIONTIME: 38.74
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
 NUM PEAKS: 16
-138.9121 0.31238713555387226
-175.0641 0.8212827951680125
-196.8658 0.6559932163262051
-198.8647 0.6651371915222478
-206.9034 0.8013745219687375
-254.8252 0.7282800074828951
-258.8237 0.4763537140000675
-266.8652 0.30071259242448867
-312.7841 0.30826464301376666
-316.7777 0.3291912033047409
-372.7383 0.5941806076478868
-382.8218 0.3930228749651156
-392.7685 0.33468631642219476
-434.7287 0.3049380667756389
-440.7322 0.32516890182380237
-488.6825 0.376229111528872
+138.9121    10186226.0
+175.0641    26780143.0
+196.8658    21390430.0
+198.8647    21688594.0
+206.9034    26130980.0
+254.8252    23747536.0
+258.8237    15532799.0
+266.8652    9805546.0
+312.7841    10051801.0
+316.7777    10734168.0
+372.7383    19374863.0
+382.8218    12815572.0
+392.7685    10913351.0
+434.7287    9943329.0
+440.7322    10603010.0
+488.6825    12267966.0
 
-NAME: C002
-IONMODE: Negative
-RETENTIONTIME: 520.25
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
 NUM PEAKS: 0
 
-NAME: C003
-IONMODE: Negative
-RETENTIONTIME: 483.67
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
 NUM PEAKS: 0
 
-NAME: C004
-IONMODE: Negative
-RETENTIONTIME: 473.48
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
 NUM PEAKS: 0
 
-NAME: C005
-IONMODE: Negative
-RETENTIONTIME: 41.72
-RETENTIONINDEX: 
+IONMODE: negative
 SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
 NUM PEAKS: 3
-353.0361 0.4179579198062785
-359.0443 0.5462303157441691
-375.018 0.3288449119592981
+353.0361    37061354.0
+359.0443    48435582.0
+375.018     29159485.0
 
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/similarity/fill2.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/similarity/fill2.msp Thu Mar 17 12:26:24 2022 +0000
b
b'@@ -0,0 +1,5150 @@\n+NAME: C041\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 122.44\n+RETENTIONINDEX: 1079.198718\n+NUM PEAKS: 32\n+73.0841293753643        962537.0\n+92.9922681974622        25809589.0\n+93.5236243719193        7832508.0\n+93.9918503397022        4037554.0\n+94.9892593290089        9273560.0\n+102.528897846033        5258508.0\n+103.032878700634        5618676.0\n+106.048287181011        17919228.0\n+116.082937025519        19358163.0\n+116.088997408008        2749072817.0\n+116.093789141941        21174548.0\n+117.085953932526        5958559.0\n+117.088526521735        137197201.0\n+117.09226575013         144026411.0\n+117.095343138483        3078493.0\n+118.085738968471        93658275.0\n+118.091857064635        7792967.0\n+118.095506285782        3082221.0\n+120.088840757968        158124.0\n+129.052817882804        2757896.0\n+144.047597981883        6935455.0\n+154.03180616847         40749504.0\n+155.035134922158        4354801.0\n+156.011148185837        2560241.0\n+172.042203916163        16922092.0\n+178.047197818956        372134.0\n+190.107642341182        143044792.0\n+191.109992585577        20336483.0\n+192.104283767627        9359612.0\n+204.050979060253        4818700.0\n+205.059630310281        24572956.0\n+244.042971889438        1521120.0\n+\n+NAME: C078\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 123.01\n+RETENTIONINDEX: 1084.222756\n+NUM PEAKS: 19\n+75.010394066048         41579966.0\n+76.0137882041242        2876047.0\n+76.0181929768627        28340717.0\n+77.0259504577085        2087987.0\n+78.0338979909698        183026502.0\n+78.5690293876576        1621521.0\n+78.8681599478139        1487893.0\n+79.0376938452989        30093264.0\n+79.0416552579741        2825081311.0\n+79.0447459388332        28320503.0\n+79.0501993561375        4270687.0\n+79.2157179170819        1455165.0\n+79.5185598181509        1593839.0\n+80.0386557190469        10075569.0\n+80.0450063493877        149614748.0\n+81.0483295726077        3268158.0\n+89.9974943554148        1961794.0\n+95.0365701495921        4214292.0\n+96.0443688524812        20830699.0\n+\n+NAME: C115\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 123.7\n+RETENTIONINDEX: 1090.304487\n+NUM PEAKS: 12\n+77.9912808985361        1430046.0\n+78.0150262817737        37531039.0\n+84.0934034839136        3045143.0\n+85.1012069274801        7651144.0\n+86.1044999837715        502049.0\n+127.023946499139        21672536.0\n+155.015395274682        50181759.0\n+173.025818014551        19321056.0\n+225.059247121035        54449419.0\n+240.031884101672        47027617.0\n+278.105692656594        44448528.0\n+332.077037506685        36734393.0\n+\n+NAME: C072\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 126.02\n+RETENTIONINDEX: 1105.809524\n+NUM PEAKS: 20\n+75.026123979198         434239353.0\n+76.0294479264598        10769681.0\n+79.0184721827801        4406598.0\n+79.0228409649542        3040911.0\n+86.0420715774215        69402087.0\n+93.9412902191997        4117357.0\n+95.0446916434486        4401744.0\n+95.9392474302401        3764925.0\n+120.033807681496        1445863.0\n+122.943972830176        4871174.0\n+131.088643222278        12343251.0\n+134.108939486662        1310131.0\n+137.026000929413        1710939.0\n+168.950062688366        1235026.0\n+176.091958662459        68901069.0\n+177.09146256329         9695519.0\n+184.040024671153        13159137.0\n+206.084125728074        4028572.0\n+249.138642058545        13543889.0\n+251.137167813194        1490829.0\n+\n+NAME: C050\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 128.53\n+RETENTIONINDEX: 1117.761905\n+NUM PEAKS: 26\n+72.0264235348245        4721310.0\n+75.0234110401587        7374271.0\n+75.0288404069534        9420937.0\n+86.9717728755449        9553539.0\n+88.9509947069327        6380434.0\n+97.088645546243         920854.0\n+100.00330732202         18214503.0\n+102.073359041231        484549126.0\n+103.002973695426        21838182.0\n+105.0182950443          11385761.0\n+108.947110640446        3717342.0\n+111.104289675305        3'..b'        50206470.0\n+209.047732156056        2402702.0\n+224.063049376575        5028073.0\n+237.079282533047        3254126.0\n+249.987809880519        7068049.0\n+250.967699771612        4666725.0\n+250.985590439413        4155732.0\n+251.00396611366         4712547.0\n+252.983678728064        24136227.0\n+267.016209949758        5864415.0\n+283.063228005498        1245664.0\n+285.04434812945         5580645.0\n+285.079068565003        8168809.0\n+323.006809434282        4228442.0\n+342.088126205946        12649128.0\n+343.014343300482        13057700.0\n+343.066328014548        11668807.0\n+344.014693258176        2143320.0\n+400.005631091464        1275209.0\n+\n+NAME: C020\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 687.99\n+RETENTIONINDEX: 4135.089286\n+NUM PEAKS: 43\n+109.052235628516        1922885.0\n+111.044058476785        1730416.0\n+133.064909307365        1579533.0\n+137.041722086745        2474039.0\n+194.053642118165        5252308.0\n+212.061782732582        10437319.0\n+267.034768010352        4429084.0\n+267.068824022318        27744081.0\n+268.069192256745        2367920.0\n+310.970568727813        3126083.0\n+311.004219192148        2666091.0\n+325.055731606087        7634729.0\n+327.965549188207        5442532.0\n+328.962716535303        2722019.0\n+339.03821058645         6627839.0\n+341.051986399316        1671451.0\n+342.996634492902        3939816.0\n+345.115297423962        3938049.0\n+358.067243216398        3526875.0\n+361.025211906011        6516476.0\n+388.003000430725        3422825.0\n+388.073272089579        4224454.0\n+399.005054559559        4141766.0\n+401.984326631505        5585170.0\n+402.98179623463         3562508.0\n+416.036473280551        7221552.0\n+417.033665098569        4129234.0\n+417.087073648909        1945166.0\n+418.994970709551        2648178.0\n+430.088321970134        10765018.0\n+431.085366629672        6887942.0\n+473.09370665615         2502410.0\n+475.002854889036        2969642.0\n+475.14184210128         30625723.0\n+477.070907310139        2271450.0\n+489.055479984185        1973511.0\n+503.107930410573        2407435.0\n+549.092119293556        2513579.0\n+552.160354111203        1673065.0\n+565.143723544965        3485979.0\n+610.132183060405        1997085.0\n+625.181479977537        3872339.0\n+697.202597429349        2820429.0\n+\n+NAME: C019\n+IONMODE: Negative\n+SPECTRUMTYPE: Centroid\n+RETENTIONTIME: 688.11\n+RETENTIONINDEX: 4135.446429\n+NUM PEAKS: 45\n+70.261357375322         1769331.0\n+82.0777101637279        1721684.0\n+92.0256956423549        1063403.0\n+104.057597689888        2564581.0\n+106.07773010104         1697865.0\n+118.073139793174        3070732.0\n+135.080430341161        1465718.0\n+144.946939559008        1208427.0\n+145.046705119092        2856482.0\n+164.985419247789        16217649.0\n+179.033979756352        10865004.0\n+180.980322946522        4830502.0\n+191.070496682473        3188459.0\n+192.078428243192        3993484.0\n+194.04893073403         5942766.0\n+195.046783458568        4544081.0\n+211.026857856803        2083491.0\n+213.039588698474        3950087.0\n+213.998834302427        1664182.0\n+248.988380501455        23053112.0\n+251.037178293           10860752.0\n+265.054817014529        3722747.0\n+265.089588392487        3283162.0\n+283.099929585291        13668779.0\n+313.053040045895        8388200.0\n+329.031653006854        6682061.0\n+341.157248840923        18181095.0\n+385.022947628725        6484159.0\n+399.075287615392        4270821.0\n+401.053681557414        30562249.0\n+401.124275228471        4559258.0\n+403.051400482668        5874830.0\n+415.106669687654        54985895.0\n+416.107049345269        18386437.0\n+417.104122333661        8837789.0\n+476.073864807294        4515967.0\n+489.124198650628        43332063.0\n+535.10922525834         9870959.0\n+536.110887614382        4207959.0\n+550.163296442538        12203065.0\n+551.161445828019        6205283.0\n+564.146181690587        6031009.0\n+609.130287444605        3608658.0\n+623.183150220198        13681871.0\n+638.166995588673        2404616.0\n+\n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/similarity/matches_test6_out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/similarity/matches_test6_out.tsv Thu Mar 17 12:26:24 2022 +0000
[
b'@@ -0,0 +1,387 @@\n+\tC041\tC078\tC115\tC072\tC050\tC069\tC108\tC121\tC085\tC122\tC114\tC084\tC014\tC029\tC006\tC113\tC107\tC129\tC037\tC092\tC021\tC016\tC109\tC079\tC116\tC123\tC135\tC134\tC070\tC056\tC062\tC082\tC061\tC133\tC087\tC025\tC096\tC009\tC075\tC052\tC130\tC015\tC060\tC131\tC132\tC073\tC090\tC091\tC007\tC074\tC104\tC102\tC103\tC001\tC042\tC010\tC127\tC101\tC004\tC110\tC086\tC051\tC002\tC032\tC039\tC057\tC105\tC095\tC033\tC137\tC138\tC066\tC003\tC040\tC011\tC043\tC106\tC048\tC128\tC058\tC088\tC031\tC100\tC053\tC081\tC023\tC099\tC049\tC044\tC017\tC064\tC098\tC008\tC077\tC119\tC094\tC093\tC124\tC083\tC063\tC035\tC047\tC024\tC022\tC097\tC076\tC030\tC118\tC126\tC038\tC080\tC136\tC117\tC125\tC112\tC026\tC005\tC013\tC034\tC046\tC055\tC071\tC036\tC027\tC067\tC120\tC111\tC068\tC089\tC028\tC045\tC059\tC054\tC018\tC012\tC065\tC020\tC019\n+Perylene_2H12\t2\t0\t1\t0\t0\t1\t0\t3\t0\t1\t1\t0\t1\t3\t5\t1\t2\t0\t1\t0\t5\t6\t1\t4\t2\t0\t0\t1\t2\t2\t2\t2\t2\t1\t2\t6\t1\t8\t0\t1\t1\t6\t3\t0\t1\t1\t0\t0\t7\t2\t1\t3\t4\t8\t4\t2\t3\t0\t8\t0\t1\t2\t9\t4\t3\t0\t0\t0\t5\t0\t0\t2\t5\t6\t5\t0\t1\t2\t1\t0\t0\t1\t1\t0\t0\t4\t3\t2\t1\t6\t1\t2\t11\t2\t1\t1\t0\t1\t2\t1\t3\t3\t1\t3\t0\t1\t1\t0\t0\t3\t1\t0\t0\t0\t0\t3\t2\t5\t2\t0\t2\t1\t1\t2\t2\t0\t2\t1\t1\t5\t0\t0\t5\t4\t2\t1\t0\t5\n+Perylene\t0\t0\t0\t2\t2\t0\t2\t0\t1\t0\t0\t0\t2\t1\t1\t0\t0\t0\t6\t2\t0\t2\t1\t2\t1\t0\t1\t0\t3\t0\t1\t1\t2\t1\t2\t1\t0\t5\t1\t1\t0\t2\t2\t0\t1\t1\t3\t0\t1\t1\t0\t1\t1\t3\t0\t1\t0\t1\t3\t0\t1\t2\t7\t2\t0\t3\t1\t2\t1\t0\t1\t1\t4\t0\t2\t0\t1\t1\t0\t0\t0\t0\t0\t0\t1\t6\t0\t3\t1\t4\t3\t0\t7\t1\t1\t1\t0\t0\t0\t0\t2\t1\t2\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t4\t0\t3\t2\t1\t2\t1\t1\t0\t0\t1\t0\t0\t0\t1\t0\t1\t1\t3\t0\t2\n+Phenanthrene_2H10\t3\t4\t1\t3\t0\t2\t0\t0\t1\t0\t1\t0\t4\t4\t1\t3\t1\t0\t4\t1\t3\t4\t1\t2\t1\t0\t0\t1\t1\t4\t1\t1\t1\t1\t1\t2\t1\t5\t3\t0\t0\t3\t1\t0\t1\t0\t1\t0\t0\t0\t1\t0\t0\t4\t1\t5\t0\t0\t6\t1\t2\t1\t9\t3\t0\t0\t2\t0\t1\t0\t0\t1\t6\t2\t1\t1\t0\t0\t0\t0\t0\t2\t0\t1\t2\t0\t0\t0\t0\t3\t1\t0\t4\t1\t0\t0\t0\t0\t0\t0\t1\t1\t2\t4\t0\t0\t0\t1\t0\t1\t0\t0\t1\t0\t0\t0\t10\t5\t2\t0\t2\t0\t0\t1\t0\t0\t0\t0\t0\t4\t0\t1\t0\t2\t1\t0\t0\t1\n+Phenanthrene\t1\t2\t0\t4\t4\t1\t1\t0\t1\t0\t0\t0\t7\t3\t9\t0\t1\t0\t8\t1\t3\t4\t0\t0\t2\t0\t1\t0\t1\t2\t0\t2\t0\t0\t0\t2\t0\t5\t1\t3\t0\t2\t0\t0\t1\t0\t0\t0\t2\t1\t0\t0\t0\t7\t0\t5\t0\t2\t5\t1\t1\t1\t9\t5\t2\t0\t0\t0\t0\t1\t0\t0\t9\t2\t2\t1\t0\t0\t1\t3\t0\t0\t2\t0\t1\t0\t1\t1\t0\t2\t1\t1\t3\t0\t1\t3\t0\t0\t0\t0\t0\t2\t3\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t9\t2\t0\t1\t0\t1\t1\t1\t0\t3\t2\t2\t0\t3\t2\t1\t2\t0\t0\t1\t0\t1\n+Anthracene\t1\t2\t0\t4\t5\t1\t3\t0\t1\t0\t0\t0\t7\t3\t9\t0\t1\t0\t8\t2\t3\t4\t1\t0\t1\t0\t1\t0\t1\t2\t1\t2\t0\t1\t0\t1\t0\t4\t2\t4\t0\t2\t0\t0\t1\t0\t0\t0\t2\t2\t0\t0\t0\t8\t0\t6\t0\t2\t7\t1\t1\t1\t9\t5\t3\t0\t0\t0\t0\t1\t0\t0\t11\t2\t2\t1\t1\t0\t1\t3\t0\t0\t2\t0\t1\t0\t0\t1\t0\t2\t2\t1\t4\t0\t1\t3\t0\t0\t0\t0\t0\t1\t3\t1\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t11\t1\t0\t1\t0\t1\t1\t1\t0\t3\t2\t2\t1\t3\t2\t1\t2\t0\t0\t2\t0\t1\n+Acenaphthylene\t1\t3\t1\t3\t4\t2\t3\t0\t3\t0\t0\t0\t5\t2\t5\t0\t0\t0\t2\t2\t1\t4\t1\t0\t3\t0\t1\t0\t1\t0\t2\t0\t1\t1\t0\t4\t0\t5\t3\t2\t0\t1\t0\t0\t2\t0\t0\t0\t3\t2\t1\t0\t0\t9\t1\t5\t0\t2\t5\t1\t0\t1\t9\t3\t2\t1\t1\t1\t1\t1\t0\t1\t9\t3\t1\t0\t1\t0\t1\t2\t0\t0\t2\t0\t0\t0\t1\t1\t0\t1\t2\t1\t5\t0\t1\t2\t0\t0\t0\t1\t0\t2\t1\t1\t0\t1\t0\t0\t1\t2\t0\t0\t0\t0\t0\t0\t7\t3\t1\t1\t1\t1\t1\t1\t1\t4\t1\t2\t2\t4\t0\t0\t0\t2\t0\t2\t0\t2\n+Acenaphthene\t2\t2\t2\t3\t6\t2\t2\t0\t2\t0\t0\t0\t5\t2\t8\t1\t0\t0\t2\t3\t1\t3\t0\t0\t2\t0\t1\t0\t1\t0\t3\t0\t0\t0\t0\t4\t0\t5\t1\t0\t0\t1\t0\t0\t1\t0\t1\t0\t4\t3\t0\t0\t0\t9\t0\t3\t0\t2\t4\t1\t1\t1\t11\t2\t2\t2\t1\t0\t0\t1\t0\t0\t9\t4\t2\t0\t1\t0\t1\t2\t0\t0\t2\t0\t0\t0\t2\t2\t1\t1\t3\t1\t4\t0\t1\t2\t0\t0\t0\t0\t0\t2\t2\t2\t0\t0\t0\t0\t0\t2\t0\t0\t0\t0\t0\t1\t4\t3\t1\t1\t0\t1\t1\t2\t0\t5\t2\t3\t0\t3\t0\t1\t0\t1\t0\t1\t0\t2\n+Fluoranthene\t1\t1\t0\t2\t4\t2\t0\t0\t2\t2\t1\t1\t8\t3\t5\t0\t1\t0\t5\t0\t5\t8\t0\t0\t0\t0\t0\t0\t3\t2\t1\t1\t1\t0\t1\t2\t0\t3\t0\t1\t2\t4\t0\t0\t1\t0\t2\t0\t3\t0\t1\t1\t0\t5\t0\t1\t0\t1\t9\t2\t0\t2\t10\t2\t1\t0\t2\t1\t0\t0\t1\t2\t5\t2\t3\t1\t0\t0\t0\t0\t0\t6\t0\t0\t0\t1\t0\t2\t0\t2\t2\t1\t6\t0\t0\t1\t0\t0\t0\t1\t3\t4\t4\t3\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t9\t0\t0\t0\t0\t1\t0\t0\t2\t1\t0\t0\t1\t0\t2\t0\t0\t1\t1\t0\t0\t0\n+Pyrene\t1\t0\t0\t0\t3\t2\t0\t0\t2\t2\t1\t1\t8\t3\t3\t0\t1\t0\t4\t0\t5\t7\t0\t0\t0\t0\t0\t0\t3\t1\t1\t1\t1\t0\t1\t2\t0\t2\t0\t1\t2\t4\t0\t0\t1\t0\t2\t0\t2\t0\t1\t1\t0\t5\t0\t1\t0\t0\t8\t2\t0\t2\t9\t1\t1\t0\t2\t1\t0\t0\t1\t2\t5\t2\t3\t1\t0\t0\t0\t0\t0\t6\t0\t0\t0\t1\t0\t2\t0\t2\t2\t1\t6\t0\t0\t1\t0\t0\t0\t1\t3\t4\t3\t3\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t8\t0\t0\t0\t0\t1\t0\t0\t2\t0\t0\t0\t1\t0\t2\t0\t0\t1\t1\t0\t0\t0\n+para-Terphenyl\t0\t0\t0\t1\t1\t1\t2\t0\t1\t2\t4\t1\t4\t3\t10\t0\t2\t1\t4\t1\t7\t7\t0\t3\t0\t0\t2\t2\t6\t3\t1\t1\t3\t0\t3\t6\t2\t9\t2\t0\t0\t5\t3\t0\t2\t3\t1\t1\t8\t3\t0\t1\t1\t8\t0\t2\t1\t1\t13\t1\t0\t3\t18\t2\t2\t1\t1\t1\t0\t2\t0\t4\t5\t4\t5\t2\t0\t0\t1\t3\t1\t5\t2\t0\t2\t1\t1\t2\t0\t4\t2\t1\t10\t2\t1\t1\t0\t0\t2\t0\t2\t4\t4\t4\t0\t1\t1\t1\t1\t2\t0\t0\t1\t0\t0\t1\t15\t2\t2\t1\t0\t0\t1\t1\t1\t2\t1\t0\t2\t5\t3\t0\t0\t4\t1\t1\t0\t1\n+Retene\t7\t1\t0\t3\t4\t9\t1\t0\t1\t2\t2\t3\t9\t6\t14\t1\t3\t2\t7\t2\t13\t14\t1\t1\t0\t0\t0\t2\t4\t10\t4\t3\t2\t1\t2\t3\t2\t6\t9\t8\t0\t4\t1\t0\t2\t3\t1\t0\t4\t3\t0\t0\t1\t15\t1\t7\t0\t2\t19\t3\t2\t5\t11\t6\t5\t0\t2\t1\t1\t1\t0\t2\t11\t5\t12\t2\t1\t0\t1\t0\t0\t7\t2\t1\t2\t0\t0\t0\t1\t6\t1\t1\t7\t3\t0\t2\t0\t0\t1\t0\t5\t3\t2\t4\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t4\t30\t1\t0\t0\t1\t1\t1\t1\t2\t2\t0\t3\t3\t3\t3\t2\t3\t1\t3\t2\t0\t5\n+Benzo[b]naphtho[2,1-d]thiophene\t4\t0\t0\t0\t2\t5\t1\t0\t2\t1\t1\t0\t4\t2\t6\t0\t2\t0\t3\t1\t5\t6\t1\t3\t0\t0\t0'..b'1\t0\t1\t0\t4\t1\t0\t0\t0\t7\t3\t6\t0\t1\t8\t2\t3\t1\t9\t3\t3\t1\t0\t0\t1\t0\t0\t0\t8\t2\t1\t1\t0\t0\t0\t0\t0\t3\t0\t0\t1\t0\t2\t0\t1\t1\t1\t0\t6\t0\t0\t0\t0\t0\t0\t1\t0\t3\t1\t4\t0\t0\t0\t0\t1\t2\t1\t0\t0\t0\t0\t0\t7\t5\t1\t0\t1\t0\t0\t1\t0\t2\t2\t1\t1\t7\t0\t0\t0\t2\t1\t0\t0\t2\n+Fenpropimorph_isomer1\t1\t0\t1\t2\t2\t2\t2\t0\t2\t1\t1\t1\t1\t1\t0\t0\t0\t0\t0\t1\t0\t3\t1\t0\t0\t0\t1\t2\t3\t0\t2\t0\t0\t1\t0\t2\t1\t4\t1\t1\t0\t0\t1\t0\t1\t0\t0\t1\t0\t1\t0\t1\t0\t2\t0\t1\t0\t0\t2\t0\t0\t0\t7\t0\t2\t1\t0\t0\t0\t0\t0\t0\t6\t2\t0\t0\t1\t0\t0\t0\t0\t2\t0\t0\t1\t0\t1\t0\t0\t0\t2\t0\t2\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t4\t0\t0\t0\t1\t1\t1\t0\t0\t0\t0\t0\t0\t3\t1\t3\t0\t0\t0\t0\t1\t0\t1\t0\t1\t1\t1\t0\t0\t0\t1\t0\t0\t0\t0\n+Fenpropimorph_isomer2\t2\t0\t2\t2\t1\t1\t0\t0\t3\t2\t1\t2\t2\t2\t2\t0\t1\t0\t1\t0\t2\t4\t1\t0\t0\t0\t1\t2\t3\t1\t2\t0\t2\t1\t1\t2\t2\t7\t1\t2\t0\t1\t1\t2\t1\t0\t1\t1\t1\t1\t0\t1\t0\t4\t1\t1\t0\t0\t7\t0\t0\t1\t5\t1\t3\t2\t0\t0\t0\t0\t0\t1\t8\t3\t0\t0\t0\t0\t0\t0\t0\t4\t0\t0\t1\t0\t2\t0\t0\t0\t1\t0\t2\t1\t0\t0\t0\t0\t0\t2\t0\t1\t1\t7\t0\t0\t0\t1\t2\t2\t0\t0\t0\t0\t0\t0\t4\t2\t6\t0\t0\t0\t1\t2\t0\t3\t0\t0\t1\t3\t0\t0\t1\t0\t0\t0\t0\t1\n+Spirodiclofen\t3\t3\t4\t5\t5\t6\t3\t4\t6\t3\t4\t1\t15\t5\t29\t1\t3\t0\t11\t0\t9\t7\t4\t3\t3\t0\t3\t1\t4\t5\t4\t4\t4\t3\t4\t8\t2\t12\t5\t3\t1\t8\t3\t0\t5\t2\t2\t1\t20\t4\t4\t1\t5\t33\t3\t6\t3\t2\t17\t1\t1\t4\t18\t8\t7\t8\t5\t6\t6\t3\t0\t3\t18\t10\t13\t2\t0\t2\t2\t3\t1\t3\t4\t3\t1\t9\t4\t2\t3\t10\t7\t2\t20\t3\t2\t4\t1\t0\t5\t4\t5\t4\t3\t5\t3\t1\t3\t1\t2\t3\t3\t1\t1\t0\t0\t6\t6\t5\t5\t3\t3\t4\t2\t5\t8\t3\t2\t6\t5\t9\t15\t3\t3\t9\t10\t7\t2\t9\n+Spiromesifen\t11\t2\t4\t3\t6\t6\t3\t1\t5\t5\t3\t3\t19\t8\t19\t7\t7\t5\t7\t9\t9\t15\t8\t8\t5\t0\t5\t3\t10\t4\t5\t7\t9\t6\t6\t19\t6\t23\t4\t7\t4\t17\t6\t4\t4\t6\t7\t5\t13\t9\t3\t2\t3\t39\t16\t6\t4\t1\t30\t4\t10\t6\t44\t11\t8\t5\t5\t4\t5\t2\t1\t4\t50\t14\t11\t5\t1\t2\t2\t4\t0\t10\t3\t0\t5\t11\t5\t9\t3\t12\t8\t3\t31\t8\t2\t2\t3\t0\t2\t3\t9\t9\t9\t19\t1\t1\t2\t1\t2\t14\t4\t1\t2\t0\t1\t7\t27\t12\t14\t4\t8\t5\t5\t4\t4\t7\t3\t3\t5\t16\t2\t3\t5\t3\t5\t4\t2\t8\n+Spirotetramat\t19\t5\t5\t7\t10\t10\t5\t5\t9\t8\t7\t6\t26\t13\t34\t6\t8\t6\t11\t11\t17\t19\t10\t10\t6\t0\t4\t4\t17\t5\t6\t10\t13\t6\t10\t23\t7\t28\t9\t14\t7\t24\t10\t4\t4\t10\t9\t6\t29\t13\t5\t7\t7\t54\t22\t19\t5\t8\t52\t8\t11\t12\t65\t18\t16\t11\t6\t7\t10\t2\t5\t11\t66\t20\t23\t6\t2\t5\t4\t7\t2\t16\t8\t5\t12\t16\t6\t11\t7\t25\t9\t5\t49\t15\t2\t5\t4\t2\t3\t5\t14\t16\t14\t27\t1\t5\t3\t5\t5\t15\t5\t1\t3\t0\t2\t13\t50\t28\t20\t8\t10\t11\t10\t8\t5\t7\t5\t6\t8\t22\t8\t5\t9\t15\t9\t7\t5\t15\n+17-alpha-Ethynylestradiol\t12\t5\t6\t9\t8\t9\t5\t3\t7\t4\t6\t6\t17\t12\t23\t7\t6\t3\t8\t9\t12\t14\t6\t6\t4\t0\t3\t4\t10\t2\t5\t7\t8\t5\t10\t14\t7\t17\t7\t10\t4\t15\t3\t4\t3\t5\t6\t6\t12\t8\t1\t3\t5\t34\t15\t19\t5\t5\t35\t4\t9\t7\t37\t13\t13\t7\t5\t3\t6\t1\t1\t4\t47\t15\t13\t2\t3\t3\t1\t2\t0\t9\t3\t1\t8\t15\t5\t11\t5\t15\t8\t1\t27\t7\t3\t3\t4\t2\t2\t2\t9\t6\t9\t22\t2\t2\t1\t2\t6\t11\t5\t1\t2\t0\t2\t7\t30\t19\t18\t5\t8\t5\t6\t4\t8\t9\t5\t6\t4\t21\t2\t3\t6\t8\t9\t5\t5\t10\n+Bisphenol A\t1\t1\t0\t2\t5\t0\t1\t0\t2\t2\t3\t1\t5\t2\t12\t0\t3\t0\t2\t4\t4\t3\t4\t0\t2\t0\t3\t1\t3\t1\t3\t1\t2\t2\t5\t5\t4\t6\t2\t5\t1\t2\t2\t0\t0\t3\t1\t1\t9\t6\t0\t2\t2\t17\t6\t7\t0\t1\t9\t1\t4\t3\t14\t5\t6\t1\t2\t0\t0\t1\t0\t1\t14\t1\t5\t2\t1\t0\t1\t3\t2\t3\t1\t0\t3\t5\t3\t4\t1\t3\t3\t1\t10\t1\t0\t1\t1\t0\t2\t1\t1\t5\t3\t2\t1\t0\t0\t1\t1\t3\t1\t1\t0\t0\t0\t2\t11\t3\t6\t2\t1\t1\t2\t1\t1\t4\t2\t3\t3\t10\t4\t3\t3\t1\t2\t0\t0\t6\n+4-tert-Octylphenol\t1\t3\t0\t3\t3\t1\t2\t0\t1\t0\t2\t0\t3\t2\t5\t0\t0\t0\t1\t0\t1\t1\t1\t0\t0\t0\t0\t1\t0\t1\t0\t1\t2\t1\t1\t0\t0\t1\t1\t2\t0\t2\t0\t0\t0\t1\t1\t1\t2\t0\t0\t0\t1\t3\t3\t5\t1\t1\t5\t0\t0\t0\t0\t4\t3\t0\t0\t0\t0\t0\t0\t0\t3\t1\t0\t0\t2\t0\t0\t0\t0\t1\t0\t0\t0\t0\t2\t1\t0\t0\t0\t0\t2\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t1\t6\t1\t6\t0\t1\t0\t2\t0\t2\t1\t2\t3\t2\t4\t0\t1\t1\t0\t2\t0\t1\t1\n+Estrone\t12\t5\t4\t6\t11\t8\t5\t4\t7\t6\t3\t5\t20\t12\t23\t7\t6\t3\t5\t10\t7\t12\t8\t8\t4\t0\t4\t4\t9\t2\t4\t6\t11\t4\t6\t14\t6\t19\t5\t6\t4\t17\t5\t4\t2\t5\t6\t5\t14\t8\t1\t2\t3\t36\t14\t16\t3\t2\t31\t5\t9\t5\t38\t11\t11\t6\t6\t2\t3\t2\t2\t3\t49\t15\t9\t3\t2\t2\t1\t5\t1\t10\t7\t0\t5\t8\t5\t8\t4\t11\t7\t1\t27\t7\t2\t1\t2\t1\t3\t3\t6\t6\t11\t23\t2\t2\t2\t2\t6\t11\t5\t1\t2\t0\t2\t7\t32\t14\t21\t4\t4\t5\t7\t3\t6\t9\t4\t8\t4\t20\t2\t4\t4\t9\t7\t3\t2\t8\n+17-beta-Estradiol\t7\t2\t4\t4\t4\t3\t4\t1\t8\t3\t4\t6\t18\t7\t14\t5\t6\t3\t5\t8\t10\t10\t7\t5\t5\t0\t4\t4\t10\t4\t4\t3\t7\t4\t5\t12\t8\t16\t3\t3\t3\t12\t3\t2\t2\t3\t6\t3\t9\t8\t2\t2\t3\t33\t10\t9\t5\t2\t25\t5\t8\t6\t36\t8\t9\t5\t3\t3\t0\t1\t1\t4\t40\t8\t7\t1\t1\t1\t0\t2\t0\t6\t2\t0\t5\t8\t3\t9\t2\t9\t5\t1\t23\t7\t2\t2\t1\t1\t3\t1\t4\t6\t7\t17\t1\t2\t0\t2\t5\t9\t4\t1\t2\t0\t1\t4\t26\t9\t13\t4\t4\t4\t4\t1\t2\t6\t4\t3\t3\t12\t0\t1\t3\t4\t5\t1\t2\t5\n+Progesterone\t19\t7\t5\t11\t15\t11\t8\t1\t9\t6\t9\t7\t30\t15\t19\t7\t9\t5\t16\t11\t17\t21\t6\t10\t7\t0\t4\t7\t14\t10\t6\t10\t15\t5\t11\t15\t9\t40\t12\t11\t5\t23\t6\t4\t3\t6\t8\t7\t5\t11\t3\t3\t5\t51\t21\t30\t8\t7\t50\t8\t11\t11\t74\t17\t15\t8\t8\t7\t6\t3\t5\t11\t69\t20\t23\t7\t4\t3\t2\t3\t0\t18\t7\t3\t6\t10\t7\t12\t5\t15\t10\t3\t46\t12\t3\t8\t3\t3\t5\t6\t11\t16\t19\t32\t2\t1\t2\t5\t9\t17\t7\t1\t2\t0\t1\t10\t73\t29\t28\t4\t9\t5\t11\t15\t9\t9\t8\t9\t5\t20\t1\t5\t7\t6\t8\t3\t5\t11\n+Testosterone\t16\t7\t2\t9\t14\t10\t6\t0\t8\t6\t7\t8\t24\t12\t13\t7\t6\t5\t12\t12\t19\t19\t2\t11\t4\t0\t3\t6\t10\t11\t7\t7\t13\t3\t9\t9\t11\t34\t10\t8\t4\t23\t4\t4\t2\t4\t7\t4\t3\t9\t3\t2\t3\t42\t11\t27\t4\t4\t41\t5\t11\t10\t55\t12\t14\t6\t7\t3\t3\t2\t2\t8\t47\t17\t17\t4\t2\t1\t1\t4\t0\t13\t7\t2\t6\t7\t4\t9\t2\t11\t5\t1\t33\t7\t3\t6\t1\t2\t7\t4\t10\t10\t16\t26\t1\t2\t3\t4\t9\t13\t6\t1\t4\t0\t1\t6\t69\t21\t25\t3\t6\t4\t6\t11\t4\t9\t7\t7\t3\t16\t1\t3\t3\t5\t6\t3\t5\t8\n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/similarity/recetox_gc-ei_ms_20201028_with_precursor_mz.msp
--- a/test-data/similarity/recetox_gc-ei_ms_20201028_with_precursor_mz.msp Fri Jan 28 16:22:06 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,32358 +0,0 @@\n-NAME: Perylene_2H12\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-RETENTIONINDEX: 2876\n-PRECURSORMZ: 264.16858\n-PRECURSORTYPE: [M]+\n-IONMODE: Positive\n-SPECTRUMTYPE: Centroid\n-FORMULA: C20H12\n-INCHIKEY: CSHWQDPOILHKBI-AQZSQYOVSA-N\n-INCHI: \n-SMILES: [2H]C1=C(C2=C3C(=C1[2H])C4=C(C(=C(C5=C4C(=C(C(=C5[2H])[2H])[2H])C3=C(C(=C2[2H])[2H])[2H])[2H])[2H])[2H])[2H]\n-AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n-COLLISIONENERGY: 70eV\n-INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n-INSTRUMENTTYPE: GC-EI-Orbitrap\n-IONIZATION: EI+\n-LICENSE: CC BY-NC\n-COMMENT: \n-PRECURSOR_MZ: 264.16858\n-NUM PEAKS: 33\n-116.05576\t29277.0\n-118.06992\t49651.0\n-128.0558\t37001.0\n-130.06996\t78584.0\n-130.57159\t17533.0\n-132.08408\t65686.0\n-132.5858\t12593.0\n-207.0322\t39569.0\n-222.08282\t13141.0\n-223.06346\t20762.0\n-225.04277\t18058.0\n-227.02202\t26370.0\n-232.11204\t30354.0\n-236.1405\t22796.0\n-252.09322\t8564.0\n-256.11212\t41765.0\n-257.11557\t8688.0\n-258.12622\t21742.0\n-259.13446\t11564.0\n-260.14041\t248997.0\n-261.14358\t51721.0\n-262.15466\t33597.0\n-263.16254\t63732.0\n-264.16858\t829577.0\n-265.01968\t18286.0\n-265.17191\t176460.0\n-266.17523\t18876.0\n-283.03036\t10261.0\n-287.00632\t11352.0\n-295.10288\t26727.0\n-299.06152\t33379.0\n-359.0282\t67046.0\n-400.98447\t17406.0\n-\n-NAME: Perylene\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-RETENTIONINDEX: 2886.9\n-PRECURSORMZ: 252.09323\n-PRECURSORTYPE: [M]+\n-IONMODE: Positive\n-SPECTRUMTYPE: Centroid\n-FORMULA: C20H12\n-INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N\n-INCHI: \n-SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2\n-AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n-COLLISIONENERGY: 70eV\n-INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n-INSTRUMENTTYPE: GC-EI-Orbitrap\n-IONIZATION: EI+\n-LICENSE: CC BY-NC\n-COMMENT: \n-PRECURSOR_MZ: 252.09323\n-NUM PEAKS: 19\n-112.03071\t49892.0\n-113.03854\t87510.0\n-124.03076\t100146.0\n-124.53242\t24923.0\n-125.03855\t179254.0\n-125.54019\t49039.0\n-126.04636\t131679.0\n-126.54804\t36313.0\n-222.04645\t28905.0\n-224.06192\t55632.0\n-226.04175\t37413.0\n-246.04646\t23286.0\n-248.06204\t140007.0\n-249.07072\t62236.0\n-250.07765\t641789.0\n-251.07967\t137600.0\n-252.09323\t1955166.0\n-253.09656\t402252.0\n-254.09985\t39987.0\n-\n-NAME: Phenanthrene_2H10\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-RETENTIONINDEX: 1827.1\n-PRECURSORMZ: 188.14029\n-PRECURSORTYPE: [M]+\n-IONMODE: Positive\n-SPECTRUMTYPE: Centroid\n-FORMULA: C14H10\n-INCHIKEY: YNPNZTXNASCQKK-LHNTUAQVSA-N\n-INCHI: \n-SMILES: [2H]C1=C(C(=C2C(=C1[2H])C(=C(C3=C(C(=C(C(=C32)[2H])[2H])[2H])[2H])[2H])[2H])[2H])[2H]\n-AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n-COLLISIONENERGY: 70eV\n-INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n-INSTRUMENTTYPE: GC-EI-Orbitrap\n-IONIZATION: EI+\n-LICENSE: CC BY-NC\n-COMMENT: \n-PRECURSOR_MZ: 188.14029\n-NUM PEAKS: 17\n-76.02767\t185421.0\n-78.0418\t256858.0\n-80.05586\t881271.0\n-90.04181\t200162.0\n-92.06206\t537968.0\n-94.06999\t628791.0\n-156.08402\t836513.0\n-158.09808\t477819.0\n-160.11218\t2421148.0\n-161.11554\t310248.0\n-176.10866\t308983.0\n-184.11224\t2784543.0\n-185.11562\t445833.0\n-186.12637\t1283282.0\n-188.14029\t15115275.0\n-189.1436\t2312386.0\n-190.14688\t151400.0\n-\n-NAME: Phenanthrene\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-RETENTIONINDEX: 1832.9\n-PRECURSORMZ: 178.0775\n-PRECURSORTYPE: [M]+\n-IONMODE: Positive\n-SPECTRUMTYPE: Centroid\n-FORMULA: C14H10\n-INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N\n-INCHI: \n-SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2\n-AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n-COLLISIONENERGY: 70eV\n-INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n-INSTRUMENTTYPE: GC-EI-Orbitrap\n-IONIZATION: EI+\n-LICENSE: CC BY-NC\n-COMMENT: \n-PRECURSOR_MZ: 178.0775\n-NUM PEAKS: 19\n-74.01508\t137808.0\n-75.02295\t278714.0\n-76.03075\t608417.0\n-87.02295\t304266.0\n-88.03076\t497050.0\n-89.03857\t441168.0\n-98.01511\t150478.0\n-150.04633\t868927.0\n-151.05415\t546351.0\n-152.0619\t2275502.0\n-153.06528\t276320.0\n-169.06468\t272559.0\n-174.04636\t365846.0\n-175.05423\t272039.0\n-176.062\t3370523.0\n-177.06982\t1751846.0\n-178.0775\t13724432.0\n-179.08078\t2250119.0\n-180.08412\t138203.0\n-\n-NAME: Anthracene\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-R'..b'9912.0\n-227.17972\t5423840.0\n-228.18791\t2914893.0\n-229.15888\t32360466.0\n-230.16203\t6384102.0\n-231.1749\t1289341.0\n-232.18314\t332775.0\n-237.16412\t482606.0\n-238.172\t1003913.0\n-239.17972\t4289875.0\n-240.18282\t790788.0\n-241.15898\t818792.0\n-243.17453\t1942474.0\n-244.18245\t6361220.0\n-245.18564\t1031445.0\n-249.16392\t573775.0\n-251.17975\t407893.0\n-252.18788\t671025.0\n-253.19528\t4724411.0\n-254.20363\t4511072.0\n-255.20648\t1121372.0\n-256.18237\t1013544.0\n-257.19034\t3573622.0\n-258.19263\t594380.0\n-263.17984\t1236644.0\n-267.17474\t902087.0\n-269.19067\t342232.0\n-270.19833\t656479.0\n-271.20621\t2697450.0\n-272.21378\t25275140.0\n-273.21713\t5337242.0\n-274.22037\t493519.0\n-278.20316\t723267.0\n-281.19022\t3078032.0\n-282.19333\t636564.0\n-285.18503\t445716.0\n-286.19318\t505550.0\n-296.21365\t3914332.0\n-297.21619\t834583.0\n-299.20071\t17845166.0\n-300.20407\t3927878.0\n-301.20691\t386280.0\n-314.22427\t14435122.0\n-315.22732\t3171411.0\n-316.23056\t354527.0\n-447.34674\t509682.0\n-\n-NAME: Testosterone\n-SCANNUMBER: -1\n-RETENTIONTIME: -1\n-RETENTIONINDEX: 2733\n-PRECURSORMZ: 288.20841\n-PRECURSORTYPE: [M]+\n-IONMODE: Positive\n-SPECTRUMTYPE: Centroid\n-FORMULA: C19H28O2\n-INCHIKEY: MUMGGOZAMZWBJJ-DYKIIFRCSA-N\n-INCHI: \n-SMILES: CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C\n-AUTHORS: Price et al., RECETOX, Masaryk University (CZ)\n-COLLISIONENERGY: 70eV\n-INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS\n-INSTRUMENTTYPE: GC-EI-Orbitrap\n-IONIZATION: EI+\n-LICENSE: CC BY-NC\n-COMMENT: \n-PRECURSOR_MZ: 288.20841\n-NUM PEAKS: 130\n-67.05424\t425856.0\n-68.05758\t32423.0\n-69.06989\t40720.0\n-77.0386\t701107.0\n-78.04644\t206546.0\n-79.05425\t1677740.0\n-80.06205\t212420.0\n-81.06992\t591913.0\n-82.07324\t87864.0\n-83.08553\t132100.0\n-91.05427\t2712295.0\n-92.05762\t347222.0\n-93.06994\t1165998.0\n-94.07777\t348187.0\n-95.08556\t843191.0\n-96.05698\t438830.0\n-97.10123\t225039.0\n-103.05426\t164013.0\n-104.06209\t107889.0\n-105.06992\t1641197.0\n-106.07773\t362834.0\n-107.04917\t231952.0\n-107.08556\t791873.0\n-108.09338\t226494.0\n-109.06484\t1377189.0\n-110.07266\t295897.0\n-111.11686\t89809.0\n-115.05426\t293808.0\n-116.06208\t54416.0\n-117.0699\t705440.0\n-118.07771\t180876.0\n-119.08557\t1042378.0\n-120.09339\t283969.0\n-121.10119\t622111.0\n-122.07263\t346341.0\n-123.08047\t632087.0\n-124.0883\t3096347.0\n-125.09164\t287870.0\n-128.06206\t220792.0\n-129.06988\t580911.0\n-130.07767\t146160.0\n-131.08559\t960647.0\n-132.09337\t265447.0\n-133.10121\t554432.0\n-134.10901\t403802.0\n-135.11685\t343774.0\n-136.1247\t198902.0\n-137.0961\t277512.0\n-138.1039\t57211.0\n-141.06987\t131615.0\n-142.07773\t123164.0\n-143.08554\t501343.0\n-144.09331\t66154.0\n-145.10117\t268116.0\n-146.10899\t254486.0\n-147.0804\t95276.0\n-147.11679\t1285762.0\n-148.12456\t421900.0\n-149.09608\t321248.0\n-150.10381\t153636.0\n-151.11175\t129271.0\n-152.062\t37808.0\n-155.08546\t103204.0\n-156.09323\t64215.0\n-158.07256\t141873.0\n-159.11679\t250012.0\n-160.08816\t145963.0\n-161.13245\t257830.0\n-162.10376\t195492.0\n-163.14809\t122973.0\n-164.11951\t206339.0\n-165.12733\t237526.0\n-169.10114\t92813.0\n-171.11676\t144863.0\n-172.08818\t132181.0\n-173.1324\t250167.0\n-174.10378\t253404.0\n-175.11162\t210353.0\n-176.11943\t88738.0\n-177.1637\t38439.0\n-183.11676\t46215.0\n-185.13243\t817797.0\n-187.14806\t422332.0\n-188.1559\t151252.0\n-189.12735\t68862.0\n-189.16364\t163919.0\n-190.13504\t97146.0\n-195.11685\t57370.0\n-197.13249\t50230.0\n-199.14803\t221936.0\n-200.15579\t108028.0\n-201.16364\t216321.0\n-202.17162\t208618.0\n-203.14304\t1200123.0\n-204.14627\t283454.0\n-206.16646\t32118.0\n-209.13243\t45976.0\n-211.14809\t272618.0\n-212.15134\t57168.0\n-213.16373\t355360.0\n-214.16698\t92310.0\n-215.143\t60304.0\n-216.15102\t55312.0\n-217.15874\t113508.0\n-218.16704\t51076.0\n-226.17177\t37673.0\n-227.1797\t204277.0\n-228.1873\t842856.0\n-229.1588\t359919.0\n-230.16186\t41138.0\n-231.17433\t428563.0\n-232.17703\t61957.0\n-237.16373\t184777.0\n-238.16693\t38109.0\n-241.15854\t66162.0\n-242.16649\t51489.0\n-245.19052\t102650.0\n-246.19786\t1126362.0\n-247.2011\t212107.0\n-252.18724\t143505.0\n-255.17436\t288345.0\n-256.17764\t54890.0\n-259.16943\t50245.0\n-260.17758\t70396.0\n-270.19791\t397018.0\n-271.20062\t89701.0\n-273.18549\t199263.0\n-274.18863\t88305.0\n-288.20841\t1123316.0\n-289.21173\t220898.0\n-\n'
b
diff -r c3dd958cc4a5 -r ba9410f612bc test-data/similarity/scores_test6_out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/similarity/scores_test6_out.tsv Thu Mar 17 12:26:24 2022 +0000
b
b'@@ -0,0 +1,387 @@\n+\tC041\tC078\tC115\tC072\tC050\tC069\tC108\tC121\tC085\tC122\tC114\tC084\tC014\tC029\tC006\tC113\tC107\tC129\tC037\tC092\tC021\tC016\tC109\tC079\tC116\tC123\tC135\tC134\tC070\tC056\tC062\tC082\tC061\tC133\tC087\tC025\tC096\tC009\tC075\tC052\tC130\tC015\tC060\tC131\tC132\tC073\tC090\tC091\tC007\tC074\tC104\tC102\tC103\tC001\tC042\tC010\tC127\tC101\tC004\tC110\tC086\tC051\tC002\tC032\tC039\tC057\tC105\tC095\tC033\tC137\tC138\tC066\tC003\tC040\tC011\tC043\tC106\tC048\tC128\tC058\tC088\tC031\tC100\tC053\tC081\tC023\tC099\tC049\tC044\tC017\tC064\tC098\tC008\tC077\tC119\tC094\tC093\tC124\tC083\tC063\tC035\tC047\tC024\tC022\tC097\tC076\tC030\tC118\tC126\tC038\tC080\tC136\tC117\tC125\tC112\tC026\tC005\tC013\tC034\tC046\tC055\tC071\tC036\tC027\tC067\tC120\tC111\tC068\tC089\tC028\tC045\tC059\tC054\tC018\tC012\tC065\tC020\tC019\n+Perylene_2H12\t0.034042320821081655\t0.0\t0.009403305812291722\t0.0\t0.0\t0.0007629860430236488\t0.0\t0.02266571495909087\t0.0\t0.00042930183801011794\t0.0010942178146441293\t0.0\t0.0001117117003335653\t0.05556820858114851\t0.03887203120835998\t0.021564634193163717\t0.0022083391714712306\t0.0\t0.0004061839030021898\t0.0\t0.0018210079574397177\t0.0044706250747301906\t0.01250288907877498\t0.006335694044596287\t0.003011359279134579\t0.0\t0.0\t0.003930271682883756\t0.029864036675543378\t0.033018049711041476\t0.0004687982349281314\t0.007804473039067054\t0.002244789787464286\t0.029628841784107067\t0.014383091871802837\t0.014161550072453797\t0.001128422100278789\t0.013544568179924128\t0.0\t0.00132564265609464\t0.011781737207632755\t0.05804594652851786\t0.0004594194032417144\t0.0\t0.022569744034791155\t0.003620342432401012\t0.0\t0.0\t0.03613854198839842\t0.0286726434703242\t0.001143688038067479\t0.005799936661442363\t0.13493653247562692\t0.002638979226994201\t0.028778571241930968\t0.029072485853110962\t0.013627299382366974\t0.0\t0.017403160411527552\t0.0\t0.01696060588991595\t0.03346178919132061\t0.006537280745228869\t0.2724163273071701\t0.0011885136685260815\t0.0\t0.0\t0.0\t0.01407550398904445\t0.0\t0.0\t0.007488357223206149\t0.0054751753426494736\t0.04711906877095722\t0.016701752360186138\t0.0\t0.0003863811526605843\t0.09590552038673016\t0.0016953565887339343\t0.0\t0.0\t0.0011884667247023498\t0.0005586376752260124\t0.0\t0.0\t0.009836286834224597\t0.031057411717545502\t0.020241422379465534\t0.0017303898195810917\t0.01643181731678299\t0.006973253922278287\t0.0029680242363230892\t0.01637963228496475\t0.0027421080262169137\t0.004213724397739657\t0.004913056324491477\t0.0\t0.01662703683355905\t0.09138073163058372\t0.0007929414781066192\t0.0009284532952288166\t0.0032169859262519544\t0.00042486350067965484\t0.020684766458239814\t0.0\t0.009401490687877585\t0.0001566128276370904\t0.0\t0.0\t0.01845259904863743\t0.010148692871241129\t0.0\t0.0\t0.0\t0.0\t0.011101259469173343\t0.004936023674032282\t0.074777491052486\t0.004057752902340233\t0.0\t0.0042980737432896194\t0.0019457316265322528\t0.010236242948902087\t0.009683977809973808\t0.010313078597559113\t0.0\t0.01973961112268132\t0.003127566702438563\t0.0015951608764421129\t0.026946864038107345\t0.0\t0.0\t0.04404385643133546\t0.008207060848564535\t0.009884819221109377\t0.00021371301103211425\t0.0\t0.017348705935088908\n+Perylene\t0.0\t0.0\t0.0\t0.0011051022816824852\t0.000542635114331976\t0.0\t0.001974323722348113\t0.0\t0.00036528875300253366\t0.0\t0.0\t0.0\t0.00014947393267217954\t0.0010447997154295929\t0.00010856751348875029\t0.0\t0.0\t0.0\t0.0954644571424393\t0.0013743898833611904\t0.0\t0.00010959251829602013\t0.004400336771811676\t0.005640218359074094\t0.0034705539472536817\t0.0\t0.0007418715312107792\t0.0\t0.00484873460276118\t0.0\t9.037583761987847e-05\t0.000622866201243807\t0.001347941327641855\t0.001852257860353395\t0.004606412002464841\t0.006353382143201052\t0.0\t0.015172321265656215\t0.0009621639812959181\t0.00019203912971202535\t0.0\t0.002656194724082899\t0.0018101204979896026\t0.0\t0.01595010968620319\t0.00010352564653640499\t0.05326947187783303\t0.0\t2.180451050181285e-05\t0.0013102161710618133\t0.0\t0.00016015256120527178\t0.00034724475140860604\t0.0001415619197232959\t0.0\t0.0004607881598805477\t0.0\t0.00031825716888322155\t0.0008604144211635554\t0.0\t0.018538826563359888\t0.0006458912475918673\t0.006566648646852698\t0.08002800104674827\t0.0\t0.03711581410513316\t0.004394308382007739\t0'..b'83988276892\t0.0035207292343569317\t0.10987461344359015\t0.0995552931947784\t0.0\t0.02603057882086362\t0.009163429668087301\t0.02701827652600082\t0.022851348027207164\t0.04035205759985755\t0.04134708497797248\t0.026834397514291654\t0.036093227930548616\t0.049759012683381856\t0.06076472314328744\t0.0038450623653801695\t0.06544561163320242\t0.06983291785111552\t0.03160141790195927\t0.07628840476846281\t0.005542902834667562\t0.013414348590015168\t0.044447880021334804\t0.01875953929934765\t0.018835245212120343\t0.11131586647362307\t0.04963681669908816\t0.07105010197627538\t0.011450582126856977\t0.0011712769771746697\t0.016098230627855103\t0.08322073490829028\t0.23390437333814124\t0.16039438008281268\t0.06954275494932319\t0.0024221700476814824\t0.00515804448520173\t0.0\t0.006769627476754328\t0.07889307267504196\t0.2685116670683215\t0.06358431078870092\t0.45729034645969496\t0.02451177284259372\t0.022641541906193757\t0.03192716680053657\t0.023420455855089645\t0.04691670387498694\t0.09237915075575202\t0.14564090824538856\t0.061712990090093754\t0.027358994739280263\t0.2626873489921074\t0.015328708395240063\t0.004284778300052697\t0.025826592848464523\t0.04489331319056179\t0.0060044469175185656\t0.013332538298288591\t0.012104481515422903\t0.01788173240386975\t0.017753153886406584\n+Testosterone\t0.01832692827067662\t0.25708889616547254\t0.016914214486909\t0.010497459637120093\t0.011292136765007413\t0.07854326470849173\t0.051828612471147714\t0.0\t0.11096263600615171\t0.03803625569335047\t0.015466574193938263\t0.017464224629416103\t0.0396498515658372\t0.032589892401115664\t0.0026573741974754107\t0.0726094461747022\t0.02206836401133249\t0.024890595444292736\t0.013977499326825365\t0.07313925552406138\t0.04916903577114806\t0.04487541380179884\t0.04595696180372116\t0.04744571805625805\t0.023246262561604055\t0.0\t0.03289082001742213\t0.04884873910759355\t0.048168833082862426\t0.015585669464735831\t0.01209683117745796\t0.052398588924045406\t0.047567855409756755\t0.06615763961573513\t0.060938760179256146\t0.01190326771842562\t0.03956148300556824\t0.17890025658323153\t0.05591745580873676\t0.02052732719153347\t0.13675793453833146\t0.08604104970406877\t0.02492325451805615\t0.007729713027817932\t0.0046637252740731406\t0.05714734644623649\t0.07364438632599457\t0.05511099607220748\t0.000679415365897095\t0.0418936622077723\t0.008561483499184369\t0.03557953743498848\t0.01586924211004424\t0.08684508499736211\t0.026287115042115554\t0.07301044868347491\t0.07768802849611274\t0.019715395473745297\t0.08442913483594439\t0.01607198520998351\t0.022453348279262646\t0.03197257076686277\t0.041931622310372695\t0.05750504025406854\t0.011916728202806962\t0.01333295417764225\t0.012399253949021666\t0.02808811327069467\t0.0017074187504466961\t0.009967834769456748\t0.02085554563859335\t0.04997999547771599\t0.07382128524155089\t0.0592555937705767\t0.017659881197733138\t0.011889639783467538\t0.012176004401160437\t0.0004723102800365613\t0.016649821311006625\t0.027469483924424637\t0.0\t0.05594561264602909\t0.012288159447706781\t0.007666303250554177\t0.016540735726608774\t0.023082025894639396\t0.026639982888311486\t0.04448912738354493\t0.009933275607301548\t0.05341603733927623\t0.04602566615753049\t0.017005814943264502\t0.041353430266686524\t0.023668803021640105\t0.01360918918861156\t0.04440523963398467\t0.006955417605978499\t0.012764216194926174\t0.025888417423570304\t0.01834094573679885\t0.05957704402307372\t0.027036820079779453\t0.053440578853859756\t0.038509279502523946\t0.012270337045054125\t0.017373773531144204\t0.02012565918099467\t0.03949543012957574\t0.19810047128279942\t0.15106267412048285\t0.0439090832339104\t0.002894347700864888\t0.04555678906035976\t0.0\t0.006309598786227622\t0.03377929250085494\t0.29261470974897175\t0.054180994026571\t0.5075004294506457\t0.011524692855668317\t0.025537735184035078\t0.014312333948356435\t0.01295964420252676\t0.029658562360474372\t0.05668464836056643\t0.16354511815610986\t0.06606186066097765\t0.033704054022056514\t0.32384244954413266\t0.015660645267203253\t0.007194353196421112\t0.014491355395205706\t0.018079814914925284\t0.006996524338430169\t0.013851529589089768\t0.011755444979303921\t0.014872175594096931\t0.005640163093614845\n'