Repository 'sirius_csifingerid'
hg clone https://eddie.galaxyproject.org/repos/tomnl/sirius_csifingerid

Changeset 1:cf9f900eeaff (2019-05-31)
Previous changeset 0:110834b7923b (2018-07-24) Next changeset 2:6071be113c51 (2019-06-03)
Commit message:
planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 33ce89cb18109a9dd30c640e934ea94734782bec-dirty
modified:
sirius_csifingerid.py
sirius_csifingerid.xml
added:
test-data/ML006801.tsv
test-data/ML006801.txt
test-data/generic.msp
test-data/generic.tsv
test-data/historic.tsv
test-data/historic_input.msp
test-data/sirus_csifingerid_test1.tsv
removed:
test-data/input.msp
test-data/sirus_csifingerid.tabular
b
diff -r 110834b7923b -r cf9f900eeaff sirius_csifingerid.py
--- a/sirius_csifingerid.py Tue Jul 24 08:58:00 2018 -0400
+++ b/sirius_csifingerid.py Fri May 31 04:36:10 2019 -0400
[
b'@@ -1,13 +1,20 @@\n+from __future__ import absolute_import, print_function\n import argparse\n+import csv\n+import sys\n+import six\n+import re\n import os\n+import tempfile\n+import multiprocessing\n import glob\n-import shutil\n-from subprocess import Popen, PIPE\n-\n+import uuid\n+from collections import defaultdict\n \n parser = argparse.ArgumentParser()\n-parser.add_argument(\'--input\')\n-parser.add_argument(\'--db_online\')\n+parser.add_argument(\'--input_pth\')\n+parser.add_argument(\'--result_pth\')\n+parser.add_argument(\'--database\')\n parser.add_argument(\'--profile\')\n parser.add_argument(\'--candidates\')\n parser.add_argument(\'--ppm_max\')\n@@ -15,79 +22,264 @@\n parser.add_argument(\'--results_name\')\n parser.add_argument(\'--out_dir\')\n parser.add_argument(\'--tool_directory\')\n+parser.add_argument(\'--temp_dir\')\n+\n+parser.add_argument(\'--meta_select_col\', default=\'all\')\n+parser.add_argument(\'--cores_top_level\', default=1)\n+parser.add_argument(\'--chunks\', default=1)\n+parser.add_argument(\'--minMSMSpeaks\', default=1)\n+parser.add_argument(\'--schema\', default=\'msp\')\n args = parser.parse_args()\n-print args\n+print(args)\n+\n+\n+if args.temp_dir:\n+    wd = os.path.join(args.temp_dir, \'temp\')\n+    os.mkdir(wd)\n+\n+    if not os.path.exists(wd):\n+        os.mkdir(wd)\n+\n+else:\n+    td = tempfile.mkdtemp()\n+    wd = os.path.join(td, str(uuid.uuid4()))\n+    os.mkdir(wd)\n+\n+######################################################################\n+# Setup parameter dictionary\n+######################################################################\n+paramd = defaultdict()\n+paramd["cli"] = {}\n+paramd["cli"]["--database"] = args.database\n+paramd["cli"]["--profile"] = args.profile\n+paramd["cli"]["--candidates"] = args.candidates\n+paramd["cli"]["--ppm-max"] = args.ppm_max\n+if args.polarity == \'positive\':\n+    paramd["default_ion"] = "[M+H]+"\n+elif args.polarity == \'negative\':\n+    paramd["default_ion"] = 
"[M-H]-"\n+else:\n+    paramd["default_ion"] = \'\'\n+\n+print(paramd)\n+\n+\n+######################################################################\n+# Setup regular expressions for MSP parsing dictionary\n+######################################################################\n+regex_msp = {}\n+regex_msp[\'name\'] = [\'^Name(?:=|:)(.*)$\']\n+regex_msp[\'polarity\'] = [\'^ion.*mode(?:=|:)(.*)$\', \'^ionization.*mode(?:=|:)(.*)$\', \'^polarity(?:=|:)(.*)$\']\n+regex_msp[\'precursor_mz\'] = [\'^precursor.*m/z(?:=|:)\\s*(\\d*[.,]?\\d*)$\', \'^precursor.*mz(?:=|:)\\s*(\\d*[.,]?\\d*)$\']\n+regex_msp[\'precursor_type\'] = [\'^precursor.*type(?:=|:)(.*)$\', \'^adduct(?:=|:)(.*)$\', \'^ADDUCTIONNAME(?:=|:)(.*)$\']\n+regex_msp[\'num_peaks\'] = [\'^Num.*Peaks(?:=|:)\\s*(\\d*)$\']\n+regex_msp[\'msp\'] = [\'^Name(?:=|:)(.*)$\']  # Flag for standard MSP format\n \n-result_pth = os.path.join(args.out_dir, args.results_name)\n-with open(args.input,"r") as infile:\n- \n+regex_massbank = {}\n+regex_massbank[\'name\'] = [\'^RECORD_TITLE:(.*)$\']\n+regex_massbank[\'polarity\'] = [\'^AC\\$MASS_SPECTROMETRY:\\s+ION_MODE\\s+(.*)$\']\n+regex_massbank[\'precursor_mz\'] = [\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_M/Z\\s+(\\d*[.,]?\\d*)$\']\n+regex_massbank[\'precursor_type\'] = [\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_TYPE\\s+(.*)$\']\n+regex_massbank[\'num_peaks\'] = [\'^PK\\$NUM_PEAK:\\s+(\\d*)\']\n+regex_massbank[\'cols\'] = [\'^PK\\$PEAK:\\s+(.*)\']\n+regex_massbank[\'massbank\'] = [\'^RECORD_TITLE:(.*)$\']  # Flag for massbank format\n+\n+if args.schema == \'msp\':\n+    meta_regex = regex_msp\n+elif args.schema == \'massbank\':\n+    meta_regex = regex_massbank\n+elif args.schema == \'auto\':\n+    # If auto we just check for all the available paramter names and then determine if Massbank or MSP based on\n+    # the name parameter\n+    meta_regex = {}\n+    meta_regex.update(regex_massbank)\n+    meta_regex[\'name\'].extend(regex_msp[\'name\'])\n+    
meta_regex[\'polarity\'].extend(regex_msp[\'polarity\'])\n+    meta_regex[\'precursor_mz\'].extend(regex_msp[\'precursor_mz\'])\n+    meta_regex[\'precursor_type\'].extend(regex_msp[\'precursor_type\'])\n+    meta_regex[\'num_peaks\'].extend(regex_msp[\'num_peaks\'])\n+    meta_regex[\'msp\'] = regex_msp[\'msp\']\n+\n+    p'..b'             paramd["cli"]["--precursor"] = meta_info[\'precursor_mz\']\n+\n+            # =============== Create CLI cmd for metfrag ===============================\n+            cmd = "sirius --fingerid"\n+            for k, v in six.iteritems(paramd["cli"]):\n+                    cmd += " {} {}".format(str(k), str(v))\n+            paramds[paramd["SampleName"]] = paramd\n \n-                # run\n-                print cmd_command\n-                os.system(cmd_command)\n-     \n-                # if fingerid found hits\n-                mtching_files = glob.glob(os.path.join(tmpdir, "*_tmpspec_", "summary_csi_fingerid.csv"))\n-                if mtching_files:\n-                    first_read=True\n-                    if len(mtching_files)>1:\n-                        print \'multiple folder names being used\', mtching_files\n-                    latest_file = max(mtching_files, key=os.path.getmtime)\n- \n-                    with open(result_pth, \'a\') as outfile2:\n+            # =============== Run metfrag ==============================================\n+            # Filter before process with a minimum number of MS/MS peaks\n+            if linesread >= float(args.minMSMSpeaks):\n+\n+                if int(args.cores_top_level) > 1:\n+                    cmds.append(cmd)\n+                else:\n+                    print(cmd)\n+                    os.system(cmd)\n+\n+            meta_info = {}\n+\n+def work(cmds):\n+    return [os.system(cmd) for cmd in cmds]\n+\n+\n+# Perform multiprocessing on command line call level\n+if int(args.cores_top_level) > 1:\n+    cmds_chunks = [cmds[x:x + int(args.chunks)] for x in list(range(0, 
len(cmds), int(args.chunks)))]\n+    pool = multiprocessing.Pool(processes=int(args.cores_top_level))\n+    pool.map(work, cmds_chunks)\n+    pool.close()\n+    pool.join()\n+\n+######################################################################\n+# Concatenate and filter the output\n+######################################################################\n+# outputs might have different headers. Need to get a list of all the headers before we start merging the files\n+# outfiles = [os.path.join(wd, f) for f in glob.glob(os.path.join(wd, "*_metfrag_result.csv"))]\n+outfiles = glob.glob(os.path.join(wd, \'*\', \'summary_csi_fingerid.csv\'))\n+\n+print(outfiles)\n \n-                        with open(latest_file) as infile_csi:\n-                            for iline in infile_csi:\n-                                if "inchi" in iline:\n-                                    if first_read:\n-                                        iline = iline.replace("inchi","InChI")\n-                                        iline = iline.replace("rank", "Rank")\n-                                        iline = iline.replace("name", "Name")\n-                                        iline = iline.replace("score", "Score")\n-                                        outfile2.write("UID\\t"+iline)\n-                                        first_read = False\n-                                else:\n-                                    outfile2.write(featid+"\\t"+ iline)\n-                shutil.rmtree(tmpdir)\n+headers = []\n+c = 0\n+for fn in outfiles:\n+    with open(fn, \'r\') as infile:\n+        reader = csv.reader(infile, delimiter=\'\\t\')\n+        if sys.version_info >= (3, 0):\n+            headers.extend(next(reader))\n+        else:\n+            headers.extend(reader.next())\n+        break\n+\n+headers = list(paramd[\'additional_details\'].keys()) + headers\n+\n \n+with open(args.result_pth, \'a\') as merged_outfile:\n+    dwriter = csv.DictWriter(merged_outfile, 
fieldnames=headers, delimiter=\'\\t\', quotechar=\'"\',\n+        quoting=csv.QUOTE_NONNUMERIC,)\n+    dwriter.writeheader()\n+\n+    for fn in outfiles:\n+        print(fn)\n+\n+        with open(fn) as infile:\n+            reader = csv.DictReader(infile, delimiter=\'\\t\')\n+\n+            ad = paramds[fn.split(os.sep)[-2]][\'additional_details\']\n+\n+            for line in reader:\n+\n+                line.update(ad)\n+\n+                dwriter.writerow(line)\n'
b
diff -r 110834b7923b -r cf9f900eeaff sirius_csifingerid.xml
--- a/sirius_csifingerid.xml Tue Jul 24 08:58:00 2018 -0400
+++ b/sirius_csifingerid.xml Fri May 31 04:36:10 2019 -0400
[
@@ -1,27 +1,33 @@
-<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.1.0">
+<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.2.0">
     <description> </description>
     <requirements>
-        <requirement type="package" version="4.0.0">sirius-csifingerid</requirement>
+        <requirement type="package" version="4.0.1">sirius-csifingerid</requirement>
     </requirements>
     <command detect_errors="exit_code"  interpreter="python">
     <![CDATA[
 
-       #set $tool_directory = $getVar('__tool_directory__', '../../../../tools/sirius_csifingerid')
+
         sirius_csifingerid.py
-            --input "$input"
-            --db_online '$db_online'
+            --input_pth "$input"
+            --database '$database'
             --profile $profile
             --candidates $candidates
             --ppm_max $ppm_max
             --polarity $polarity
-            --results_name 'csi_results.tsv'
             --out_dir .
-            --tool_directory $tool_directory
-            
+            --result_pth sirius_all_summary.tsv
+            --cores_top_level 1
+            --meta_select_col $meta_select_col
+            --minMSMSpeaks $minMSMSpeaks
+            --schema $schema
+            --temp_dir .
+
+
+
     ]]></command>
     <inputs>
-        <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input"/>
-        <param name="db_online" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--db_online">
+        <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input_pth"/>
+        <param name="database" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--database">
             <option value="PubChem" >PubChem</option>
             <option selected="true" value="hmdb">HMDB</option>
             <option value="kegg">KEGG</option>
@@ -32,23 +38,45 @@
         <param name="ppm_max" type="integer" value="10" label="Mass deviation of the fragment peaks in ppm" argument="--ppm_max"/>
         <param name="candidates" type="integer" value="5" label="The maximum number of candidates in the output" argument="--candidates"/>
         <param name="polarity" type="select" label="Ion Mode" argument="--polarity">
-            <option value="pos" selected="true">Positive</option>
-            <option value="neg">Negative</option>
+            <option value="positive" selected="true">Positive</option>
+            <option value="negative">Negative</option>
         </param> 
         <param name="profile" type="select" label="Analysis used" argument="--profile">
             <option value="orbitrap" selected="true">Orbitrap</option>
             <option value="qtof">qTOF</option>
             <option value="fticr">FT-ICR</option>
         </param>
+        <param name="schema" type="select" label="Schema"
+               help="The schema used for the MSP file (auto will try to automatically determine the schema)">
+            <option value="auto" selected="True">Auto</option>
+            <option value="msp" >Generic MSP</option>
+            <option value="massbank">MassBank</option>
+        </param>
+        <param name="meta_select_col" type="select" label="Choose how additional metadata columns are extracted"
+               help="The SIRIUS-CSI:FingerID output can have additional metadata columns added. These can be extracted
+               either from all MSP parameters or from the 'Name' or 'RECORD_TITLE' MSP parameter. Additionally, columns
+               can be added from the 'Name' or 'RECORD_TITLE' parameter by splitting on | and :
+               e.g. 'MZ:100.2 | RT:20 | xcms_grp_id:1' would create MZ, RT and xcms_grp_id columns">
+            <option value="name" selected="true">Extra metadata columns from the Name or RECORD_TITLE</option>
+            <option value="name_split" >Extra metadata columns from the Name or RECORD_TITLE (each column is split on "|" and ":" ) </option>
+            <option value="all">Extra metadata columns from all MSP parameters</option>
+        </param>
+        <param name="minMSMSpeaks" type="integer" label="Minimum number of MS/MS peaks" value="0"/>
     </inputs>
     <outputs>
-        <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="csi_results.tsv"  />
+        <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="sirius_all_summary.tsv"  />
     </outputs>
     <tests>
+
         <test>
-            <param name="input" value="input.msp"/>
-            <output name="results" file="sirus_csifingerid.tabular"/>
+            <param name="input" value="ML006801.txt"/>
+            <output name="results" file="ML006801.tsv"/>
         </test>
+        <test>
+            <param name="input" value="generic.msp"/>
+            <output name="results" file="generic.tsv"/>
+        </test>
+
     </tests>
     <help>
 ----------------
@@ -74,7 +102,7 @@
 
 The following databases are available:
 
-* PubChem (default)
+* PubChem
 
 * hmdb
 
@@ -113,6 +141,7 @@
 ---------------------------
 
 - **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**
+- **Thomas N Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)**
 - **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**
 
     </help>
b
diff -r 110834b7923b -r cf9f900eeaff test-data/ML006801.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ML006801.tsv Fri May 31 04:36:10 2019 -0400
[
@@ -0,0 +1,2 @@
+"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links"
+"L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+" "1_tmpspec" "" "0.0" "GHSJKUNUIHUPDF" "InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)" "C5H12N2O2S" "1" "-7.0831442988613125" "L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+" "C(CSCC(C(=O)O)N)N" "" "20049;99558;6995002;12898158;25246097;54754416;57517225" "HMDB:(33518);Natural Products:(UNPD166389);CHEBI:(497734);Plantcyc:(S-2-AMINOETHYL-L-CYSTEINE);Biocyc:(S-2-AMINOETHYL-L-CYSTEINE THIALYSINE)"
b
diff -r 110834b7923b -r cf9f900eeaff test-data/ML006801.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ML006801.txt Fri May 31 04:36:10 2019 -0400
[
@@ -0,0 +1,63 @@
+ACCESSION: ML004801
+RECORD_TITLE: L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+
+DATE: 2014.11.12
+AUTHORS: Mark Earll, Stephan Beisken, EMBL-EBI
+LICENSE: CC BY-SA
+COPYRIGHT: Copyright (C) 2014, European Molecular Biology Laboratory - European Bioinformatics Institute (EMBL-EBI), Hinxton, UK.
+PUBLICATION: Beisken S et al (2014) Scientific Data, 1:140029, DOI:10.1038/sdata.2014.29. http://www.ebi.ac.uk/metabolights/MTBLS38
+COMMENT: CONFIDENCE standard compound
+COMMENT: ML_ID 48
+CH$NAME: L-thialysine
+CH$NAME: (2R)-2-amino-3-(2-aminoethylsulfanyl)propanoic acid
+CH$COMPOUND_CLASS: N/A; Environmental Standard
+CH$FORMULA: C5H12N2O2S
+CH$EXACT_MASS: 164.0619
+CH$SMILES: NCCSC[C@H](N)C(=O)O
+CH$IUPAC: InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)/t4-/m0/s1
+CH$LINK: CHEBI 497734
+CH$LINK: PUBCHEM CID:99558
+CH$LINK: INCHIKEY GHSJKUNUIHUPDF-BYPYZUCNSA-N
+CH$LINK: CHEMSPIDER 89945
+AC$INSTRUMENT: LTQ Orbitrap Velos Thermo Scientific
+AC$INSTRUMENT_TYPE: LC-ESI-ITFT
+AC$MASS_SPECTROMETRY: MS_TYPE MS2
+AC$MASS_SPECTROMETRY: ION_MODE POSITIVE
+AC$MASS_SPECTROMETRY: IONIZATION ESI
+AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD
+AC$MASS_SPECTROMETRY: COLLISION_ENERGY 50 % (nominal)
+AC$MASS_SPECTROMETRY: RESOLUTION 7500
+AC$CHROMATOGRAPHY: COLUMN_NAME HSS T3 1.7 um, 2x150 mm, Waters
+AC$CHROMATOGRAPHY: FLOW_GRADIENT 100/0 at 0 min, 90/10 at 7.5 min, 0/100 at 10 min, 0/100 at 12 min, 100/0 at 18 min, 100/0 at 25 min
+AC$CHROMATOGRAPHY: FLOW_RATE 250 uL/min at 0 min, 400 uL/min at 7.5 min
+AC$CHROMATOGRAPHY: RETENTION_TIME 1.2 min
+AC$CHROMATOGRAPHY: SOLVENT A 0.2% Formic Acid
+AC$CHROMATOGRAPHY: SOLVENT B 98/2/0.2 Acetonitrile/Water/Formic Acid
+MS$FOCUSED_ION: BASE_PEAK 165.069
+MS$FOCUSED_ION: PRECURSOR_M/Z 165.0692
+MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+
+MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1
+MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included
+MS$DATA_PROCESSING: WHOLE RMassBank 1.7.0
+PK$SPLASH: splash10-00di-0900000000-99e0ec9e6034dff32dc8
+PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm)
+  76.0215 C2H6NS+ 1 76.0215 -1.27
+  88.0392 C3H6NO2+ 1 88.0393 -1.19
+  92.0162 C2H6NOS+ 1 92.0165 -2.73
+  102.037 C4H8NS+ 1 102.0372 -1.93
+  109.0271 C4H3N3O+ 1 109.0271 0.61
+  120.0112 C3H6NO2S+ 1 120.0114 -1.8
+  148.0424 C5H10NO2S+ 1 148.0427 -1.8
+  165.0699 C5H13N2O2S+ 1 165.0692 4.09
+  174.0753 C5H10N4O3+ 1 174.0747 3.5
+PK$NUM_PEAK: 9
+PK$PEAK: m/z int. rel.int.
+  76.0215 18351.9 16
+  88.0392 41980.6 36
+  92.0162 9969.8 8
+  102.037 24583.1 21
+  109.0271 1331.3 1
+  120.0112 1140642.2 999
+  148.0424 40689.7 35
+  165.0699 12929.9 11
+  174.0753 1548.6 1
+//
\ No newline at end of file
b
diff -r 110834b7923b -r cf9f900eeaff test-data/generic.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generic.msp Fri May 31 04:36:10 2019 -0400
b
@@ -0,0 +1,123 @@
+NAME:  MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707 3487.4296875 4.61
+51.0193099975586 3390.96948242188 4.49
+53.0031509399414 10011.958984375 13.25
+53.5898513793945 4252.7880859375 5.63
+54.3787727355957 3541.5107421875 4.69
+69.0455169677734 9650.0107421875 12.77
+70.0660934448242 37168.609375 49.18
+82.9910659790039 4077.36694335938 5.39
+
+NAME:  MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA
+PRECURSORMZ: 72.0815277099609
+COMMENT:
+Num Peaks: 6
+51.773567199707 818.313903808594 10.98
+54.0346794128418 1247.91137695312 16.75
+54.6847991943359 967.616882324219 12.98
+56.050350189209 1780.01037597656 23.90
+58.4994125366211 975.196228027344 13.09
+72.0814056396484 1660.50390625 22.29
+
+NAME:  MZ:72.0815 | RT:1857 | scan:NA
+PRECURSORMZ: 72.08154296875
+COMMENT:
+Num Peaks: 4
+56.0504341125488 1838.78173828125 46.54
+59.9103507995605 701.556762695312 17.75
+63.7723731994629 650.224975585938 16.46
+72.0814590454102 760.228637695312 19.25
+
+NAME:  MZ:76.0400 | RT:1606 | XCMS_group:5 | file:1 | scan:NA
+PRECURSORMZ: 76.0400390625
+COMMENT:
+Num Peaks: 4
+53.2376174926758 3224.35571289062 25.41
+60.3291244506836 3193.19482421875 25.17
+73.7529830932617 3305.61401367188 26.05
+82.5309600830078 2965.41772460938 23.37
+
+NAME:  MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218658447266
+COMMENT:
+Num Peaks: 7
+53.6282501220703 15316.7431640625 1.07
+59.967342376709 251727.734375 17.51
+61.0115814208984 80113.8046875 5.57
+62.9908714294434 93065.1015625 6.47
+63.9986305236816 950876.9375 66.13
+79.0219345092773 33032.984375 2.30
+95.4936447143555 13826.033203125 0.96
+
+NAME:  MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218811035156
+COMMENT:
+Num Peaks: 5
+59.1125831604004 67799.1953125 3.10
+59.9673652648926 345613.1875 15.83
+62.9906845092773 117693.296875 5.39
+63.9986686706543 1585970.25 72.62
+80.5974655151367 66719.4609375 3.06
+
+NAME:  MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA
+PRECURSORMZ: 79.0218887329102
+COMMENT:
+Num Peaks: 12
+53.1700401306152 2441.47143554688 2.54
+55.1893730163574 2006.07958984375 2.08
+58.9013671875 2539.39086914062 2.64
+59.9673500061035 13423.1376953125 13.94
+61.0115776062012 4831.0986328125 5.02
+62.9908828735352 3668.52905273438 3.81
+63.9986190795898 54386.6640625 56.50
+73.8388671875 2330.30126953125 2.42
+78.5768051147461 2563.25 2.66
+79.0221328735352 2581.44604492188 2.68
+96.8009872436523 2530.70141601562 2.63
+99.6652908325195 2961.3095703125 3.08
+
+NAME:  MZ:79.9904 | RT:1284 | XCMS_group:11 | file:1 | scan:NA
+PRECURSORMZ: 79.9903564453125
+COMMENT:
+Num Peaks: 3
+51.6917915344238 584.212829589844 31.93
+53.0398750305176 649.807922363281 35.48
+97.3154754638672 596.341003417969 32.59
+
+NAME:  MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.060661315918
+COMMENT:
+Num Peaks: 4
+53.0031318664551 9658.7939453125 60.81
+53.1939277648926 1998.81518554688 12.58
+80.3447494506836 2044.23645019531 12.87
+101.307479858398 2181.85522460938 13.73
+
+NAME:  MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606307983398
+COMMENT:
+Num Peaks: 11
+52.6782836914062 1061.12646484375 3.59
+53.0032196044922 15176.8583984375 51.38
+53.1121788024902 1193.6044921875 4.039
+53.9984169006348 2790.28930664062 9.45
+54.0287094116211 999.250427246094 3.38
+56.7024726867676 1171.42797851562 3.96
+69.0346069335938 1878.03894042969 3.36
+72.9083633422852 1256.455078125 4.25
+74.0740356445312 1324.07055664062 4.48
+80.5324630737305 1329.61022949219 4.50
+91.0167770385742 1362.0029296875 4.61
+
+NAME:  MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA
+PRECURSORMZ: 86.0606536865234
+COMMENT:
+Num Peaks: 5
+53.0031509399414 29580.330078125 61.35
+55.3490409851074 4989.64990234375 10.35
+61.990592956543 4089.9619140625 8.48
+63.2290992736816 4168.97412109375 8.64
+67.6647109985352 5392.48779296875 11.18
b
diff -r 110834b7923b -r cf9f900eeaff test-data/generic.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generic.tsv Fri May 31 04:36:10 2019 -0400
b
@@ -0,0 +1,6 @@
+"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links"
+"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "7_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-75.67854201438384" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)"
+"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "5_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-75.82312484178658" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)"
+"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "2_tmpspec" "" "0.0" "RWRDLPDLKQPQOW" "InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2" "C4H9N" "1" "-136.14546214244544" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "C1CCNC1" "" "31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985" "HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)"
+"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "6_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-86.79174845072117" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)"
+"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "10_tmpspec" "" "0.0" "HNJBEVLQSNELDL" "InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)" "C4H7NO" "1" "-149.09880138892078" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "C1CC(=NC1)O" "" "12025;3956071;10419134;12197590;12197592;18999930;20030003;20589568;58329813;90472990;91343693;101225382;101796586;123509162" "HMDB:(2039);KNApSAcK:(38233);Natural Products:(UNPD211738);CHEBI:(36592);HSDB:(616-45-5);Plantcyc:(CPD-19607)"
b
diff -r 110834b7923b -r cf9f900eeaff test-data/historic.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/historic.tsv Fri May 31 04:36:10 2019 -0400
b
@@ -0,0 +1,3 @@
+"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links"
+"19" "2_tmpspec" "" "0.0" "RWRDLPDLKQPQOW" "InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2" "C4H9N" "1" "-136.14546214244544" "19" "C1CCNC1" "" "31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985" "HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)"
+"19" "4_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-86.79174845072117" "19" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)"
b
diff -r 110834b7923b -r cf9f900eeaff test-data/historic_input.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/historic_input.msp Fri May 31 04:36:10 2019 -0400
b
@@ -0,0 +1,61 @@
+NAME: 1
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707 3487.4296875
+51.0193099975586 3390.96948242188
+53.0031509399414 10011.958984375
+53.5898513793945 4252.7880859375
+54.3787727355957 3541.5107421875
+69.0455169677734 9650.0107421875
+70.0660934448242 37168.609375
+82.9910659790039 4077.36694335938
+
+NAME: 2
+PRECURSORMZ: 72.0815277099609
+Comment:
+Num Peaks: 6
+51.773567199707 818.313903808594
+54.0346794128418 1247.91137695312
+54.6847991943359 967.616882324219
+56.050350189209 1780.01037597656
+58.4994125366211 975.196228027344
+72.0814056396484 1660.50390625
+
+NAME: 5
+PRECURSORMZ: 76.0400390625
+Comment:
+Num Peaks: 4
+53.2376174926758 3224.35571289062
+60.3291244506836 3193.19482421875
+73.7529830932617 3305.61401367188
+82.5309600830078 2965.41772460938
+
+NAME: 9
+PRECURSORMZ: 79.0218811035156
+Comment:
+Num Peaks: 5
+59.1125831604004 67799.1953125
+59.9673652648926 345613.1875
+62.9906845092773 117693.296875
+63.9986686706543 1585970.25
+80.5974655151367 66719.4609375
+
+NAME: 11
+PRECURSORMZ: 79.9903564453125
+Comment:
+Num Peaks: 3
+51.6917915344238 584.212829589844
+53.0398750305176 649.807922363281
+97.3154754638672 596.341003417969
+
+NAME: 19
+PRECURSORMZ: 86.0606536865234
+Comment:
+Num Peaks: 5
+53.0031509399414 29580.330078125
+55.3490409851074 4989.64990234375
+61.990592956543 4089.9619140625
+63.2290992736816 4168.97412109375
+67.6647109985352 5392.48779296875
+
b
diff -r 110834b7923b -r cf9f900eeaff test-data/input.msp
--- a/test-data/input.msp Tue Jul 24 08:58:00 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,61 +0,0 @@
-NAME: 1
-PRECURSORMZ: 70.0658950805664
-Comment:
-Num Peaks: 8
-50.4781379699707 3487.4296875
-51.0193099975586 3390.96948242188
-53.0031509399414 10011.958984375
-53.5898513793945 4252.7880859375
-54.3787727355957 3541.5107421875
-69.0455169677734 9650.0107421875
-70.0660934448242 37168.609375
-82.9910659790039 4077.36694335938
-
-NAME: 2
-PRECURSORMZ: 72.0815277099609
-Comment:
-Num Peaks: 6
-51.773567199707 818.313903808594
-54.0346794128418 1247.91137695312
-54.6847991943359 967.616882324219
-56.050350189209 1780.01037597656
-58.4994125366211 975.196228027344
-72.0814056396484 1660.50390625
-
-NAME: 5
-PRECURSORMZ: 76.0400390625
-Comment:
-Num Peaks: 4
-53.2376174926758 3224.35571289062
-60.3291244506836 3193.19482421875
-73.7529830932617 3305.61401367188
-82.5309600830078 2965.41772460938
-
-NAME: 9
-PRECURSORMZ: 79.0218811035156
-Comment:
-Num Peaks: 5
-59.1125831604004 67799.1953125
-59.9673652648926 345613.1875
-62.9906845092773 117693.296875
-63.9986686706543 1585970.25
-80.5974655151367 66719.4609375
-
-NAME: 11
-PRECURSORMZ: 79.9903564453125
-Comment:
-Num Peaks: 3
-51.6917915344238 584.212829589844
-53.0398750305176 649.807922363281
-97.3154754638672 596.341003417969
-
-NAME: 19
-PRECURSORMZ: 86.0606536865234
-Comment:
-Num Peaks: 5
-53.0031509399414 29580.330078125
-55.3490409851074 4989.64990234375
-61.990592956543 4089.9619140625
-63.2290992736816 4168.97412109375
-67.6647109985352 5392.48779296875
-
b
diff -r 110834b7923b -r cf9f900eeaff test-data/sirus_csifingerid.tabular
--- a/test-data/sirus_csifingerid.tabular Tue Jul 24 08:58:00 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
-2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.1454621424455 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
-UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
-9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072123 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)
b
diff -r 110834b7923b -r cf9f900eeaff test-data/sirus_csifingerid_test1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sirus_csifingerid_test1.tsv Fri May 31 04:36:10 2019 -0400
b
@@ -0,0 +1,4 @@
+UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
+2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.14546214244544 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
+UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
+9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072117 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)