| Previous changeset 0:110834b7923b (2018-07-24) Next changeset 2:6071be113c51 (2019-06-03) |
|
Commit message:
planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 33ce89cb18109a9dd30c640e934ea94734782bec-dirty |
|
modified:
sirius_csifingerid.py sirius_csifingerid.xml |
|
added:
test-data/ML006801.tsv test-data/ML006801.txt test-data/generic.msp test-data/generic.tsv test-data/historic.tsv test-data/historic_input.msp test-data/sirus_csifingerid_test1.tsv |
|
removed:
test-data/input.msp test-data/sirus_csifingerid.tabular |
| b |
| diff -r 110834b7923b -r cf9f900eeaff sirius_csifingerid.py --- a/sirius_csifingerid.py Tue Jul 24 08:58:00 2018 -0400 +++ b/sirius_csifingerid.py Fri May 31 04:36:10 2019 -0400 |
| [ |
| b'@@ -1,13 +1,20 @@\n+from __future__ import absolute_import, print_function\n import argparse\n+import csv\n+import sys\n+import six\n+import re\n import os\n+import tempfile\n+import multiprocessing\n import glob\n-import shutil\n-from subprocess import Popen, PIPE\n-\n+import uuid\n+from collections import defaultdict\n \n parser = argparse.ArgumentParser()\n-parser.add_argument(\'--input\')\n-parser.add_argument(\'--db_online\')\n+parser.add_argument(\'--input_pth\')\n+parser.add_argument(\'--result_pth\')\n+parser.add_argument(\'--database\')\n parser.add_argument(\'--profile\')\n parser.add_argument(\'--candidates\')\n parser.add_argument(\'--ppm_max\')\n@@ -15,79 +22,264 @@\n parser.add_argument(\'--results_name\')\n parser.add_argument(\'--out_dir\')\n parser.add_argument(\'--tool_directory\')\n+parser.add_argument(\'--temp_dir\')\n+\n+parser.add_argument(\'--meta_select_col\', default=\'all\')\n+parser.add_argument(\'--cores_top_level\', default=1)\n+parser.add_argument(\'--chunks\', default=1)\n+parser.add_argument(\'--minMSMSpeaks\', default=1)\n+parser.add_argument(\'--schema\', default=\'msp\')\n args = parser.parse_args()\n-print args\n+print(args)\n+\n+\n+if args.temp_dir:\n+ wd = os.path.join(args.temp_dir, \'temp\')\n+ os.mkdir(wd)\n+\n+ if not os.path.exists(wd):\n+ os.mkdir(wd)\n+\n+else:\n+ td = tempfile.mkdtemp()\n+ wd = os.path.join(td, str(uuid.uuid4()))\n+ os.mkdir(wd)\n+\n+######################################################################\n+# Setup parameter dictionary\n+######################################################################\n+paramd = defaultdict()\n+paramd["cli"] = {}\n+paramd["cli"]["--database"] = args.database\n+paramd["cli"]["--profile"] = args.profile\n+paramd["cli"]["--candidates"] = args.candidates\n+paramd["cli"]["--ppm-max"] = args.ppm_max\n+if args.polarity == \'positive\':\n+ paramd["default_ion"] = "[M+H]+"\n+elif args.polarity == \'negative\':\n+ paramd["default_ion"] = "[M-H]-"\n+else:\n+ paramd["default_ion"] = \'\'\n+\n+print(paramd)\n+\n+\n+######################################################################\n+# Setup regular expressions for MSP parsing dictionary\n+######################################################################\n+regex_msp = {}\n+regex_msp[\'name\'] = [\'^Name(?:=|:)(.*)$\']\n+regex_msp[\'polarity\'] = [\'^ion.*mode(?:=|:)(.*)$\', \'^ionization.*mode(?:=|:)(.*)$\', \'^polarity(?:=|:)(.*)$\']\n+regex_msp[\'precursor_mz\'] = [\'^precursor.*m/z(?:=|:)\\s*(\\d*[.,]?\\d*)$\', \'^precursor.*mz(?:=|:)\\s*(\\d*[.,]?\\d*)$\']\n+regex_msp[\'precursor_type\'] = [\'^precursor.*type(?:=|:)(.*)$\', \'^adduct(?:=|:)(.*)$\', \'^ADDUCTIONNAME(?:=|:)(.*)$\']\n+regex_msp[\'num_peaks\'] = [\'^Num.*Peaks(?:=|:)\\s*(\\d*)$\']\n+regex_msp[\'msp\'] = [\'^Name(?:=|:)(.*)$\'] # Flag for standard MSP format\n \n-result_pth = os.path.join(args.out_dir, args.results_name)\n-with open(args.input,"r") as infile:\n- \n+regex_massbank = {}\n+regex_massbank[\'name\'] = [\'^RECORD_TITLE:(.*)$\']\n+regex_massbank[\'polarity\'] = [\'^AC\\$MASS_SPECTROMETRY:\\s+ION_MODE\\s+(.*)$\']\n+regex_massbank[\'precursor_mz\'] = [\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_M/Z\\s+(\\d*[.,]?\\d*)$\']\n+regex_massbank[\'precursor_type\'] = [\'^MS\\$FOCUSED_ION:\\s+PRECURSOR_TYPE\\s+(.*)$\']\n+regex_massbank[\'num_peaks\'] = [\'^PK\\$NUM_PEAK:\\s+(\\d*)\']\n+regex_massbank[\'cols\'] = [\'^PK\\$PEAK:\\s+(.*)\']\n+regex_massbank[\'massbank\'] = [\'^RECORD_TITLE:(.*)$\'] # Flag for massbank format\n+\n+if args.schema == \'msp\':\n+ meta_regex = regex_msp\n+elif args.schema == \'massbank\':\n+ meta_regex = regex_massbank\n+elif args.schema == \'auto\':\n+ # If auto we just check for all the available paramter names and then determine if Massbank or MSP based on\n+ # the name parameter\n+ meta_regex = {}\n+ meta_regex.update(regex_massbank)\n+ meta_regex[\'name\'].extend(regex_msp[\'name\'])\n+ meta_regex[\'polarity\'].extend(regex_msp[\'polarity\'])\n+ meta_regex[\'precursor_mz\'].extend(regex_msp[\'precursor_mz\'])\n+ meta_regex[\'precursor_type\'].extend(regex_msp[\'precursor_type\'])\n+ meta_regex[\'num_peaks\'].extend(regex_msp[\'num_peaks\'])\n+ meta_regex[\'msp\'] = regex_msp[\'msp\']\n+\n+ p'..b' paramd["cli"]["--precursor"] = meta_info[\'precursor_mz\']\n+\n+ # =============== Create CLI cmd for metfrag ===============================\n+ cmd = "sirius --fingerid"\n+ for k, v in six.iteritems(paramd["cli"]):\n+ cmd += " {} {}".format(str(k), str(v))\n+ paramds[paramd["SampleName"]] = paramd\n \n- # run\n- print cmd_command\n- os.system(cmd_command)\n- \n- # if fingerid found hits\n- mtching_files = glob.glob(os.path.join(tmpdir, "*_tmpspec_", "summary_csi_fingerid.csv"))\n- if mtching_files:\n- first_read=True\n- if len(mtching_files)>1:\n- print \'multiple folder names being used\', mtching_files\n- latest_file = max(mtching_files, key=os.path.getmtime)\n- \n- with open(result_pth, \'a\') as outfile2:\n+ # =============== Run metfrag ==============================================\n+ # Filter before process with a minimum number of MS/MS peaks\n+ if linesread >= float(args.minMSMSpeaks):\n+\n+ if int(args.cores_top_level) > 1:\n+ cmds.append(cmd)\n+ else:\n+ print(cmd)\n+ os.system(cmd)\n+\n+ meta_info = {}\n+\n+def work(cmds):\n+ return [os.system(cmd) for cmd in cmds]\n+\n+\n+# Perform multiprocessing on command line call level\n+if int(args.cores_top_level) > 1:\n+ cmds_chunks = [cmds[x:x + int(args.chunks)] for x in list(range(0, len(cmds), int(args.chunks)))]\n+ pool = multiprocessing.Pool(processes=int(args.cores_top_level))\n+ pool.map(work, cmds_chunks)\n+ pool.close()\n+ pool.join()\n+\n+######################################################################\n+# Concatenate and filter the output\n+######################################################################\n+# outputs might have different headers. Need to get a list of all the headers before we start merging the files\n+# outfiles = [os.path.join(wd, f) for f in glob.glob(os.path.join(wd, "*_metfrag_result.csv"))]\n+outfiles = glob.glob(os.path.join(wd, \'*\', \'summary_csi_fingerid.csv\'))\n+\n+print(outfiles)\n \n- with open(latest_file) as infile_csi:\n- for iline in infile_csi:\n- if "inchi" in iline:\n- if first_read:\n- iline = iline.replace("inchi","InChI")\n- iline = iline.replace("rank", "Rank")\n- iline = iline.replace("name", "Name")\n- iline = iline.replace("score", "Score")\n- outfile2.write("UID\\t"+iline)\n- first_read = False\n- else:\n- outfile2.write(featid+"\\t"+ iline)\n- shutil.rmtree(tmpdir)\n+headers = []\n+c = 0\n+for fn in outfiles:\n+ with open(fn, \'r\') as infile:\n+ reader = csv.reader(infile, delimiter=\'\\t\')\n+ if sys.version_info >= (3, 0):\n+ headers.extend(next(reader))\n+ else:\n+ headers.extend(reader.next())\n+ break\n+\n+headers = list(paramd[\'additional_details\'].keys()) + headers\n+\n \n+with open(args.result_pth, \'a\') as merged_outfile:\n+ dwriter = csv.DictWriter(merged_outfile, fieldnames=headers, delimiter=\'\\t\', quotechar=\'"\',\n+ quoting=csv.QUOTE_NONNUMERIC,)\n+ dwriter.writeheader()\n+\n+ for fn in outfiles:\n+ print(fn)\n+\n+ with open(fn) as infile:\n+ reader = csv.DictReader(infile, delimiter=\'\\t\')\n+\n+ ad = paramds[fn.split(os.sep)[-2]][\'additional_details\']\n+\n+ for line in reader:\n+\n+ line.update(ad)\n+\n+ dwriter.writerow(line)\n' |
| b |
| diff -r 110834b7923b -r cf9f900eeaff sirius_csifingerid.xml --- a/sirius_csifingerid.xml Tue Jul 24 08:58:00 2018 -0400 +++ b/sirius_csifingerid.xml Fri May 31 04:36:10 2019 -0400 |
| [ |
| @@ -1,27 +1,33 @@ -<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.1.0"> +<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.2.0"> <description> </description> <requirements> - <requirement type="package" version="4.0.0">sirius-csifingerid</requirement> + <requirement type="package" version="4.0.1">sirius-csifingerid</requirement> </requirements> <command detect_errors="exit_code" interpreter="python"> <![CDATA[ - #set $tool_directory = $getVar('__tool_directory__', '../../../../tools/sirius_csifingerid') + sirius_csifingerid.py - --input "$input" - --db_online '$db_online' + --input_pth "$input" + --database '$database' --profile $profile --candidates $candidates --ppm_max $ppm_max --polarity $polarity - --results_name 'csi_results.tsv' --out_dir . - --tool_directory $tool_directory - + --result_pth sirius_all_summary.tsv + --cores_top_level 1 + --meta_select_col $meta_select_col + --minMSMSpeaks $minMSMSpeaks + --schema $schema + --temp_dir . + + + ]]></command> <inputs> - <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input"/> - <param name="db_online" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--db_online"> + <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input_pth"/> + <param name="database" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--database"> <option value="PubChem" >PubChem</option> <option selected="true" value="hmdb">HMDB</option> <option value="kegg">KEGG</option> @@ -32,23 +38,45 @@ <param name="ppm_max" type="integer" value="10" label="Mass deviation of the fragment peaks in ppm" argument="--ppm_max"/> <param name="candidates" type="integer" value="5" label="The maximum number of candidates in the output" argument="--candidates"/> <param name="polarity" type="select" label="Ion Mode" argument="--polarity"> - <option value="pos" selected="true">Positive</option> - <option value="neg">Negative</option> + <option value="positive" selected="true">Positive</option> + <option value="negative">Negative</option> </param> <param name="profile" type="select" label="Analysis used" argument="--profile"> <option value="orbitrap" selected="true">Orbitrap</option> <option value="qtof">qTOF</option> <option value="fticr">FT-ICR</option> </param> + <param name="schema" type="select" label="Schema" + help="The schema used for the MSP file (auto will try automatically determine the schema)"> + <option value="auto" selected="True">Auto</option> + <option value="msp" >Generic MSP</option> + <option value="massbank">MassBank</option> + </param> + <param name="meta_select_col" type="select" label="Choose how additional metadata columns are extracted" + help="The SIRIUS-CSI:Fingerid output can have additional meta data columns added, these can be either extracted + from all MSP parameters or from the 'Name' and 'RECORD_TITLE' MSP parameter. Additionally, columns + can be added from the 'Name' or 'RECORD_TITLE' parameter by splitting on | and : + e.g. 'MZ:100.2 | RT:20 | xcms_grp_id:1' would create MZ,RT and xcms_grp_id columns"> + <option value="name" selected="true">Extra metadata columns from the Name or RECORD_TITLE</option> + <option value="name_split" >Extra metadata columns from the Name or RECORD_TITLE (each column is split on "|" and ":" ) </option> + <option value="all">Extra metadata columns from all MSP parameters</option> + </param> + <param name="minMSMSpeaks" type="integer" label="Minimum number of MS/MS peaks" value="0"/> </inputs> <outputs> - <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="csi_results.tsv" /> + <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="sirius_all_summary.tsv" /> </outputs> <tests> + <test> - <param name="input" value="input.msp"/> - <output name="results" file="sirus_csifingerid.tabular"/> + <param name="input" value="ML006801.txt"/> + <output name="results" file="ML006801.tsv"/> </test> + <test> + <param name="input" value="generic.msp"/> + <output name="results" file="generic.tsv"/> + </test> + </tests> <help> ---------------- @@ -74,7 +102,7 @@ The following databases are available: -* PubChem (default) +* PubChem ( * hmdb @@ -113,6 +141,7 @@ --------------------------- - **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)** +- **Thomas N Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)** - **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)** </help> |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/ML006801.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ML006801.tsv Fri May 31 04:36:10 2019 -0400 |
| [ |
| @@ -0,0 +1,2 @@ +"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links" +"L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+" "1_tmpspec" "" "0.0" "GHSJKUNUIHUPDF" "InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)" "C5H12N2O2S" "1" "-7.0831442988613125" "L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+" "C(CSCC(C(=O)O)N)N" "" "20049;99558;6995002;12898158;25246097;54754416;57517225" "HMDB:(33518);Natural Products:(UNPD166389);CHEBI:(497734);Plantcyc:(S-2-AMINOETHYL-L-CYSTEINE);Biocyc:(S-2-AMINOETHYL-L-CYSTEINE THIALYSINE)" |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/ML006801.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ML006801.txt Fri May 31 04:36:10 2019 -0400 |
| [ |
| @@ -0,0 +1,63 @@ +ACCESSION: ML004801 +RECORD_TITLE: L-thialysine; LC-ESI-ITFT; MS2; CE: 50%; R=7500; [M+H]+ +DATE: 2014.11.12 +AUTHORS: Mark Earll, Stephan Beisken, EMBL-EBI +LICENSE: CC BY-SA +COPYRIGHT: Copyright (C) 2014, European Molecular Biology Laboratory - European Bioinformatics Institute (EMBL-EBI), Hinxton, UK. +PUBLICATION: Beisken S et al (2014) Scientific Data, 1:140029, DOI:10.1038/sdata.2014.29. http://www.ebi.ac.uk/metabolights/MTBLS38 +COMMENT: CONFIDENCE standard compound +COMMENT: ML_ID 48 +CH$NAME: L-thialysine +CH$NAME: (2R)-2-amino-3-(2-aminoethylsulfanyl)propanoic acid +CH$COMPOUND_CLASS: N/A; Environmental Standard +CH$FORMULA: C5H12N2O2S +CH$EXACT_MASS: 164.0619 +CH$SMILES: NCCSC[C@H](N)C(=O)O +CH$IUPAC: InChI=1S/C5H12N2O2S/c6-1-2-10-3-4(7)5(8)9/h4H,1-3,6-7H2,(H,8,9)/t4-/m0/s1 +CH$LINK: CHEBI 497734 +CH$LINK: PUBCHEM CID:99558 +CH$LINK: INCHIKEY GHSJKUNUIHUPDF-BYPYZUCNSA-N +CH$LINK: CHEMSPIDER 89945 +AC$INSTRUMENT: LTQ Orbitrap Velos Thermo Scientific +AC$INSTRUMENT_TYPE: LC-ESI-ITFT +AC$MASS_SPECTROMETRY: MS_TYPE MS2 +AC$MASS_SPECTROMETRY: ION_MODE POSITIVE +AC$MASS_SPECTROMETRY: IONIZATION ESI +AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD +AC$MASS_SPECTROMETRY: COLLISION_ENERGY 50 % (nominal) +AC$MASS_SPECTROMETRY: RESOLUTION 7500 +AC$CHROMATOGRAPHY: COLUMN_NAME HSS T3 1.7 um, 2x150 mm, Waters +AC$CHROMATOGRAPHY: FLOW_GRADIENT 100/0 at 0 min, 90/10 at 7.5 min, 0/100 at 10 min, 0/100 at 12 min, 100/0 at 18 min, 100/0 at 25 min +AC$CHROMATOGRAPHY: FLOW_RATE 250 uL/min at 0 min, 400 uL/min at 7.5 min +AC$CHROMATOGRAPHY: RETENTION_TIME 1.2 min +AC$CHROMATOGRAPHY: SOLVENT A 0.2% Formic Acid +AC$CHROMATOGRAPHY: SOLVENT B 98/2/0.2 Acetonitrile/Water/Formic Acid +MS$FOCUSED_ION: BASE_PEAK 165.069 +MS$FOCUSED_ION: PRECURSOR_M/Z 165.0692 +MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ +MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 +MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included +MS$DATA_PROCESSING: WHOLE RMassBank 1.7.0 +PK$SPLASH: splash10-00di-0900000000-99e0ec9e6034dff32dc8 +PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm) + 76.0215 C2H6NS+ 1 76.0215 -1.27 + 88.0392 C3H6NO2+ 1 88.0393 -1.19 + 92.0162 C2H6NOS+ 1 92.0165 -2.73 + 102.037 C4H8NS+ 1 102.0372 -1.93 + 109.0271 C4H3N3O+ 1 109.0271 0.61 + 120.0112 C3H6NO2S+ 1 120.0114 -1.8 + 148.0424 C5H10NO2S+ 1 148.0427 -1.8 + 165.0699 C5H13N2O2S+ 1 165.0692 4.09 + 174.0753 C5H10N4O3+ 1 174.0747 3.5 +PK$NUM_PEAK: 9 +PK$PEAK: m/z int. rel.int. + 76.0215 18351.9 16 + 88.0392 41980.6 36 + 92.0162 9969.8 8 + 102.037 24583.1 21 + 109.0271 1331.3 1 + 120.0112 1140642.2 999 + 148.0424 40689.7 35 + 165.0699 12929.9 11 + 174.0753 1548.6 1 +// \ No newline at end of file |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/generic.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/generic.msp Fri May 31 04:36:10 2019 -0400 |
| b |
| @@ -0,0 +1,123 @@ +NAME: MZ:70.0659 | RT:1483 | XCMS_group:1 | file:1 | scan:NA | pid:NA +PRECURSORMZ: 70.0658950805664 +Comment: +Num Peaks: 8 +50.4781379699707 3487.4296875 4.61 +51.0193099975586 3390.96948242188 4.49 +53.0031509399414 10011.958984375 13.25 +53.5898513793945 4252.7880859375 5.63 +54.3787727355957 3541.5107421875 4.69 +69.0455169677734 9650.0107421875 12.77 +70.0660934448242 37168.609375 49.18 +82.9910659790039 4077.36694335938 5.39 + +NAME: MZ:72.0815 | RT:1823 | XCMS_group:2 | file:1 | scan:NA +PRECURSORMZ: 72.0815277099609 +COMMENT: +Num Peaks: 6 +51.773567199707 818.313903808594 10.98 +54.0346794128418 1247.91137695312 16.75 +54.6847991943359 967.616882324219 12.98 +56.050350189209 1780.01037597656 23.90 +58.4994125366211 975.196228027344 13.09 +72.0814056396484 1660.50390625 22.29 + +NAME: MZ:72.0815 | RT:1857 | scan:NA +PRECURSORMZ: 72.08154296875 +COMMENT: +Num Peaks: 4 +56.0504341125488 1838.78173828125 46.54 +59.9103507995605 701.556762695312 17.75 +63.7723731994629 650.224975585938 16.46 +72.0814590454102 760.228637695312 19.25 + +NAME: MZ:76.0400 | RT:1606 | XCMS_group:5 | file:1 | scan:NA +PRECURSORMZ: 76.0400390625 +COMMENT: +Num Peaks: 4 +53.2376174926758 3224.35571289062 25.41 +60.3291244506836 3193.19482421875 25.17 +73.7529830932617 3305.61401367188 26.05 +82.5309600830078 2965.41772460938 23.37 + +NAME: MZ:79.0219 | RT:177 | XCMS_group:9 | file:1 | scan:NA +PRECURSORMZ: 79.0218658447266 +COMMENT: +Num Peaks: 7 +53.6282501220703 15316.7431640625 1.07 +59.967342376709 251727.734375 17.51 +61.0115814208984 80113.8046875 5.57 +62.9908714294434 93065.1015625 6.47 +63.9986305236816 950876.9375 66.13 +79.0219345092773 33032.984375 2.30 +95.4936447143555 13826.033203125 0.96 + +NAME: MZ:79.0219 | RT:184 | XCMS_group:9 | file:1 | scan:NA +PRECURSORMZ: 79.0218811035156 +COMMENT: +Num Peaks: 5 +59.1125831604004 67799.1953125 3.10 +59.9673652648926 345613.1875 15.83 +62.9906845092773 117693.296875 5.39 +63.9986686706543 1585970.25 72.62 +80.5974655151367 66719.4609375 3.06 + +NAME: MZ:79.0219 | RT:212 | XCMS_group:9 | file:1 | scan:NA +PRECURSORMZ: 79.0218887329102 +COMMENT: +Num Peaks: 12 +53.1700401306152 2441.47143554688 2.54 +55.1893730163574 2006.07958984375 2.08 +58.9013671875 2539.39086914062 2.64 +59.9673500061035 13423.1376953125 13.94 +61.0115776062012 4831.0986328125 5.02 +62.9908828735352 3668.52905273438 3.81 +63.9986190795898 54386.6640625 56.50 +73.8388671875 2330.30126953125 2.42 +78.5768051147461 2563.25 2.66 +79.0221328735352 2581.44604492188 2.68 +96.8009872436523 2530.70141601562 2.63 +99.6652908325195 2961.3095703125 3.08 + +NAME: MZ:79.9904 | RT:1284 | XCMS_group:11 | file:1 | scan:NA +PRECURSORMZ: 79.9903564453125 +COMMENT: +Num Peaks: 3 +51.6917915344238 584.212829589844 31.93 +53.0398750305176 649.807922363281 35.48 +97.3154754638672 596.341003417969 32.59 + +NAME: MZ:86.0607 | RT:1497 | XCMS_group:19 | file:1 | scan:NA +PRECURSORMZ: 86.060661315918 +COMMENT: +Num Peaks: 4 +53.0031318664551 9658.7939453125 60.81 +53.1939277648926 1998.81518554688 12.58 +80.3447494506836 2044.23645019531 12.87 +101.307479858398 2181.85522460938 13.73 + +NAME: MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA +PRECURSORMZ: 86.0606307983398 +COMMENT: +Num Peaks: 11 +52.6782836914062 1061.12646484375 3.59 +53.0032196044922 15176.8583984375 51.38 +53.1121788024902 1193.6044921875 4.039 +53.9984169006348 2790.28930664062 9.45 +54.0287094116211 999.250427246094 3.38 +56.7024726867676 1171.42797851562 3.96 +69.0346069335938 1878.03894042969 3.36 +72.9083633422852 1256.455078125 4.25 +74.0740356445312 1324.07055664062 4.48 +80.5324630737305 1329.61022949219 4.50 +91.0167770385742 1362.0029296875 4.61 + +NAME: MZ:86.0607 | RT:1500 | XCMS_group:19 | file:1 | scan:NA +PRECURSORMZ: 86.0606536865234 +COMMENT: +Num Peaks: 5 +53.0031509399414 29580.330078125 61.35 +55.3490409851074 4989.64990234375 10.35 +61.990592956543 4089.9619140625 8.48 +63.2290992736816 4168.97412109375 8.64 +67.6647109985352 5392.48779296875 11.18 |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/generic.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/generic.tsv Fri May 31 04:36:10 2019 -0400 |
| b |
| @@ -0,0 +1,6 @@ +"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links" +"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "7_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-75.67854201438384" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)" +"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "5_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-75.82312484178658" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)" +"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "2_tmpspec" "" "0.0" "RWRDLPDLKQPQOW" "InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2" "C4H9N" "1" "-136.14546214244544" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "C1CCNC1" "" "31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985" "HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)" +"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "6_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-86.79174845072117" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)" +"MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "10_tmpspec" "" "0.0" "HNJBEVLQSNELDL" "InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)" "C4H7NO" "1" "-149.09880138892078" "MZ:86.0606 | RT:1498 | XCMS_group:19 | file:1 | scan:NA" "C1CC(=NC1)O" "" "12025;3956071;10419134;12197590;12197592;18999930;20030003;20589568;58329813;90472990;91343693;101225382;101796586;123509162" "HMDB:(2039);KNApSAcK:(38233);Natural Products:(UNPD211738);CHEBI:(36592);HSDB:(616-45-5);Plantcyc:(CPD-19607)" |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/historic.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/historic.tsv Fri May 31 04:36:10 2019 -0400 |
| b |
| @@ -0,0 +1,3 @@ +"name" "source" "experimentName" "confidence" "inchikey2D" "inchi" "molecularFormula" "rank" "score" "name" "smiles" "xlogp" "pubchemids" "links" +"19" "2_tmpspec" "" "0.0" "RWRDLPDLKQPQOW" "InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2" "C4H9N" "1" "-136.14546214244544" "19" "C1CCNC1" "" "31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985" "HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)" +"19" "4_tmpspec" "" "0.0" "IAZDPXIOMUYVGZ" "InChI=1S/C2H6OS/c1-4(2)3/h1-2H3" "C2H6OS" "1" "-86.79174845072117" "19" "CS(=O)C" "" "679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578" "HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)" |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/historic_input.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/historic_input.msp Fri May 31 04:36:10 2019 -0400 |
| b |
| @@ -0,0 +1,61 @@ +NAME: 1 +PRECURSORMZ: 70.0658950805664 +Comment: +Num Peaks: 8 +50.4781379699707 3487.4296875 +51.0193099975586 3390.96948242188 +53.0031509399414 10011.958984375 +53.5898513793945 4252.7880859375 +54.3787727355957 3541.5107421875 +69.0455169677734 9650.0107421875 +70.0660934448242 37168.609375 +82.9910659790039 4077.36694335938 + +NAME: 2 +PRECURSORMZ: 72.0815277099609 +Comment: +Num Peaks: 6 +51.773567199707 818.313903808594 +54.0346794128418 1247.91137695312 +54.6847991943359 967.616882324219 +56.050350189209 1780.01037597656 +58.4994125366211 975.196228027344 +72.0814056396484 1660.50390625 + +NAME: 5 +PRECURSORMZ: 76.0400390625 +Comment: +Num Peaks: 4 +53.2376174926758 3224.35571289062 +60.3291244506836 3193.19482421875 +73.7529830932617 3305.61401367188 +82.5309600830078 2965.41772460938 + +NAME: 9 +PRECURSORMZ: 79.0218811035156 +Comment: +Num Peaks: 5 +59.1125831604004 67799.1953125 +59.9673652648926 345613.1875 +62.9906845092773 117693.296875 +63.9986686706543 1585970.25 +80.5974655151367 66719.4609375 + +NAME: 11 +PRECURSORMZ: 79.9903564453125 +Comment: +Num Peaks: 3 +51.6917915344238 584.212829589844 +53.0398750305176 649.807922363281 +97.3154754638672 596.341003417969 + +NAME: 19 +PRECURSORMZ: 86.0606536865234 +Comment: +Num Peaks: 5 +53.0031509399414 29580.330078125 +55.3490409851074 4989.64990234375 +61.990592956543 4089.9619140625 +63.2290992736816 4168.97412109375 +67.6647109985352 5392.48779296875 + |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/input.msp --- a/test-data/input.msp Tue Jul 24 08:58:00 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,61 +0,0 @@ -NAME: 1 -PRECURSORMZ: 70.0658950805664 -Comment: -Num Peaks: 8 -50.4781379699707 3487.4296875 -51.0193099975586 3390.96948242188 -53.0031509399414 10011.958984375 -53.5898513793945 4252.7880859375 -54.3787727355957 3541.5107421875 -69.0455169677734 9650.0107421875 -70.0660934448242 37168.609375 -82.9910659790039 4077.36694335938 - -NAME: 2 -PRECURSORMZ: 72.0815277099609 -Comment: -Num Peaks: 6 -51.773567199707 818.313903808594 -54.0346794128418 1247.91137695312 -54.6847991943359 967.616882324219 -56.050350189209 1780.01037597656 -58.4994125366211 975.196228027344 -72.0814056396484 1660.50390625 - -NAME: 5 -PRECURSORMZ: 76.0400390625 -Comment: -Num Peaks: 4 -53.2376174926758 3224.35571289062 -60.3291244506836 3193.19482421875 -73.7529830932617 3305.61401367188 -82.5309600830078 2965.41772460938 - -NAME: 9 -PRECURSORMZ: 79.0218811035156 -Comment: -Num Peaks: 5 -59.1125831604004 67799.1953125 -59.9673652648926 345613.1875 -62.9906845092773 117693.296875 -63.9986686706543 1585970.25 -80.5974655151367 66719.4609375 - -NAME: 11 -PRECURSORMZ: 79.9903564453125 -Comment: -Num Peaks: 3 -51.6917915344238 584.212829589844 -53.0398750305176 649.807922363281 -97.3154754638672 596.341003417969 - -NAME: 19 -PRECURSORMZ: 86.0606536865234 -Comment: -Num Peaks: 5 -53.0031509399414 29580.330078125 -55.3490409851074 4989.64990234375 -61.990592956543 4089.9619140625 -63.2290992736816 4168.97412109375 -67.6647109985352 5392.48779296875 - |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/sirus_csifingerid.tabular --- a/test-data/sirus_csifingerid.tabular Tue Jul 24 08:58:00 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| b |
| @@ -1,4 +0,0 @@ -UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links -2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.1454621424455 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE) -UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links -9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072123 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO) |
| b |
| diff -r 110834b7923b -r cf9f900eeaff test-data/sirus_csifingerid_test1.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sirus_csifingerid_test1.tsv Fri May 31 04:36:10 2019 -0400 |
| b |
| @@ -0,0 +1,4 @@ +UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links +2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.14546214244544 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE) +UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links +9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072117 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO) |