Repository 'sirius_csifingerid'
hg clone https://eddie.galaxyproject.org/repos/tomnl/sirius_csifingerid

Changeset 0:110834b7923b (2018-07-24)
Next changeset 1:cf9f900eeaff (2019-05-31)
Commit message:
planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 4861d85108e12f7d7b1df472f42a8449ea1f46f6
added:
sirius_csifingerid.py
sirius_csifingerid.xml
test-data/demo_db.csv
test-data/input.msp
test-data/sirus_csifingerid.tabular
b
diff -r 000000000000 -r 110834b7923b sirius_csifingerid.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sirius_csifingerid.py Tue Jul 24 08:58:00 2018 -0400
[
@@ -0,0 +1,93 @@
+import argparse
+import os
+import glob
+import shutil
+from subprocess import Popen, PIPE
+# Wrapper script: split an MSP file into individual spectra and run the
+# `sirius` command line (with --fingerid) once per spectrum, appending any hits to one table.
+parser = argparse.ArgumentParser()
+parser.add_argument('--input')  # path of the MSP file to read
+parser.add_argument('--db_online')  # forwarded to sirius as -d
+parser.add_argument('--profile')  # NOTE(review): parsed but never used below
+parser.add_argument('--candidates')  # forwarded to sirius as -c
+parser.add_argument('--ppm_max')  # forwarded to sirius as --ppm-max
+parser.add_argument('--polarity')  # "pos" selects [M+H]+; any other value selects [M-H]-
+parser.add_argument('--results_name')  # file name of the combined result table
+parser.add_argument('--out_dir')  # directory holding the result table and scratch files
+parser.add_argument('--tool_directory')  # NOTE(review): only referenced by a commented-out line below
+args = parser.parse_args()
+print args  # Python 2 print statement: echo the parsed arguments
+
+result_pth = os.path.join(args.out_dir, args.results_name)  # opened in append mode for every spectrum with hits
+with open(args.input,"r") as infile:

+    numlines = 0  # 0 means "reading header lines"; non-zero means "reading peak lines"
+    for line in infile:
+        line = line.strip()
+        if numlines == 0: # header mode: look for NAME / PRECURSORMZ / Num Peaks
+            if "NAME" in line:
+                featid = line.split("NAME: ")[1]  # spectrum id, later written as the UID column
+            if "PRECURSORMZ" in line:
+                mz = float(line.split("PRECURSORMZ: ")[1])  # forwarded to sirius as -z
+                if args.polarity=="pos":
+                    mz2 = mz-1.007276  # NOTE(review): mz2 is assigned here and in the else branch but never read
+                else:
+                    mz2 = mz+1.007276
+            if "Num Peaks" in line:
+                numlines = int(line.split("Num Peaks: ")[1]) # number of peak lines expected to follow
+                linesread = 0
+                peaklist = []
+        else:
+            if linesread != numlines: # still collecting peak lines
+                line = tuple(line.split("\t"))  # fields are split on tab characters only
+                linesread += 1
+                peaklist.append(line)
+            else:  # first line after the last peak: triggers processing; the line itself is not parsed
+                numlines = 0 # back to header mode. NOTE(review): a final spectrum with no line after it is never processed
+                # write the collected peaks to a scratch spectrum file
+                specpth = os.path.join(args.out_dir,'tmpspec.txt')
+                tmpdir = os.path.join(args.out_dir,'tempout')
+                if not os.path.exists(tmpdir):
+                    os.mkdir(tmpdir)
+
+                with open(specpth, 'w') as outfile1:
+                    for p in peaklist:
+                        outfile1.write(p[0]+" "+p[1]+"\n")  # NOTE(review): IndexError if a peak line contained no tab
+                    # choose the ionisation string for the command line
+                    if args.polarity == "pos":
+                        ion = "[M+H]+"
+                    else:
+                        ion = "[M-H]-"
+                #cmd_command = os.path.join(args.tool_directory, 'bin', 'sirius ')
+                cmd_command = 'sirius '  # bare command name, so it must be resolvable through PATH
+                cmd_command += "-c {} -o {} -i {} -z {} -2 {} ".format(args.candidates, tmpdir , ion, mz, specpth)
+                cmd_command += "-d {} --ppm-max {} --fingerid".format(args.db_online, args.ppm_max)
+
+                # run via the shell; values are interpolated unquoted and the exit status is ignored
+                print cmd_command
+                os.system(cmd_command)
+     
+                # a summary_csi_fingerid.csv under tmpdir means fingerid found hits
+                mtching_files = glob.glob(os.path.join(tmpdir, "*_tmpspec_", "summary_csi_fingerid.csv"))
+                if mtching_files:
+                    first_read=True  # reset per spectrum, so each spectrum with hits writes its own header row
+                    if len(mtching_files)>1:
+                        print 'multiple folder names being used', mtching_files
+                    latest_file = max(mtching_files, key=os.path.getmtime)  # use the most recently modified summary

+                    with open(result_pth, 'a') as outfile2:
+
+                        with open(latest_file) as infile_csi:
+                            for iline in infile_csi:
+                                if "inchi" in iline:  # any line containing lowercase "inchi" is treated as a header row
+                                    if first_read:
+                                        iline = iline.replace("inchi","InChI")
+                                        iline = iline.replace("rank", "Rank")
+                                        iline = iline.replace("name", "Name")
+                                        iline = iline.replace("score", "Score")
+                                        outfile2.write("UID\t"+iline)
+                                        first_read = False
+                                else:
+                                    outfile2.write(featid+"\t"+ iline)  # data row prefixed with the spectrum's NAME value
+                shutil.rmtree(tmpdir)  # remove the sirius output so the next spectrum starts from an empty directory
+
b
diff -r 000000000000 -r 110834b7923b sirius_csifingerid.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sirius_csifingerid.xml Tue Jul 24 08:58:00 2018 -0400
[
@@ -0,0 +1,123 @@
+<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.1.0">
+    <description> </description>
+    <requirements>
+        <requirement type="package" version="4.0.0">sirius-csifingerid</requirement>
+    </requirements>
+    <command detect_errors="exit_code"  interpreter="python">
+    <![CDATA[
+
+       #set $tool_directory = $getVar('__tool_directory__', '../../../../tools/sirius_csifingerid')
+        sirius_csifingerid.py
+            --input "$input"
+            --db_online '$db_online'
+            --profile $profile
+            --candidates $candidates
+            --ppm_max $ppm_max
+            --polarity $polarity
+            --results_name 'csi_results.tsv'
+            --out_dir .
+            --tool_directory $tool_directory
+            
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input"/>
+        <param name="db_online" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--db_online">
+            <option value="PubChem" >PubChem</option>
+            <option selected="true" value="hmdb">HMDB</option>
+            <option value="kegg">KEGG</option>
+            <option value="knapsack">KNApSAcK</option>
+            <option value="biocyc">BioCyc</option>
+            <option  value="all">All (see help)</option>
+        </param>
+        <param name="ppm_max" type="integer" value="10" label="Mass deviation of the fragment peaks in ppm" argument="--ppm_max"/>
+        <param name="candidates" type="integer" value="5" label="The maximum number of candidates in the output" argument="--candidates"/>
+        <param name="polarity" type="select" label="Ion Mode" argument="--polarity">
+            <option value="pos" selected="true">Positive</option>
+            <option value="neg">Negative</option>
+        </param> 
+        <param name="profile" type="select" label="Analysis used" argument="--profile">
+            <option value="orbitrap" selected="true">Orbitrap</option>
+            <option value="qtof">qTOF</option>
+            <option value="fticr">FT-ICR</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="csi_results.tsv"  />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input.msp"/>
+            <output name="results" file="sirus_csifingerid.tabular"/>
+        </test>
+    </tests>
+    <help>
+----------------
+SIRIUS-FingerID
+----------------
+
+Description
+-----------
+
+| SIRIUS is a java-based software framework for discovering a landscape of de-novo identification of metabolites using single and tandem mass spectrometry.
+| SIRIUS uses isotope pattern analysis for detecting the molecular formula and further analyses the fragmentation pattern of a compound using fragmentation trees.
+| Website: https://bio.informatik.uni-jena.de/software/sirius/
+|
+
+Parameters
+----------
+
+**\1. MSP file**
+
+MSP file created using *Create MSP* tool
+
+**\2. Select SIRIUS-CSI:FingerID Databases**
+
+The following databases are available:
+
+* PubChem
+
+* hmdb (default)
+
+* kegg
+
+* knapsack
+
+* biocyc
+
+* all (SIRIUS will consider all molecular formulas that are possible for the measured m/z)
+
+**\3. Allowed mass deviation of the fragment peaks in ppm**
+
+**\4. The maximum number of candidates in the output**
+
+Set the top X candidates to return.
+
+**\5. Ion Mode**
+
+* Positive
+
+* Negative
+
+**\6. Analysis used**
+
+* Orbitrap
+
+* qTOF
+
+* FT-ICR
+
+If you want to analyze spectra measured with Orbitrap or FT-ICR, you should specify the appropriate analysis profile. A profile is a set of configuration options and scoring functions SIRIUS 3 will use for its analysis. For example, the Orbitrap and FT-ICR profiles have tighter constraints for the allowed mass deviation but do not rely so much on the intensity of isotope peaks.
+
+
+Developers and contributors
+---------------------------
+
+- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**
+- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**
+
+    </help>
+    <citations>
+        <citation type="doi">10.1073/pnas.1509788112</citation>
+        <citation type="doi">10.1093/bioinformatics/btu275</citation>
+    </citations>
+</tool> 
b
diff -r 000000000000 -r 110834b7923b test-data/demo_db.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/demo_db.csv Tue Jul 24 08:58:00 2018 -0400
b
@@ -0,0 +1,8 @@
+"Identifier","MonoisotopicMass","MolecularFormula","SMILES","InChI","InChIKey1","InChIKey2","InChIKey3","Name","InChIKey"
+"HMDB0000123",75.03202841,"C2H5NO2","NCC(O)=O","InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)","DHMQDGOQFOQNFH","UHFFFAOYSA","N","Glycine","DHMQDGOQFOQNFH-UHFFFAOYSA-N"
+"HMDB0002151",78.0139355,"C2H6OS","CS(C)=O","InChI=1S/C2H6OS/c1-4(2)3/h1-2H3","IAZDPXIOMUYVGZ","UHFFFAOYSA","N","Dimethyl sulfoxide","IAZDPXIOMUYVGZ-UHFFFAOYSA-N"
+"HMDB0031239",75.03202841,"C2H5NO2","CCON=O","InChI=1S/C2H5NO2/c1-2-5-3-4/h2H2,1H3","QQZWEECEMNQSTG","UHFFFAOYSA","N","Ethyl nitrite","QQZWEECEMNQSTG-UHFFFAOYSA-N"
+"HMDB0014691",75.03202841,"C2H5NO2","CC(=O)NO","InChI=1S/C2H5NO2/c1-2(4)3-5/h5H,1H3,(H,3,4)","RRUDCFGSUDOHDG","UHFFFAOYSA","N","Acetohydroxamic Acid","RRUDCFGSUDOHDG-UHFFFAOYSA-N"
+"HMDB0002039",85.05276385,"C4H7NO","O=C1CCCN1","InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)","HNJBEVLQSNELDL","UHFFFAOYSA","N","2-Pyrrolidinone","HNJBEVLQSNELDL-UHFFFAOYSA-N"
+"HMDB0060427",85.05276385,"C4H7NO","CC(C)(O)C#N","InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3","MWFMGBPGAXYFAR","UHFFFAOYSA","N","Acetone cyanohydrin","MWFMGBPGAXYFAR-UHFFFAOYSA-N"
+
b
diff -r 000000000000 -r 110834b7923b test-data/input.msp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.msp Tue Jul 24 08:58:00 2018 -0400
b
@@ -0,0 +1,61 @@
+NAME: 1
+PRECURSORMZ: 70.0658950805664
+Comment:
+Num Peaks: 8
+50.4781379699707 3487.4296875
+51.0193099975586 3390.96948242188
+53.0031509399414 10011.958984375
+53.5898513793945 4252.7880859375
+54.3787727355957 3541.5107421875
+69.0455169677734 9650.0107421875
+70.0660934448242 37168.609375
+82.9910659790039 4077.36694335938
+
+NAME: 2
+PRECURSORMZ: 72.0815277099609
+Comment:
+Num Peaks: 6
+51.773567199707 818.313903808594
+54.0346794128418 1247.91137695312
+54.6847991943359 967.616882324219
+56.050350189209 1780.01037597656
+58.4994125366211 975.196228027344
+72.0814056396484 1660.50390625
+
+NAME: 5
+PRECURSORMZ: 76.0400390625
+Comment:
+Num Peaks: 4
+53.2376174926758 3224.35571289062
+60.3291244506836 3193.19482421875
+73.7529830932617 3305.61401367188
+82.5309600830078 2965.41772460938
+
+NAME: 9
+PRECURSORMZ: 79.0218811035156
+Comment:
+Num Peaks: 5
+59.1125831604004 67799.1953125
+59.9673652648926 345613.1875
+62.9906845092773 117693.296875
+63.9986686706543 1585970.25
+80.5974655151367 66719.4609375
+
+NAME: 11
+PRECURSORMZ: 79.9903564453125
+Comment:
+Num Peaks: 3
+51.6917915344238 584.212829589844
+53.0398750305176 649.807922363281
+97.3154754638672 596.341003417969
+
+NAME: 19
+PRECURSORMZ: 86.0606536865234
+Comment:
+Num Peaks: 5
+53.0031509399414 29580.330078125
+55.3490409851074 4989.64990234375
+61.990592956543 4089.9619140625
+63.2290992736816 4168.97412109375
+67.6647109985352 5392.48779296875
+
b
diff -r 000000000000 -r 110834b7923b test-data/sirus_csifingerid.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sirus_csifingerid.tabular Tue Jul 24 08:58:00 2018 -0400
b
@@ -0,0 +1,4 @@
+UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
+2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.1454621424455 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE)
+UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links
+9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072123 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO)