Next changeset 1:cf9f900eeaff (2019-05-31) |
Commit message:
planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 4861d85108e12f7d7b1df472f42a8449ea1f46f6 |
added:
sirius_csifingerid.py sirius_csifingerid.xml test-data/demo_db.csv test-data/input.msp test-data/sirus_csifingerid.tabular |
b |
diff -r 000000000000 -r 110834b7923b sirius_csifingerid.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sirius_csifingerid.py Tue Jul 24 08:58:00 2018 -0400 |
[ |
"""Run SIRIUS / CSI:FingerID on every spectrum of an MSP file.

For each spectrum found in ``--input`` the script writes the peak list to
``<out_dir>/tmpspec.txt``, calls the ``sirius`` executable on it and, when
SIRIUS produced a ``summary_csi_fingerid.csv``, appends that summary to
``<out_dir>/<results_name>`` with the spectrum's NAME prepended as a ``UID``
column.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import argparse
import glob
import os
import shutil
import sys
from subprocess import Popen, PIPE  # noqa: F401  (PIPE kept from the original import line)


# Column names of the SIRIUS summary header that are re-capitalised in the
# combined result file, applied in this order.
HEADER_RENAMES = (
    ("inchi", "InChI"),
    ("rank", "Rank"),
    ("name", "Name"),
    ("score", "Score"),
)


def iter_msp_spectra(lines):
    """Yield ``(feature_id, precursor_mz, peaks)`` for each spectrum in *lines*.

    *lines* is any iterable of text lines in MSP layout: ``NAME:``,
    ``PRECURSORMZ:`` and ``Num Peaks:`` header fields (matched
    case-insensitively) followed by one ``m/z intensity`` pair per line.

    ``feature_id`` is the text after ``NAME:`` (``None`` when absent),
    ``precursor_mz`` is a float (``None`` when absent) and ``peaks`` is a list
    of ``(mz, intensity)`` string tuples.

    A spectrum is emitted as soon as the declared number of peaks has been
    read, so the last record does not need a trailing blank line.  A line
    with fewer than two fields inside a peak block ends the block early and
    the peaks read so far are emitted.  Records declaring zero peaks are
    skipped.
    """
    feature_id = None
    precursor_mz = None
    expected = 0
    peaks = []

    for raw_line in lines:
        line = raw_line.strip()

        if expected:
            # Peak block: accept tab- or space-separated columns.
            fields = line.split()
            if len(fields) >= 2:
                peaks.append((fields[0], fields[1]))
            if len(peaks) == expected or len(fields) < 2:
                if peaks:
                    yield feature_id, precursor_mz, peaks
                # Reset so values never leak into the next record.
                feature_id = None
                precursor_mz = None
                expected = 0
                peaks = []
            continue

        key, separator, value = line.partition(":")
        if not separator:
            continue
        key = key.strip().upper()
        value = value.strip()
        if key == "NAME":
            feature_id = value
        elif key == "PRECURSORMZ":
            precursor_mz = float(value)
        elif key == "NUM PEAKS":
            expected = int(value)
            peaks = []

    # Input ended in the middle of a peak block: keep what was read.
    if peaks:
        yield feature_id, precursor_mz, peaks


def build_sirius_command(candidates, out_dir, polarity, precursor_mz,
                         spectrum_path, db_online, ppm_max):
    """Return the ``sirius`` invocation as an argument list.

    The list form is handed to ``Popen`` without a shell, so the ion string
    (which contains ``[``/``]``) and any value taken from the input file are
    passed through verbatim instead of being interpreted by a shell.
    """
    ion = "[M+H]+" if polarity == "pos" else "[M-H]-"
    return [
        "sirius",
        "-c", str(candidates),
        "-o", out_dir,
        "-i", ion,
        "-z", repr(precursor_mz),
        "-2", spectrum_path,
        "-d", str(db_online),
        "--ppm-max", str(ppm_max),
        "--fingerid",
    ]


def annotate_summary(feature_id, summary_lines):
    """Return the lines of one SIRIUS summary prefixed with a UID column.

    The first non-blank line is treated as the header: its column names are
    re-capitalised according to ``HEADER_RENAMES`` and ``UID`` is prepended.
    Every following non-blank line gets *feature_id* prepended.  Lines keep
    their original line endings.
    """
    annotated = []
    header_done = False
    for line in summary_lines:
        if not line.strip():
            continue
        if not header_done:
            for old, new in HEADER_RENAMES:
                line = line.replace(old, new)
            annotated.append("UID\t" + line)
            header_done = True
        else:
            annotated.append("{}\t{}".format(feature_id, line))
    return annotated


def process_spectrum(args, feature_id, precursor_mz, peaks, result_path):
    """Run SIRIUS on one spectrum and append any hits to *result_path*."""
    spectrum_path = os.path.join(args.out_dir, 'tmpspec.txt')
    work_dir = os.path.join(args.out_dir, 'tempout')
    if not os.path.exists(work_dir):
        os.mkdir(work_dir)

    try:
        with open(spectrum_path, 'w') as spectrum_file:
            spectrum_file.writelines(
                "{} {}\n".format(mz, intensity) for mz, intensity in peaks
            )

        command = build_sirius_command(
            args.candidates, work_dir, args.polarity, precursor_mz,
            spectrum_path, args.db_online, args.ppm_max,
        )
        print(" ".join(command))
        # Flush so our log line appears before the child's own output.
        sys.stdout.flush()

        # A missing ``sirius`` executable raises OSError here; a non-zero
        # exit for a single spectrum is only reported so the remaining
        # spectra are still processed.
        return_code = Popen(command).wait()
        if return_code != 0:
            print("sirius exited with code {} for feature {}".format(
                return_code, feature_id))

        summaries = glob.glob(
            os.path.join(work_dir, "*_tmpspec_", "summary_csi_fingerid.csv")
        )
        if not summaries:
            return
        if len(summaries) > 1:
            print('multiple folder names being used {}'.format(summaries))
        latest_summary = max(summaries, key=os.path.getmtime)

        # NOTE(review): the header row is written once per feature, so the
        # combined file contains repeated header lines.  This matches the
        # expected test output shipped with the tool and is kept for
        # backward compatibility.
        with open(latest_summary) as summary_file:
            with open(result_path, 'a') as result_file:
                result_file.writelines(
                    annotate_summary(feature_id, summary_file)
                )
    finally:
        # Always drop the per-spectrum output so a later spectrum can never
        # pick up results left behind by an earlier one.
        shutil.rmtree(work_dir, ignore_errors=True)


def main(argv=None):
    """Parse the command line and process every spectrum of ``--input``."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--input')
    parser.add_argument('--db_online')
    # NOTE(review): --profile and --tool_directory are accepted but not
    # forwarded to sirius, as in the original script - confirm whether the
    # profile should be passed on.
    parser.add_argument('--profile')
    parser.add_argument('--candidates')
    parser.add_argument('--ppm_max')
    parser.add_argument('--polarity')
    parser.add_argument('--results_name')
    parser.add_argument('--out_dir')
    parser.add_argument('--tool_directory')
    args = parser.parse_args(argv)
    print(args)

    result_path = os.path.join(args.out_dir, args.results_name)
    with open(args.input, "r") as infile:
        for feature_id, precursor_mz, peaks in iter_msp_spectra(infile):
            if precursor_mz is None:
                print("skipping feature {}: no PRECURSORMZ".format(feature_id))
                continue
            process_spectrum(args, feature_id, precursor_mz, peaks,
                             result_path)


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 110834b7923b sirius_csifingerid.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sirius_csifingerid.xml Tue Jul 24 08:58:00 2018 -0400 |
[ |
<tool id="sirius_csifingerid" name="SIRIUS-CSI:FingerID" version="0.1.0">
    <description> </description>
    <requirements>
        <requirement type="package" version="4.0.0">sirius-csifingerid</requirement>
    </requirements>
    <!--
        Runs sirius_csifingerid.py on the selected MSP file. The script writes
        its combined result to csi_results.tsv in the job working directory,
        which is collected by the "results" output below.
        NOTE(review): the "interpreter" attribute is deprecated in current
        Galaxy releases; confirm the target Galaxy version before replacing it
        with an explicit python call.
    -->
    <command detect_errors="exit_code" interpreter="python">
    <![CDATA[

        #set $tool_directory = $getVar('__tool_directory__', '../../../../tools/sirius_csifingerid')
        sirius_csifingerid.py
            --input '$input'
            --db_online '$db_online'
            --profile '$profile'
            --candidates '$candidates'
            --ppm_max '$ppm_max'
            --polarity '$polarity'
            --results_name 'csi_results.tsv'
            --out_dir .
            --tool_directory '$tool_directory'

    ]]></command>
    <inputs>
        <param name="input" type="data" format="msp,txt" label="MSP file (Output from Create MSP tool)" argument="--input"/>
        <!-- hmdb is the pre-selected database -->
        <param name="db_online" type="select" label="Select SIRIUS-CSI:FingerID Database" argument="--db_online">
            <option value="PubChem" >PubChem</option>
            <option selected="true" value="hmdb">HMDB</option>
            <option value="kegg">KEGG</option>
            <option value="knapsack">KNApSAcK</option>
            <option value="biocyc">BioCyc</option>
            <option value="all">All (see help)</option>
        </param>
        <param name="ppm_max" type="integer" value="10" label="Mass deviation of the fragment peaks in ppm" argument="--ppm_max"/>
        <param name="candidates" type="integer" value="5" label="The maximum number of candidates in the output" argument="--candidates"/>
        <param name="polarity" type="select" label="Ion Mode" argument="--polarity">
            <option value="pos" selected="true">Positive</option>
            <option value="neg">Negative</option>
        </param>
        <param name="profile" type="select" label="Analysis used" argument="--profile">
            <option value="orbitrap" selected="true">Orbitrap</option>
            <option value="qtof">qTOF</option>
            <option value="fticr">FT-ICR</option>
        </param>
    </inputs>
    <outputs>
        <data name="results" format="tsv" label="${tool.name} results of ${input.name}" from_work_dir="csi_results.tsv" />
    </outputs>
    <tests>
        <!-- Uses the default parameter values with the bundled example MSP file -->
        <test>
            <param name="input" value="input.msp"/>
            <output name="results" file="sirus_csifingerid.tabular"/>
        </test>
    </tests>
    <help>
----------------
SIRIUS-FingerID
----------------

Description
-----------

| SIRIUS is a Java-based software framework for the de-novo identification of metabolites using single and tandem mass spectrometry.
| SIRIUS uses isotope pattern analysis for detecting the molecular formula and further analyses the fragmentation pattern of a compound using fragmentation trees.
| Website: https://bio.informatik.uni-jena.de/software/sirius/
|

Parameters
----------

**\1. MSP file**

MSP file created using the *Create MSP* tool

**\2. Select SIRIUS-CSI:FingerID Databases**

The following databases are available:

* PubChem

* hmdb (default)

* kegg

* knapsack

* biocyc

* all (SIRIUS will consider all molecular formulas possible for the m/z)

**\3. Allowed mass deviation of the fragment peaks in ppm**

**\4. The maximum number of candidates in the output**

Set the top X candidates to return.

**\5. Ion Mode**

* Positive

* Negative

**\6. Analysis used**

* Orbitrap

* qTOF

* FT-ICR

If you want to analyze spectra measured with Orbitrap or FT-ICR, you should specify the appropriate analysis profile. A profile is a set of configuration options and scoring functions SIRIUS will use for its analysis. For example, the Orbitrap and FT-ICR profiles have tighter constraints for the allowed mass deviation but do not rely so much on the intensity of isotope peaks.


Developers and contributors
---------------------------

- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**
- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**

    </help>
    <citations>
        <citation type="doi">10.1073/pnas.1509788112</citation>
        <citation type="doi">10.1093/bioinformatics/btu275</citation>
    </citations>
</tool>
b |
diff -r 000000000000 -r 110834b7923b test-data/demo_db.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/demo_db.csv Tue Jul 24 08:58:00 2018 -0400 |
b |
@@ -0,0 +1,8 @@ +"Identifier","MonoisotopicMass","MolecularFormula","SMILES","InChI","InChIKey1","InChIKey2","InChIKey3","Name","InChIKey" +"HMDB0000123",75.03202841,"C2H5NO2","NCC(O)=O","InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)","DHMQDGOQFOQNFH","UHFFFAOYSA","N","Glycine","DHMQDGOQFOQNFH-UHFFFAOYSA-N" +"HMDB0002151",78.0139355,"C2H6OS","CS(C)=O","InChI=1S/C2H6OS/c1-4(2)3/h1-2H3","IAZDPXIOMUYVGZ","UHFFFAOYSA","N","Dimethyl sulfoxide","IAZDPXIOMUYVGZ-UHFFFAOYSA-N" +"HMDB0031239",75.03202841,"C2H5NO2","CCON=O","InChI=1S/C2H5NO2/c1-2-5-3-4/h2H2,1H3","QQZWEECEMNQSTG","UHFFFAOYSA","N","Ethyl nitrite","QQZWEECEMNQSTG-UHFFFAOYSA-N" +"HMDB0014691",75.03202841,"C2H5NO2","CC(=O)NO","InChI=1S/C2H5NO2/c1-2(4)3-5/h5H,1H3,(H,3,4)","RRUDCFGSUDOHDG","UHFFFAOYSA","N","Acetohydroxamic Acid","RRUDCFGSUDOHDG-UHFFFAOYSA-N" +"HMDB0002039",85.05276385,"C4H7NO","O=C1CCCN1","InChI=1S/C4H7NO/c6-4-2-1-3-5-4/h1-3H2,(H,5,6)","HNJBEVLQSNELDL","UHFFFAOYSA","N","2-Pyrrolidinone","HNJBEVLQSNELDL-UHFFFAOYSA-N" +"HMDB0060427",85.05276385,"C4H7NO","CC(C)(O)C#N","InChI=1S/C4H7NO/c1-4(2,6)3-5/h6H,1-2H3","MWFMGBPGAXYFAR","UHFFFAOYSA","N","Acetone cyanohydrin","MWFMGBPGAXYFAR-UHFFFAOYSA-N" + |
b |
diff -r 000000000000 -r 110834b7923b test-data/input.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.msp Tue Jul 24 08:58:00 2018 -0400 |
b |
@@ -0,0 +1,61 @@ +NAME: 1 +PRECURSORMZ: 70.0658950805664 +Comment: +Num Peaks: 8 +50.4781379699707 3487.4296875 +51.0193099975586 3390.96948242188 +53.0031509399414 10011.958984375 +53.5898513793945 4252.7880859375 +54.3787727355957 3541.5107421875 +69.0455169677734 9650.0107421875 +70.0660934448242 37168.609375 +82.9910659790039 4077.36694335938 + +NAME: 2 +PRECURSORMZ: 72.0815277099609 +Comment: +Num Peaks: 6 +51.773567199707 818.313903808594 +54.0346794128418 1247.91137695312 +54.6847991943359 967.616882324219 +56.050350189209 1780.01037597656 +58.4994125366211 975.196228027344 +72.0814056396484 1660.50390625 + +NAME: 5 +PRECURSORMZ: 76.0400390625 +Comment: +Num Peaks: 4 +53.2376174926758 3224.35571289062 +60.3291244506836 3193.19482421875 +73.7529830932617 3305.61401367188 +82.5309600830078 2965.41772460938 + +NAME: 9 +PRECURSORMZ: 79.0218811035156 +Comment: +Num Peaks: 5 +59.1125831604004 67799.1953125 +59.9673652648926 345613.1875 +62.9906845092773 117693.296875 +63.9986686706543 1585970.25 +80.5974655151367 66719.4609375 + +NAME: 11 +PRECURSORMZ: 79.9903564453125 +Comment: +Num Peaks: 3 +51.6917915344238 584.212829589844 +53.0398750305176 649.807922363281 +97.3154754638672 596.341003417969 + +NAME: 19 +PRECURSORMZ: 86.0606536865234 +Comment: +Num Peaks: 5 +53.0031509399414 29580.330078125 +55.3490409851074 4989.64990234375 +61.990592956543 4089.9619140625 +63.2290992736816 4168.97412109375 +67.6647109985352 5392.48779296875 + |
b |
diff -r 000000000000 -r 110834b7923b test-data/sirus_csifingerid.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sirus_csifingerid.tabular Tue Jul 24 08:58:00 2018 -0400 |
b |
@@ -0,0 +1,4 @@ +UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links +2 RWRDLPDLKQPQOW InChI=1S/C4H9N/c1-2-4-5-3-1/h5H,1-4H2 C4H9N 1 -136.1454621424455 Azolidine C1CCNC1 31268;3613359;11062297;12196044;12196046;12196049;12196050;18440991;20463768;53660610;57608708;57608709;57608710;57750053;60135501;90927493;91312985 HMDB:(31641);Natural Products:(UNPD154562);CHEBI:(33135 52145);HSDB:(123-75-1);Plantcyc:(PYRROLIDINE);Biocyc:(PYRROLIDINE) +UID InChIkey2D InChI molecularFormula Rank Score Name smiles xlogp pubchemids links +9 IAZDPXIOMUYVGZ InChI=1S/C2H6OS/c1-4(2)3/h1-2H3 C2H6OS 1 -86.79174845072123 Demasorb CS(=O)C 679;75151;10103116;12206145;12264368;18594457;20151975;21022526;22345572;57247813;71309204;76973052;90811807;90817578 HMDB:(2151);Natural Products:(UNPD148866);CHEBI:(28262);Maconda:(CON00016);HSDB:(67-68-5);KEGG:(C11143);Plantcyc:(DMSO);Biocyc:(DOH-ISO-VAL DMSO) |