Mercurial > repos > recetox > msmetaenhancer
changeset 0:053ce79ed564 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msmetaenhancer commit 2c9c75f7d0c5fcadf1fe0284dd767ea5c6f6be51"
author | recetox |
---|---|
date | Tue, 11 Jan 2022 15:12:26 +0000 |
parents | |
children | 4c328e0e59ab |
files | macros.xml msmetaenhancer.xml msmetaenhancer_wrapper.py test-data/sample.msp test-data/sample_out.msp |
diffstat | 5 files changed, 335 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Jan 11 15:12:26 2022 +0000 @@ -0,0 +1,55 @@ +<macros> + <token name="@TOOL_VERSION@">0.1.2</token> + <xml name="creator"> + <creator> + <organization + url="https://www.recetox.muni.cz/" + name="RECETOX MUNI" /> + </creator> + </xml> + <token name="@HELP@"> + <![CDATA[ + MSMetaEnhancer will fetch and update various metadata included in your spectra .msp dataset. + It does so through a series of conversions run on all entries in a given file. These conversions fetch + metadata from various online services. If no conversions are specified, all possible jobs will be executed in an arbitrary order. + + Every conversion specifies `service`, `source` and `target` attributes. This can be read as: the `service` receives the `source` attribute + and, based on it, determines the `target` attribute. For example, the conversion `PubChem: inchi -> smiles` uses PubChem to convert + InChI to SMILES. + + Both the execution speed and the results can be affected by which conversions are specified and by their order. + + For detailed documentation of the tool please visit https://msmetaenhancer.readthedocs.io/. 
+ ]]> + </token> + + <xml name="job_options"> + <option value="inchi formula PubChem">PubChem: inchi -> formula</option> + <option value="inchi inchikey PubChem">PubChem: inchi -> inchikey</option> + <option value="inchi iupac_name PubChem">PubChem: inchi -> iupac_name</option> + <option value="inchi smiles PubChem">PubChem: inchi -> smiles</option> + <option value="name formula PubChem">PubChem: name -> formula</option> + <option value="name inchi PubChem">PubChem: name -> inchi</option> + <option value="name inchikey PubChem">PubChem: name -> inchikey</option> + <option value="name iupac_name PubChem">PubChem: name -> iupac_name</option> + <option value="name smiles PubChem">PubChem: name -> smiles</option> + <option value="casno smiles CIR">CIR: casno -> smiles</option> + <option value="inchikey casno CIR">CIR: inchikey -> casno</option> + <option value="inchikey formula CIR">CIR: inchikey -> formula</option> + <option value="inchikey inchi CIR">CIR: inchikey -> inchi</option> + <option value="inchikey smiles CIR">CIR: inchikey -> smiles</option> + <option value="smiles inchikey CIR">CIR: smiles -> inchikey</option> + <option value="casno inchikey CTS">CTS: casno -> inchikey</option> + <option value="inchikey inchi CTS">CTS: inchikey -> inchi</option> + <option value="inchikey iupac_name CTS">CTS: inchikey -> iupac_name</option> + <option value="inchikey name CTS">CTS: inchikey -> name</option> + <option value="name inchikey CTS">CTS: name -> inchikey</option> + <option value="inchikey casno NLM">NLM: inchikey -> casno</option> + <option value="inchikey formula NLM">NLM: inchikey -> formula</option> + <option value="inchikey name NLM">NLM: inchikey -> name</option> + <option value="name casno NLM">NLM: name -> casno</option> + <option value="name formula NLM">NLM: name -> formula</option> + <option value="name inchikey NLM">NLM: name -> inchikey</option> + </xml> + +</macros>
<tool id="msmetaenhancer" name="MSMetaEnhancer" version="@TOOL_VERSION@+galaxy0">
    <description>annotate MS data</description>
    <options sanitize="False"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">msmetaenhancer</requirement>
    </requirements>
    <expand macro="creator"/>

    <!-- The job only runs the generated script below. The script path is
         single-quoted so that a path containing spaces or shell
         metacharacters cannot split or alter the command. -->
    <command detect_errors="exit_code"><![CDATA[
        sh '${msmetaenhancer_python_cli}'
    ]]> </command>

    <!-- Generated script: calls the Python wrapper with the input/output
         datasets and one jobs string. The ordered conversions (repeat block)
         are joined with commas and followed by a trailing comma, then the
         unordered multi-select value is appended; the wrapper splits the
         string on commas again. All interpolated values are single-quoted. -->
    <configfiles>
        <configfile name="msmetaenhancer_python_cli">
            python3 '${__tool_directory__}/msmetaenhancer_wrapper.py' \
            --input_file '$input_file' \
            --output_file '$output_file' \
            #if len($ordered_jobs) != 0:
            #set ordered_jobs = ",".join([str($job.ordered_jobs_select) for $job in $ordered_jobs]) + ","
            #else:
            #set ordered_jobs = ""
            #end if
            #if $jobs != 'None':
            #set random_jobs = str($jobs)
            #else:
            #set random_jobs = ""
            #end if
            #set all_jobs = str($ordered_jobs) + str($random_jobs)
            --jobs '$all_jobs'
        </configfile>
    </configfiles>

    <inputs>
        <param label="Input spectra dataset" name="input_file" type="data" format="msp" />

        <!-- Conversions listed here are passed first, in the order given. -->
        <repeat name="ordered_jobs" title="Ordered conversions">
            <param name="ordered_jobs_select" type="select" label="Available conversions" multiple="false" optional="true">
                <expand macro="job_options" />
            </param>
        </repeat>

        <!-- Additional conversions, appended after the ordered ones. -->
        <param name="jobs" type="select" label="Other conversions" multiple="true" optional="true">
            <expand macro="job_options" />
        </param>
    </inputs>

    <outputs>
        <data label="MSMetaEnhancer on ${on_string}" name="output_file" format="msp" />
    </outputs>

    <tests>
        <test>
            <param name="input_file" value="sample.msp" ftype="msp" />
            <output name="output_file" file="sample_out.msp" ftype="msp"/>
        </test>
    </tests>

    <help>
        <![CDATA[
        @HELP@
        ]]>
    </help>

</tool>
"""Command-line wrapper that annotates an MSP spectra file with MSMetaEnhancer."""
import argparse
import asyncio
import sys


from MSMetaEnhancer import Application


def _parse_jobs(jobs_arg):
    """Convert the raw ``--jobs`` string into a list of conversion jobs.

    Args:
        jobs_arg: Comma-separated conversions, each written as three
            whitespace-separated fields (e.g. ``"inchi smiles PubChem"``),
            or ``None``/``""`` when no conversion was requested.

    Returns:
        A list with one list of fields per conversion, in the given order.
        The list is empty when nothing usable was requested.
    """
    if not jobs_arg:
        return []

    jobs = []
    for entry in jobs_arg.split(","):
        fields = entry.split()
        # Skip blanks left by trailing/duplicate commas, and the literal
        # "None" placeholder that the Galaxy form produces for an optional
        # select that was left unselected.
        if not fields or fields == ["None"]:
            continue
        jobs.append(fields)
    return jobs


def main(argv):
    """Annotate the spectra in an MSP file and write the result.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        0 on success; failures surface as exceptions (or as an argparse
        usage error when a required option is missing).
    """
    parser = argparse.ArgumentParser(description="Annotate MSP file.")
    parser.add_argument("--input_file", type=str, required=True,
                        help="Path to query spectra file in MSP format.")
    parser.add_argument("--output_file", type=str, required=True,
                        help="Path to output spectra file.")
    parser.add_argument("--jobs", type=str, help="Sequence of conversion jobs to be used.")
    # Parse the arguments handed in by the caller, not the implicit sys.argv.
    args = parser.parse_args(argv)

    app = Application()

    # import .msp file
    app.load_spectra(args.input_file, file_format='msp')

    # curate given metadata
    app.curate_spectra()

    # specify requested services and jobs
    services = ['PubChem', 'CTS', 'CIR', 'NLM']
    jobs = _parse_jobs(args.jobs)

    if jobs:
        asyncio.run(app.annotate_spectra(services, jobs))
    else:
        # execute without jobs parameter to run all possible jobs
        asyncio.run(app.annotate_spectra(services))

    # export .msp file
    app.save_spectra(args.output_file, file_format="msp")
    return 0


if __name__ == "__main__":
    # Propagate main's return code to the shell.
    sys.exit(main(argv=sys.argv[1:]))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.msp Tue Jan 11 15:12:26 2022 +0000 @@ -0,0 +1,75 @@ +NAME: Hydrogen +FORMULA: H2 +MW: 2 +CASNO: 1333740 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +NUM PEAKS: 2 +1.0 20.98 +2.0 999.0 + +NAME: Deuterium +FORMULA: D2 +MW: 4 +CASNO: 7782390 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +NUM PEAKS: 2 +2.0 14.99 +4.0 999.0 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +NUM PEAKS: 6 +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +NUM PEAKS: 6 +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +NUM PEAKS: 6 +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74828 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +NUM PEAKS: 6 +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_out.msp Tue Jan 11 15:12:26 2022 +0000 @@ -0,0 +1,95 @@ +NAME: Hydrogen +FORMULA: H2 +MW: 2 +CASNO: 1333-74-0 +ID: 1 +COMMENT: NIST MS# 245692, Seq# M1 +INCHIKEY: UFHFLCQGNIYNRP-UHFFFAOYSA-N +INCHI: InChI=1S/H2/h1H +IUPAC_NAME: hydrogen monohydride +SMILES: [H][H] +NUM PEAKS: 2 +1.0 20.98 +2.0 999.0 + +NAME: Deuterium +FORMULA: D2 +MW: 4 +CASNO: 7782-39-0 +ID: 2 +COMMENT: NIST MS# 61316, Seq# M2 +INCHIKEY: UFHFLCQGNIYNRP-VVKOMZTBSA-N +INCHI: InChI=1S/H2/h1H/i1+1D +SMILES: [2H][2H] +NUM PEAKS: 2 +2.0 14.99 +4.0 999.0 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 3 +COMMENT: Any=100 ; NIST MS# 61313, Seq# M64 +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 37.97 +13.0 105.9 +14.0 203.82 +15.0 886.2 +16.0 999.0 +17.0 15.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 4 +COMMENT: Any=100 ; NIST MS# 18807, Seq# R26 +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 25.98 +13.0 85.92 +14.0 170.85 +15.0 855.23 +16.0 999.0 +17.0 10.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 5 +COMMENT: Any=100 ; NIST MS# 18809, Seq# R27 +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 7.99 +13.0 28.97 +14.0 74.93 +15.0 753.32 +16.0 999.0 +17.0 11.99 + +NAME: Methane +FORMULA: CH4 +MW: 16 +CASNO: 74-82-8 +ID: 6 +COMMENT: Any=100 ; NIST MS# 423924, Seq# R28 +INCHIKEY: VNWKTOKETHGBQD-UHFFFAOYSA-N +INCHI: InChI=1S/CH4/h1H4 +IUPAC_NAME: methane +NUM PEAKS: 6 +12.0 25.98 +13.0 74.93 +14.0 152.86 +15.0 829.25 +16.0 999.0 +17.0 10.99 +