annotate splitMSP.py @ 0:e7825eae651e draft default tip

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
author recetox
date Fri, 25 Mar 2022 15:37:27 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
1 import argparse
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
2 import os
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
3 from typing import List
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
4
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
5 from matchms import Spectrum
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
6 from matchms.exporting import save_as_msp
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
7 from matchms.importing import load_from_msp
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
8
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
9
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def read_spectra(filename: str) -> List[Spectrum]:
    """Load all spectra stored in an MSP file.

    Args:
        filename (str): Path to the .msp file to read.

    Returns:
        List[Spectrum]: Every spectrum yielded by ``load_from_msp``, in the
            order it was yielded.
    """
    # The second positional argument is forwarded to matchms unchanged.
    loaded = load_from_msp(filename, True)
    # Materialize the loader's output so callers can index it and take len().
    return list(loaded)
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
20
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
21
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' entry of every spectrum.

    Args:
        spectra (list): Spectra to query; each item must offer a ``get`` method.

    Returns:
        List[str]: One entry per spectrum, in input order, holding whatever
            ``get("compound_name")`` returned for that spectrum.
    """
    names = []
    for spectrum in spectra:
        names.append(spectrum.get("compound_name"))
    return names
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
32
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
33
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def make_outdir(outdir: str) -> None:
    """Create the destination directory if it does not exist yet.

    Missing parent directories are created as well, and an already existing
    directory is accepted silently so the tool can be re-run against the same
    destination.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        FileExistsError: If ``outdir`` exists but is not a directory.
    """
    # os.mkdir would fail on an existing directory or on missing parents.
    os.makedirs(outdir, exist_ok=True)
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
41
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
42
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def write_spectra(filename, outdir):
    """Write each spectrum of an MSP file to its own MSP file.

    The output path of every spectrum is built by ``assemble_outpath`` from
    the spectrum's 'compound_name' and the destination directory.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory.
    """
    spectra = read_spectra(filename)
    # Names are collected up front, then paired with their spectra in order.
    for name, spectrum in zip(get_spectra_names(spectra), spectra):
        save_as_msp(spectrum, assemble_outpath(name, outdir))
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
55
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
56
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def assemble_outpath(name, outdir):
    """Build the output file path for a spectrum name.

    All non-alphanumeric characters are removed from the name so it is safe
    to use as a file name, then the '.msp' extension is appended.

    Args:
        name (str): Name to be filtered. May be None or contain no
            alphanumeric characters, in which case 'unnamed' is used.
        outdir (str): Path to destination directory.

    Returns:
        str: ``outdir`` joined with the sanitized file name.
    """
    # `name or ""` guards against spectra without a 'compound_name' (None),
    # which would otherwise make filter() raise a TypeError.
    sanitized = "".join(filter(str.isalnum, name or ""))
    if not sanitized:
        # Avoid producing a hidden, extension-only file called '.msp'.
        sanitized = "unnamed"
    return os.path.join(outdir, sanitized + ".msp")
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
68
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
69
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def split_spectra(filename, outdir):
    """Split an MSP file into one MSP file per spectrum.

    The destination directory is created first, then the individual spectra
    files are written into it.

    Args:
        filename (str): MSP file that contains the spectra.
        outdir (str): Path to destination directory where split spectra files are saved.
    """
    make_outdir(outdir)
    result = write_spectra(filename, outdir)
    return result
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
79
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
80
e7825eae651e "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_split commit 0d9dfeae375a6761b52be79111e228d950e2902e"
recetox
parents:
diff changeset
def parse_arguments(argv=None):
    """Parse the command-line options of the MSP splitting script.

    Args:
        argv (list, optional): Argument strings to parse. When None,
            argparse reads them from ``sys.argv``.

    Returns:
        argparse.Namespace: Namespace with the attributes ``filename`` and
            ``outdir``.
    """
    parser = argparse.ArgumentParser()
    # Both options are mandatory: without them the script would only fail
    # later with an unrelated error.
    parser.add_argument('--filename', type=str, required=True)
    parser.add_argument('--outdir', type=str, required=True)
    return parser.parse_args(argv)


def main(argv=None):
    """Run the splitting for the given command-line arguments.

    Args:
        argv (list, optional): Argument strings; defaults to ``sys.argv``.
    """
    args = parse_arguments(argv)
    split_spectra(args.filename, args.outdir)


# Parsing happens only when executed as a script, so importing this module
# no longer consumes sys.argv.
if __name__ == "__main__":
    main()