annotate mztab_reader.py @ 0:a46d857e25c2 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
author galaxyp
date Fri, 15 Jan 2021 15:57:59 +0000
parents
children dc31e153fe6c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
2
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
3 import argparse
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
4 import os
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
5
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
6 import pandas as pd
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
7 from pyteomics.mztab import MzTab
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
8
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
9
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab(input_path, output_path):
    """
    Read mztab file and export its sections as .tsv files.

    The file is parsed with ``MzTab`` and handed to the exporter that matches
    the parsed object's ``variant`` attribute: 'P' is processed by
    ``read_mztab_p`` and 'M' by ``read_mztab_m``.

    :param input_path: path of the input mzTab file
    :param output_path: folder the .tsv files are written to
    :return: whatever the selected exporter returns
    :raises ValueError: if the variant is neither 'P' nor 'M'
    """
    mztab = MzTab(input_path)
    variant = mztab.variant
    if variant == 'P':
        return read_mztab_p(mztab, output_path)
    if variant == 'M':
        return read_mztab_m(mztab, output_path)
    # Fail loudly instead of finishing "successfully" without any output.
    raise ValueError(
        "Unsupported mzTab variant {!r} in {} (expected 'P' or 'M')".format(
            variant, input_path
        )
    )
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
19
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
20
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab_p(mztab, output_path):
    """
    Processing mztab "P"

    Writes five tab-separated files into ``output_path``:

    * ``mtd.tsv`` - ``mztab.metadata`` converted with
      ``DataFrame.from_dict(..., orient='index')`` (one row per key)
    * ``prt.tsv`` - ``mztab.protein_table``
    * ``pep.tsv`` - ``mztab.peptide_table``
    * ``psm.tsv`` - ``mztab.spectrum_match_table``
    * ``sml.tsv`` - ``mztab.small_molecule_table``

    :param mztab: parsed mzTab object; each table attribute must provide a
        ``to_csv`` method (presumably pandas DataFrames - confirm against
        the pyteomics version in use)
    :param output_path: folder for the output files; created if missing
    """
    # to_csv cannot write into a folder that does not exist yet. An empty
    # path means "current directory" for os.path.join and needs no creation.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (attribute on the parsed object, output file name), in write order.
    sections = (
        ("protein_table", "prt.tsv"),
        ("peptide_table", "pep.tsv"),
        ("spectrum_match_table", "psm.tsv"),
        ("small_molecule_table", "sml.tsv"),
    )
    for attribute, file_name in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, file_name), sep="\t")
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
35
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
36
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
def read_mztab_m(mztab, output_path):
    """
    Processing mztab "M"

    Writes four tab-separated files into ``output_path``:

    * ``mtd.tsv`` - ``mztab.metadata`` converted with
      ``DataFrame.from_dict(..., orient='index')`` (one row per key)
    * ``sml.tsv`` - ``mztab.small_molecule_table``
    * ``smf.tsv`` - ``mztab.small_molecule_feature_table``
    * ``sme.tsv`` - ``mztab.small_molecule_evidence_table``

    :param mztab: parsed mzTab object; each table attribute must provide a
        ``to_csv`` method (presumably pandas DataFrames - confirm against
        the pyteomics version in use)
    :param output_path: folder for the output files; created if missing
    """
    # to_csv cannot write into a folder that does not exist yet. An empty
    # path means "current directory" for os.path.join and needs no creation.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (attribute on the parsed object, output file name), in write order.
    sections = (
        ("small_molecule_table", "sml.tsv"),
        ("small_molecule_feature_table", "smf.tsv"),
        ("small_molecule_evidence_table", "sme.tsv"),
    )
    for attribute, file_name in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, file_name), sep="\t")
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
49
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
50
a46d857e25c2 "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
galaxyp
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: one required input file and an optional
    # output folder that defaults to the current working directory.
    cli = argparse.ArgumentParser(description='List of paths')

    # Both options share metavar and type; only the remaining keywords differ.
    option_specs = (
        ('--path_in', {
            'required': True,
            'help': 'the path of input .mztab file',
        }),
        ('--path_out', {
            'default': os.getcwd(),
            'help': 'the path of folder for output .tsv file',
        }),
    )
    for flag, extra in option_specs:
        cli.add_argument(flag, metavar='path', type=str, **extra)

    options = cli.parse_args()

    read_mztab(options.path_in, options.path_out)