Mercurial > repos > galaxyp > pyteomics_mztab2tsv
annotate mztab_reader.py @ 0:a46d857e25c2 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
| author | galaxyp | 
|---|---|
| date | Fri, 15 Jan 2021 15:57:59 +0000 | 
| parents | |
| children | dc31e153fe6c | 
| rev | line source | 
|---|---|
| 0 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 2 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 3 import argparse | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 4 import os | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 5 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 6 import pandas as pd | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 7 from pyteomics.mztab import MzTab | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 8 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 9 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
def read_mztab(input_path, output_path):
    """
    Parse an mzTab file and hand it to the writer matching its variant.

    :param input_path: location of the mzTab file, passed straight to
        ``MzTab``.
    :param output_path: folder that receives the generated ``.tsv`` files;
        forwarded unchanged to the variant-specific writer.
    :return: whatever the selected writer returns; ``None`` when the
        variant is neither ``'P'`` nor ``'M'`` (nothing is written then).
    """
    parsed = MzTab(input_path)
    variant = parsed.variant

    # Guard clauses: one writer per supported variant.
    if variant == 'P':
        return read_mztab_p(parsed, output_path)
    if variant == 'M':
        return read_mztab_m(parsed, output_path)
    return None
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 19 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 20 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
def read_mztab_p(mztab, output_path):
    """
    Write the sections of an mzTab "P" object as tab-separated files.

    Five files are produced inside ``output_path``: ``mtd.tsv`` (metadata),
    ``prt.tsv``, ``pep.tsv``, ``psm.tsv`` and ``sml.tsv``.

    :param mztab: object exposing ``metadata`` (a mapping) and the table
        attributes ``protein_table``, ``peptide_table``,
        ``spectrum_match_table`` and ``small_molecule_table``, each of which
        must offer ``to_csv`` (presumably a pyteomics ``MzTab`` parsed with
        data-frame tables -- confirm against the caller).
    :param output_path: destination folder; created if it does not exist.
        An empty string keeps the historical meaning of "current directory".
    :return: None
    """
    # to_csv() does not create missing folders, so make sure the target
    # exists. An empty path is skipped because os.makedirs("") raises while
    # os.path.join("", name) simply resolves to the working directory.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Metadata is a flat mapping: one row per key.
    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (file stem, attribute) pairs in the order they have always been
    # written; each table is fetched right before it is written.
    sections = (
        ("prt", "protein_table"),
        ("pep", "peptide_table"),
        ("psm", "spectrum_match_table"),
        ("sml", "small_molecule_table"),
    )
    for stem, attribute in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, stem + ".tsv"), sep="\t")
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 35 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 36 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
def read_mztab_m(mztab, output_path):
    """
    Write the sections of an mzTab "M" object as tab-separated files.

    Four files are produced inside ``output_path``: ``mtd.tsv`` (metadata),
    ``sml.tsv``, ``smf.tsv`` and ``sme.tsv``.

    :param mztab: object exposing ``metadata`` (a mapping) and the table
        attributes ``small_molecule_table``, ``small_molecule_feature_table``
        and ``small_molecule_evidence_table``, each of which must offer
        ``to_csv`` (presumably a pyteomics ``MzTab`` parsed with data-frame
        tables -- confirm against the caller).
    :param output_path: destination folder; created if it does not exist.
        An empty string keeps the historical meaning of "current directory".
    :return: None
    """
    # to_csv() does not create missing folders, so make sure the target
    # exists. An empty path is skipped because os.makedirs("") raises while
    # os.path.join("", name) simply resolves to the working directory.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    # Metadata is a flat mapping: one row per key.
    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")

    # (file stem, attribute) pairs in the order they have always been
    # written; each table is fetched right before it is written.
    sections = (
        ("sml", "small_molecule_table"),
        ("smf", "small_molecule_feature_table"),
        ("sme", "small_molecule_evidence_table"),
    )
    for stem, attribute in sections:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, stem + ".tsv"), sep="\t")
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 49 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
changeset | 50 | 
| 
a46d857e25c2
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
 galaxyp parents: diff
if __name__ == "__main__":
    # Command-line interface: a required input file and an optional output
    # folder that falls back to the current working directory.
    cli = argparse.ArgumentParser(description='List of paths')
    cli.add_argument(
        '--path_in',
        type=str,
        required=True,
        metavar='path',
        help='the path of input .mztab file',
    )
    cli.add_argument(
        '--path_out',
        type=str,
        default=os.getcwd(),
        metavar='path',
        help='the path of folder for output .tsv file',
    )

    # Parse the command line and run the conversion.
    options = cli.parse_args()
    read_mztab(options.path_in, options.path_out)
