Mercurial > repos > eschen42 > mqppep_preproc
annotate mqppep_mrgfltr.py @ 22:43f1fd0ff86b draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 00881af5b1373174e5afe706add2f33b8614828c"
author | eschen42 |
---|---|
date | Wed, 13 Apr 2022 19:48:01 +0000 |
parents | ba5f14c2a4af |
children | 7560a4e80a1e |
rev | line source |
---|---|
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1 #!/usr/bin/env python |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
2 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
3 # Import the packages needed |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
4 import argparse |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
5 import operator # for operator.itemgetter |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
6 import os.path |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
7 import re |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
8 import shutil # for shutil.copyfile(src, dest) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
9 import sqlite3 as sql |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
10 import sys # import the sys module for exc_info |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
11 import time |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
12 import traceback # for formatting stack-trace |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
13 from codecs import getreader as cx_getreader |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
14 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
15 import numpy as np |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
16 import pandas |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
17 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
18 # global constants |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
19 N_A = "N/A" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
20 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
21 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# ref: https://stackoverflow.com/a/8915613/15509512
#   answers: "How to handle exceptions in a list comprehensions"
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``; on failure, report it and exit.

    Parameters:
        func: the callable to invoke.
        *args: positional arguments forwarded to ``func``.
        handle: accepted for compatibility with the referenced recipe;
            it is never called by this function.
        **kwargs: keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, when it does not raise.

    Raises:
        SystemExit: with code -1 when ``func`` raises an ``Exception``.
            Before exiting, the callable, its positional arguments, the
            exception message and the formatted stack trace are printed
            to stdout.

    Usage:
        from math import log
        eggs = [1, 3, 0, 3, 2]
        print([x for x in [catch(log, egg) for egg in eggs] if x is not None])

    Because the first failing call (``log(0)``) terminates the process,
    the report for that call is printed but the final list is not.  Use
    ``whine`` instead when processing should continue after a failure.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # The caught exception already carries its type and traceback.
        trace = traceback.format_exception(type(e), e, e.__traceback__)
        print(" stack trace: " + str(trace))
        # sys.exit rather than the site-provided exit() helper, which is
        # meant for the interactive shell and may be absent (python -S).
        sys.exit(-1)
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
45 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
46 |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
def whine(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``; on failure, print a warning and go on.

    Parameters:
        func: the callable to invoke.
        *args: positional arguments forwarded to ``func``.
        handle: accepted for signature compatibility; it is never called
            by this function.
        **kwargs: keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` when ``func`` raises an
        ``Exception``.  In the failure case the callable, its positional
        arguments, the exception message and the formatted stack trace
        are printed to stdout and execution continues.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("Warning: For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # The caught exception already carries its type and traceback, so
        # sys.exc_info() is not needed to format the stack trace.
        trace = traceback.format_exception(type(e), e, e.__traceback__)
        print(" stack trace: " + str(trace))
        return None
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
58 |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
59 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
def ppep_join(x):
    """Join the entries of ``x`` with " | ", leaving out N_A placeholders.

    Returns the joined text, or N_A when the joined text is empty (for
    example when every entry was N_A or ``x`` had no entries).
    """
    kept = []
    for item in x:
        if N_A != item:
            kept.append(item)
    joined = " | ".join(kept)
    if joined == "":
        return N_A
    return joined
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
67 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
68 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
def melt_join(x):
    """Join the entries of ``x`` with " | ", de-duplicated ignoring case.

    Entries that differ only by letter case collapse into one: it takes
    the position of the first such entry and the spelling of the last.
    """
    spelling_by_lowercase = {}
    for entry in x:
        # Re-assigning an existing key keeps its original position in the
        # dict but replaces the stored spelling.
        spelling_by_lowercase[entry.lower()] = entry
    return " | ".join(spelling_by_lowercase.values())
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
73 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
74 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
75 def __main__(): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
76 # Parse Command Line |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
77 parser = argparse.ArgumentParser( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
78 description="Phopsphoproteomic Enrichment Pipeline Merge and Filter." |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
79 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
80 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
81 # inputs: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
82 # Phosphopeptide data for experimental results, including the intensities |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
83 # and the mapping to kinase domains, in tabular format. |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
84 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
85 "--phosphopeptides", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
86 "-p", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
87 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
88 required=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
89 dest="phosphopeptides", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
90 help="Phosphopeptide data for experimental results, including the intensities and the mapping to kinase domains, in tabular format", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
91 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
92 # UniProtKB/SwissProt DB input, SQLite |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
93 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
94 "--ppep_mapping_db", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
95 "-d", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
96 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
97 required=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
98 dest="ppep_mapping_db", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
99 help="UniProtKB/SwissProt SQLite Database", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
100 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
101 # species to limit records chosen from PhosphoSitePlus |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
102 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
103 "--species", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
104 "-x", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
105 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
106 required=False, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
107 default=[], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
108 dest="species", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
109 help="limit PhosphoSitePlus records to indicated species (field may be empty)", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
110 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
111 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
112 # outputs: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
113 # tabular output |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
114 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
115 "--mrgfltr_tab", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
116 "-o", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
117 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
118 required=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
119 dest="mrgfltr_tab", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
120 help="Tabular output file for results", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
121 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
122 # CSV output |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
123 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
124 "--mrgfltr_csv", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
125 "-c", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
126 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
127 required=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
128 dest="mrgfltr_csv", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
129 help="CSV output file for results", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
130 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
131 # SQLite output |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
132 parser.add_argument( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
133 "--mrgfltr_sqlite", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
134 "-S", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
135 nargs=1, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
136 required=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
137 dest="mrgfltr_sqlite", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
138 help="SQLite output file for results", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
139 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
140 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
141 # "Make it so!" (parse the arguments) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
142 options = parser.parse_args() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
143 print("options: " + str(options)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
144 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
145 # determine phosphopeptide ("upstream map") input tabular file access |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
146 if options.phosphopeptides is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
147 exit('Argument "phosphopeptides" is required but not supplied') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
148 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
149 upstream_map_filename_tab = os.path.abspath(options.phosphopeptides[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
150 input_file = open(upstream_map_filename_tab, "r") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
151 input_file.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
152 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
153 exit("Error parsing phosphopeptides argument: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
154 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
155 # determine input SQLite access |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
156 if options.ppep_mapping_db is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
157 exit('Argument "ppep_mapping_db" is required but not supplied') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
158 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
159 uniprot_sqlite = os.path.abspath(options.ppep_mapping_db[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
160 input_file = open(uniprot_sqlite, "rb") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
161 input_file.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
162 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
163 exit("Error parsing ppep_mapping_db argument: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
164 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
165 # copy input SQLite dataset to output SQLite dataset |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
166 if options.mrgfltr_sqlite is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
167 exit('Argument "mrgfltr_sqlite" is required but not supplied') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
168 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
169 output_sqlite = os.path.abspath(options.mrgfltr_sqlite[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
170 shutil.copyfile(uniprot_sqlite, output_sqlite) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
171 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
172 exit("Error copying ppep_mapping_db to mrgfltr_sqlite: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
173 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
174 # determine species to limit records from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
175 if options.species is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
176 exit( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
177 'Argument "species" is required (and may be empty) but not supplied' |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
178 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
179 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
180 if len(options.species) > 0: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
181 species = options.species[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
182 else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
183 species = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
184 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
185 exit("Error parsing species argument: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
186 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
187 # determine tabular output destination |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
188 if options.mrgfltr_tab is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
189 exit('Argument "mrgfltr_tab" is required but not supplied') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
190 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
191 output_filename_tab = os.path.abspath(options.mrgfltr_tab[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
192 output_file = open(output_filename_tab, "w") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
193 output_file.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
194 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
195 exit("Error parsing mrgfltr_tab argument: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
196 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
197 # determine CSV output destination |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
198 if options.mrgfltr_csv is None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
199 exit('Argument "mrgfltr_csv" is required but not supplied') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
200 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
201 output_filename_csv = os.path.abspath(options.mrgfltr_csv[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
202 output_file = open(output_filename_csv, "w") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
203 output_file.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
204 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
205 exit("Error parsing mrgfltr_csv argument: %s" % str(e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
206 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
207 def mqpep_getswissprot(): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
208 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
209 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
210 # copied from Excel Output Script.ipynb BEGIN # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
211 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
212 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
213 # String Constants ################# |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
214 DEPHOSPHOPEP = "DephosphoPep" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
215 DESCRIPTION = "Description" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
216 FUNCTION_PHOSPHORESIDUE = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
217 "Function Phosphoresidue(PSP=PhosphoSitePlus.org)" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
218 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
219 GENE_NAME = "Gene_Name" # Gene Name from UniProtKB |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
220 ON_FUNCTION = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
221 "ON_FUNCTION" # ON_FUNCTION column from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
222 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
223 ON_NOTES = "NOTES" # NOTES column from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
224 ON_OTHER_INTERACT = "ON_OTHER_INTERACT" # ON_OTHER_INTERACT column from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
225 ON_PROCESS = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
226 "ON_PROCESS" # ON_PROCESS column from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
227 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
228 ON_PROT_INTERACT = "ON_PROT_INTERACT" # ON_PROT_INTERACT column from PSP_Regulatory_Sites |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
229 PHOSPHOPEPTIDE = "Phosphopeptide" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
230 PHOSPHOPEPTIDE_MATCH = "Phosphopeptide_match" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
231 PHOSPHORESIDUE = "Phosphoresidue" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
232 PUTATIVE_UPSTREAM_DOMAINS = "Putative Upstream Kinases(PSP=PhosphoSitePlus.org)/Phosphatases/Binding Domains" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
233 SEQUENCE = "Sequence" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
234 SEQUENCE10 = "Sequence10" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
235 SEQUENCE7 = "Sequence7" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
236 SITE_PLUSMINUS_7AA_SQL = "SITE_PLUSMINUS_7AA" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
237 UNIPROT_ID = "UniProt_ID" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
238 UNIPROT_SEQ_AND_META_SQL = """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
239 select Uniprot_ID, Description, Gene_Name, Sequence, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
240 Organism_Name, Organism_ID, PE, SV |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
241 from UniProtKB |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
242 order by Sequence, UniProt_ID |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
243 """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
244 UNIPROT_UNIQUE_SEQ_SQL = """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
245 select distinct Sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
246 from UniProtKB |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
247 group by Sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
248 """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
249 PPEP_PEP_UNIPROTSEQ_SQL = """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
250 select distinct phosphopeptide, peptide, sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
251 from uniprotkb_pep_ppep_view |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
252 order by sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
253 """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
254 PPEP_MELT_SQL = """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
255 SELECT DISTINCT |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
256 phospho_peptide AS 'p_peptide', |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
257 kinase_map AS 'characterization', |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
258 'X' AS 'X' |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
259 FROM ppep_gene_site_view |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
260 """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
261 # CREATE TABLE PSP_Regulatory_site ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
262 # site_plusminus_7AA TEXT PRIMARY KEY ON CONFLICT IGNORE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
263 # domain TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
264 # ON_FUNCTION TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
265 # ON_PROCESS TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
266 # ON_PROT_INTERACT TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
267 # ON_OTHER_INTERACT TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
268 # notes TEXT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
269 # organism TEXT |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
270 # ); |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# Query returning every distinct row of the PSP_Regulatory_site table
# (all nine columns listed in the schema comment above).
PSP_REGSITE_SQL = """
SELECT DISTINCT
SITE_PLUSMINUS_7AA ,
DOMAIN ,
ON_FUNCTION ,
ON_PROCESS ,
ON_PROT_INTERACT ,
ON_OTHER_INTERACT ,
NOTES ,
ORGANISM
FROM PSP_Regulatory_site
"""

# Query returning the id and sequence of every row of the ppep table,
# aliased as ppep_id and ppep_seq.
PPEP_ID_SQL = """
SELECT
id AS 'ppep_id',
seq AS 'ppep_seq'
FROM ppep
"""

# DDL script that (re)creates the mrgfltr_metadata table, keyed by ppep_id
# (duplicate keys are silently ignored), and the mrgfltr_metadata_view that
# joins it to ppep so each row is labelled with the peptide sequence.
MRGFLTR_DDL = """
DROP VIEW IF EXISTS mrgfltr_metadata_view;
DROP TABLE IF EXISTS mrgfltr_metadata;
CREATE TABLE mrgfltr_metadata
( ppep_id INTEGER REFERENCES ppep(id)
, Sequence10 TEXT
, Sequence7 TEXT
, GeneName TEXT
, Phosphoresidue TEXT
, UniProtID TEXT
, Description TEXT
, FunctionPhosphoresidue TEXT
, PutativeUpstreamDomains TEXT
, PRIMARY KEY (ppep_id) ON CONFLICT IGNORE
)
;
CREATE VIEW mrgfltr_metadata_view AS
SELECT DISTINCT
ppep.seq AS phospho_peptide
, Sequence10
, Sequence7
, GeneName
, Phosphoresidue
, UniProtID
, Description
, FunctionPhosphoresidue
, PutativeUpstreamDomains
FROM
ppep, mrgfltr_metadata
WHERE
mrgfltr_metadata.ppep_id = ppep.id
ORDER BY
ppep.seq
;
"""

# Parameterized insert of one (ObjectName, CitationData) pair into Citation.
CITATION_INSERT_STMT = """
INSERT INTO Citation (
ObjectName,
CitationData
) VALUES (?,?)
"""
# License and acknowledgement text required when PhosphoSitePlus data is used.
CITATION_INSERT_PSP = 'PhosphoSitePlus(R) (PSP) was created by Cell Signaling Technology Inc. It is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. When using PSP data or analyses in printed publications or in online resources, the following acknowledgements must be included: (a) the words "PhosphoSitePlus(R), www.phosphosite.org" must be included at appropriate places in the text or webpage, and (b) the following citation must be included in the bibliography: "Hornbeck PV, Zhang B, Murray B, Kornhauser JM, Latham V, Skrzypek E PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res. 2015 43:D512-20. PMID: 25514926."'
# Reference for the paper cited in CITATION_INSERT_PSP. The PubMed link must
# carry the same PMID as that text (25514926); the previous value pointed at
# a different PhosphoSitePlus article.
# NOTE(review): DOI taken to be the one registered for PMID 25514926 -- confirm.
CITATION_INSERT_PSP_REF = 'Hornbeck, 2014, "PhosphoSitePlus, 2014: mutations, PTMs and recalibrations.", https://pubmed.ncbi.nlm.nih.gov/25514926, https://doi.org/10.1093/nar/gku1267'

# Column names of mrgfltr_metadata, in the order declared in MRGFLTR_DDL.
MRGFLTR_METADATA_COLUMNS = [
    "ppep_id",
    "Sequence10",
    "Sequence7",
    "GeneName",
    "Phosphoresidue",
    "UniProtID",
    "Description",
    "FunctionPhosphoresidue",
    "PutativeUpstreamDomains",
]
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
345 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
346 # String Constants (end) ############ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
347 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
class Error(Exception):
    """Root of the exception hierarchy defined in this module."""
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
352 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
class PreconditionError(Error):
    """Signal that an input did not satisfy a required precondition.

    Attributes:
        expression -- the input expression in which the error occurred
        message -- human-readable explanation of the error
    """

    def __init__(self, expression, message):
        # Keep both pieces available to handlers as plain attributes.
        self.expression, self.message = expression, message
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
364 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# CPU-time stamp; stage timings below are reported relative to it.
start_time = time.process_time()  # timer

# Get keys from the upstream tabular file using readline(): the text before
# the first tab of every line except the first one is collected.
# ref: https://stackoverflow.com/a/16713581/15509512
#      answer to "Use codecs to read file with correct encoding"
count = 0
upstream_map_p_peptide_list = []
re_tab = re.compile("^[^\t]*")
# The with-block releases the file handle even if reading or matching raises
# part-way through the file.
with open(upstream_map_filename_tab, "rb") as file1_encoded:
    file1 = cx_getreader("latin-1")(file1_encoded)
    while True:
        count += 1
        # Get next line from file
        line = file1.readline()
        # an empty string means end of file is reached
        if not line:
            break
        # count == 1 is the first line of the file, which is skipped
        if count > 1:
            m = re_tab.match(line)
            upstream_map_p_peptide_list.append(m[0])

# Get the list of phosphopeptides with the p's that represent the phosphorylation sites removed
re_phos = re.compile("p")

end_time = time.process_time()  # timer
print(
    "%0.6f pre-read-SwissProt [0.1]" % (end_time - start_time,),
    file=sys.stderr,
)

# ----------- Get SwissProt data from SQLite database (start) -----------
# build UniProt sequence LUT and list of unique SwissProt sequences

# Open SwissProt SQLite database
conn = sql.connect(uniprot_sqlite)
cur = conn.cursor()

# Set up structures to hold SwissProt data

uniprot_Sequence_List = []
UniProtSeqLUT = {}
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
411 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
412 # Execute query for unique seqs without fetching the results yet |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
413 uniprot_unique_seq_cur = cur.execute(UNIPROT_UNIQUE_SEQ_SQL) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
414 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
415 while 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
416 batch = uniprot_unique_seq_cur.fetchmany(size=50) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
417 if not batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
418 # stop when the result set is exhausted (fetchmany returned no records) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
419 break |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
420 for row in batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
421 Sequence = row[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
422 UniProtSeqLUT[(Sequence, DESCRIPTION)] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
423 UniProtSeqLUT[(Sequence, GENE_NAME)] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
424 UniProtSeqLUT[(Sequence, UNIPROT_ID)] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
425 UniProtSeqLUT[Sequence] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
426 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
427 # Execute query for seqs and metadata without fetching the results yet |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
428 uniprot_seq_and_meta = cur.execute(UNIPROT_SEQ_AND_META_SQL) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
429 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
430 while 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
431 batch = uniprot_seq_and_meta.fetchmany(size=50) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
432 if not batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
433 # stop when the result set is exhausted (fetchmany returned no records) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
434 break |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
435 for ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
436 UniProt_ID, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
437 Description, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
438 Gene_Name, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
439 Sequence, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
440 OS, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
441 OX, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
442 PE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
443 SV, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
444 ) in batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
445 uniprot_Sequence_List.append(Sequence) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
446 UniProtSeqLUT[Sequence] = Sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
447 UniProtSeqLUT[(Sequence, UNIPROT_ID)].append(UniProt_ID) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
448 UniProtSeqLUT[(Sequence, GENE_NAME)].append(Gene_Name) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
449 if OS != N_A: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
450 Description += " OS=" + OS |
17
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
16
diff
changeset
|
451 if OX != -1: |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
16
diff
changeset
|
452 Description += " OX=" + str(OX) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
453 if Gene_Name != N_A: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
454 Description += " GN=" + Gene_Name |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
455 if PE != N_A: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
456 Description += " PE=" + PE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
457 if SV != N_A: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
458 Description += " SV=" + SV |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
459 UniProtSeqLUT[(Sequence, DESCRIPTION)].append(Description) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
460 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
461 # Close SwissProt SQLite database; clean up local variables |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
462 conn.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
463 Sequence = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
464 UniProt_ID = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
465 Description = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
466 Gene_Name = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
467 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
468 # ----------- Get SwissProt data from SQLite database (finish) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
469 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
470 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
471 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
472 "%0.6f post-read-SwissProt [0.2]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
473 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
474 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
475 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
476 # ----------- Get SwissProt data from SQLite database (start) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
477 # Open SwissProt SQLite database |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
478 conn = sql.connect(uniprot_sqlite) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
479 cur = conn.cursor() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
480 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
481 # Set up dictionary to aggregate results for phosphopeptides corresponding to dephosphopeptide |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
482 DephosphoPep_UniProtSeq_LUT = {} |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
483 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
484 # Set up dictionary to accumulate results |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
485 PhosphoPep_UniProtSeq_LUT = {} |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
486 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
487 # Execute query for tuples without fetching the results yet |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
488 ppep_pep_uniprotseq_cur = cur.execute(PPEP_PEP_UNIPROTSEQ_SQL) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
489 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
490 while 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
491 batch = ppep_pep_uniprotseq_cur.fetchmany(size=50) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
492 if not batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
493 # stop when the result set is exhausted (fetchmany returned no records) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
494 break |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
495 for (phospho_pep, dephospho_pep, sequence) in batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
496 # do interesting stuff here... |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
497 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
498 PhosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
499 (phospho_pep, DEPHOSPHOPEP) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
500 ] = dephospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
501 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
502 DephosphoPep_UniProtSeq_LUT[dephospho_pep] = set() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
503 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
504 (dephospho_pep, DESCRIPTION) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
505 ] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
506 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
507 (dephospho_pep, GENE_NAME) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
508 ] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
509 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
510 (dephospho_pep, UNIPROT_ID) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
511 ] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
512 DephosphoPep_UniProtSeq_LUT[(dephospho_pep, SEQUENCE)] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
513 DephosphoPep_UniProtSeq_LUT[dephospho_pep].add(phospho_pep) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
514 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
515 if ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
516 sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
517 not in DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
518 (dephospho_pep, SEQUENCE) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
519 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
520 ): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
521 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
522 (dephospho_pep, SEQUENCE) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
523 ].append(sequence) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
524 for phospho_pep in DephosphoPep_UniProtSeq_LUT[dephospho_pep]: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
525 if phospho_pep != phospho_pep: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
526 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
527 "phospho_pep:'%s' phospho_pep:'%s'" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
528 % (phospho_pep, phospho_pep) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
529 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
530 if phospho_pep not in PhosphoPep_UniProtSeq_LUT: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
531 PhosphoPep_UniProtSeq_LUT[phospho_pep] = phospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
532 PhosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
533 (phospho_pep, DEPHOSPHOPEP) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
534 ] = dephospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
535 r = list( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
536 zip( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
537 [s for s in UniProtSeqLUT[(sequence, UNIPROT_ID)]], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
538 [s for s in UniProtSeqLUT[(sequence, GENE_NAME)]], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
539 [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
540 s |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
541 for s in UniProtSeqLUT[(sequence, DESCRIPTION)] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
542 ], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
543 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
544 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
545 # Sort by `UniProt_ID` |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
546 # ref: https://stackoverflow.com/a/4174955/15509512 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
547 r = sorted(r, key=operator.itemgetter(0)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
548 # Get one tuple for each `phospho_pep` |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
549 # in DephosphoPep_UniProtSeq_LUT[dephospho_pep] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
550 for (upid, gn, desc) in r: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
551 # Append pseudo-tuple per UniProt_ID but only when it is not present |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
552 if ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
553 upid |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
554 not in DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
555 (dephospho_pep, UNIPROT_ID) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
556 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
557 ): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
558 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
559 (dephospho_pep, UNIPROT_ID) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
560 ].append(upid) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
561 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
562 (dephospho_pep, DESCRIPTION) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
563 ].append(desc) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
564 DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
565 (dephospho_pep, GENE_NAME) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
566 ].append(gn) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
567 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
568 # Close SwissProt SQLite database; clean up local variables |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
569 conn.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
570 # wipe local variables |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
571 phospho_pep = dephospho_pep = sequence = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
572 upid = gn = desc = r = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
573 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
574 # ----------- Get SwissProt data from SQLite database (finish) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
575 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
576 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
577 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
578 "%0.6f finished reading and decoding '%s' [0.4]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
579 % (end_time - start_time, upstream_map_filename_tab), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
580 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
581 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
582 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
583 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
584 "{:>10} unique upstream phosphopeptides tested".format( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
585 str(len(upstream_map_p_peptide_list)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
586 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
587 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
588 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
589 # Read in Upstream tabular file |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
590 # We are discarding the intensity data; so read it as text |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
591 upstream_data = pandas.read_table( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
592 upstream_map_filename_tab, dtype="str", index_col=0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
593 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
594 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
595 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
596 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
597 "%0.6f read Upstream Map from file [1g_1]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
598 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
599 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
600 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
601 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
602 upstream_data.index = upstream_map_p_peptide_list |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
603 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
604 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
605 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
606 "%0.6f added index to Upstream Map [1g_2]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
607 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
608 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
609 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
610 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
611 # trim upstream_data to include only the upstream map columns |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
612 old_cols = upstream_data.columns.tolist() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
613 i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
614 first_intensity = -1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
615 last_intensity = -1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
616 intensity_re = re.compile("Intensity.*") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
617 for col_name in old_cols: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
618 m = intensity_re.match(col_name) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
619 if m: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
620 last_intensity = i |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
621 if first_intensity == -1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
622 first_intensity = i |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
623 i += 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
624 # print('last intensity = %d' % last_intensity) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
625 col_PKCalpha = last_intensity + 2 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
626 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
627 data_in_cols = [old_cols[0]] + old_cols[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
628 first_intensity: last_intensity + 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
629 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
630 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
631 if upstream_data.empty: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
632 print("upstream_data is empty") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
633 exit(0) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
634 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
635 data_in = upstream_data.copy(deep=True)[data_in_cols] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
636 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
637 # Convert floating-point integers to int64 integers |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
638 # ref: https://stackoverflow.com/a/68497603/15509512 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
639 data_in[list(data_in.columns[1:])] = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
640 data_in[list(data_in.columns[1:])] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
641 .astype("float64") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
642 .apply(np.int64) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
643 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
644 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
645 # create another phosphopeptide column that will be used to join later; |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
646 # MAY need to change depending on Phosphopeptide column position |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
647 # data_in[PHOSPHOPEPTIDE_MATCH] = data_in[data_in.columns.tolist()[0]] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
648 data_in[PHOSPHOPEPTIDE_MATCH] = data_in.index |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
649 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
650 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
651 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
652 "%0.6f set data_in[PHOSPHOPEPTIDE_MATCH] [A]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
653 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
654 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
655 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
656 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
657 # Produce a dictionary of metadata for a single phosphopeptide. |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
658 # This is a replacement of `UniProtInfo_subdict` in the original code. |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
659 def pseq_to_subdict(phospho_pep): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
660 # Strip "p" from phosphopeptide sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
661 dephospho_pep = re_phos.sub("", phospho_pep) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
662 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
663 # Determine number of phosphoresidues in phosphopeptide |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
664 numps = len(phospho_pep) - len(dephospho_pep) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
665 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
666 # Determine location(s) of phosphoresidue(s) in phosphopeptide |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
667 # (used later for Phosphoresidue, Sequence7, and Sequence10) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
668 ploc = [] # list of p locations |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
669 i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
670 p = phospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
671 while i < numps: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
672 ploc.append(p.find("p")) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
673 p = p[: p.find("p")] + p[p.find("p") + 1:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
674 i += 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
675 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
676 # Establish nested dictionary |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
677 result = {} |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
678 result[SEQUENCE] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
679 result[UNIPROT_ID] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
680 result[DESCRIPTION] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
681 result[GENE_NAME] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
682 result[PHOSPHORESIDUE] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
683 result[SEQUENCE7] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
684 result[SEQUENCE10] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
685 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
686 # Add stripped sequence to dictionary |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
687 result[SEQUENCE].append(dephospho_pep) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
688 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
689 # Locate phospho_pep in PhosphoPep_UniProtSeq_LUT |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
690 # Caller may elect to: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
691 # try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
692 # ... |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
693 # except PreconditionError as pe: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
694 # print("'{expression}': {message}".format( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
695 # expression = pe.expression, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
696 # message = pe.message)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
697 # ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
698 # ) |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
699 if phospho_pep not in PhosphoPep_UniProtSeq_LUT: |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
700 raise PreconditionError( |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
701 phospho_pep, |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
702 "no matching phosphopeptide found in PhosphoPep_UniProtSeq_LUT", |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
703 ) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
704 if dephospho_pep not in DephosphoPep_UniProtSeq_LUT: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
705 raise PreconditionError( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
706 dephospho_pep, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
707 "dephosphorylated phosphopeptide not found in DephosphoPep_UniProtSeq_LUT", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
708 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
709 if ( |
16
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
710 dephospho_pep != PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)] |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
711 ): |
16
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
712 my_err_msg = "dephosphorylated phosphopeptide does not match " |
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
713 my_err_msg += "PhosphoPep_UniProtSeq_LUT[(phospho_pep,DEPHOSPHOPEP)] = " |
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
714 my_err_msg += PhosphoPep_UniProtSeq_LUT[(phospho_pep, DEPHOSPHOPEP)] |
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
715 raise PreconditionError(dephospho_pep, my_err_msg) |
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
716 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
717 result[SEQUENCE] = [dephospho_pep] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
718 result[UNIPROT_ID] = DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
719 (dephospho_pep, UNIPROT_ID) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
720 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
721 result[DESCRIPTION] = DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
722 (dephospho_pep, DESCRIPTION) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
723 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
724 result[GENE_NAME] = DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
725 (dephospho_pep, GENE_NAME) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
726 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
727 if (dephospho_pep, SEQUENCE) not in DephosphoPep_UniProtSeq_LUT: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
728 raise PreconditionError( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
729 dephospho_pep, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
730 "no matching phosphopeptide found in DephosphoPep_UniProtSeq_LUT", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
731 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
732 UniProtSeqList = DephosphoPep_UniProtSeq_LUT[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
733 (dephospho_pep, SEQUENCE) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
734 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
735 if len(UniProtSeqList) < 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
736 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
737 "Skipping DephosphoPep_UniProtSeq_LUT[('%s',SEQUENCE)] because value has zero length" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
738 % dephospho_pep |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
739 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
740 # raise PreconditionError( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
741 # "DephosphoPep_UniProtSeq_LUT[('" + dephospho_pep + ",SEQUENCE)", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
742 # 'value has zero length' |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
743 # ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
744 for UniProtSeq in UniProtSeqList: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
745 i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
746 phosphoresidues = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
747 seq7s_set = set() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
748 seq7s = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
749 seq10s_set = set() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
750 seq10s = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
751 while i < len(ploc): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
752 start = UniProtSeq.find(dephospho_pep) |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
753 # handle case where no sequence was found for dep-pep |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
754 if start < 0: |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
755 i += 1 |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
756 continue |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
757 psite = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
758 start + ploc[i] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
759 ) # location of phosphoresidue on protein sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
760 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
761 # add Phosphoresidue |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
762 phosphosite = "p" + str(UniProtSeq)[psite] + str(psite + 1) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
763 phosphoresidues.append(phosphosite) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
764 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
765 # Add Sequence7 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
766 if psite < 7: # phospho_pep at N terminus |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
767 seq7 = str(UniProtSeq)[: psite + 8] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
768 if seq7[psite] == "S": # if phosphoresidue is serine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
769 pres = "s" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
770 elif ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
771 seq7[psite] == "T" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
772 ): # if phosphoresidue is threonine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
773 pres = "t" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
774 elif ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
775 seq7[psite] == "Y" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
776 ): # if phosphoresidue is tyrosine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
777 pres = "y" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
778 else: # if not pSTY |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
779 pres = "?" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
780 seq7 = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
781 seq7[:psite] + pres + seq7[psite + 1: psite + 8] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
782 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
783 while ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
784 len(seq7) < 15 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
785 ): # add appropriate number of "_" to the front |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
786 seq7 = "_" + seq7 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
787 elif ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
788 len(UniProtSeq) - psite < 8 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
789 ): # phospho_pep at C terminus |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
790 seq7 = str(UniProtSeq)[psite - 7:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
791 if seq7[7] == "S": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
792 pres = "s" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
793 elif seq7[7] == "T": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
794 pres = "t" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
795 elif seq7[7] == "Y": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
796 pres = "y" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
797 else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
798 pres = "?" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
799 seq7 = seq7[:7] + pres + seq7[8:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
800 while ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
801 len(seq7) < 15 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
802 ): # add appropriate number of "_" to the back |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
803 seq7 = seq7 + "_" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
804 else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
805 seq7 = str(UniProtSeq)[psite - 7: psite + 8] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
806 pres = "" # phosphoresidue |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
807 if seq7[7] == "S": # if phosphoresidue is serine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
808 pres = "s" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
809 elif seq7[7] == "T": # if phosphoresidue is threonine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
810 pres = "t" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
811 elif seq7[7] == "Y": # if phosphoresidue is tyrosine |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
812 pres = "y" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
813 else: # if not pSTY |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
814 pres = "?" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
815 seq7 = seq7[:7] + pres + seq7[8:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
816 if seq7 not in seq7s_set: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
817 seq7s.append(seq7) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
818 seq7s_set.add(seq7) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
819 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
820 # add Sequence10 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
821 if psite < 10: # phospho_pep at N terminus |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
822 seq10 = ( |
16
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
823 str(UniProtSeq)[:psite] + "p" + str(UniProtSeq)[psite: psite + 11] |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
824 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
825 elif ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
826 len(UniProtSeq) - psite < 11 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
827 ): # phospho_pep at C terminus |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
828 seq10 = ( |
16
d9906288cc6a
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7127d1511d45a8b22d4f50eee6db24d844371227"
eschen42
parents:
6
diff
changeset
|
829 str(UniProtSeq)[psite - 10: psite] + "p" + str(UniProtSeq)[psite:] |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
830 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
831 else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
832 seq10 = str(UniProtSeq)[psite - 10: psite + 11] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
833 seq10 = seq10[:10] + "p" + seq10[10:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
834 if seq10 not in seq10s_set: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
835 seq10s.append(seq10) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
836 seq10s_set.add(seq10) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
837 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
838 i += 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
839 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
840 result[PHOSPHORESIDUE].append(phosphoresidues) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
841 result[SEQUENCE7].append(seq7s) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
842 # result[SEQUENCE10] is a list of lists of strings |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
843 result[SEQUENCE10].append(seq10s) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
844 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
845 r = list( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
846 zip( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
847 result[UNIPROT_ID], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
848 result[GENE_NAME], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
849 result[DESCRIPTION], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
850 result[PHOSPHORESIDUE], |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
851 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
852 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
853 # Sort by `UniProt_ID` |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
854 # ref: https://stackoverflow.com//4174955/15509512 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
855 s = sorted(r, key=operator.itemgetter(0)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
856 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
857 result[UNIPROT_ID] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
858 result[GENE_NAME] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
859 result[DESCRIPTION] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
860 result[PHOSPHORESIDUE] = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
861 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
862 for r in s: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
863 result[UNIPROT_ID].append(r[0]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
864 result[GENE_NAME].append(r[1]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
865 result[DESCRIPTION].append(r[2]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
866 result[PHOSPHORESIDUE].append(r[3]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
867 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
868 # convert lists to strings in the dictionary |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
869 for key, value in result.items(): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
870 if key not in [PHOSPHORESIDUE, SEQUENCE7, SEQUENCE10]: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
871 result[key] = "; ".join(map(str, value)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
872 elif key in [SEQUENCE10]: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
873 # result[SEQUENCE10] is a list of lists of strings |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
874 joined_value = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
875 joined_set = set() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
876 sep = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
877 for valL in value: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
878 # valL is a list of strings |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
879 for val in valL: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
880 # val is a string |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
881 if val not in joined_set: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
882 joined_set.add(val) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
883 joined_value += sep + val |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
884 sep = "; " |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
885 # joined_value is a string |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
886 result[key] = joined_value |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
887 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
888 newstring = "; ".join( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
889 [", ".join(prez) for prez in result[PHOSPHORESIDUE]] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
890 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
891 # #separate the isoforms in PHOSPHORESIDUE column with ";" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
892 # oldstring = result[PHOSPHORESIDUE] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
893 # oldlist = list(oldstring) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
894 # newstring = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
895 # i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
896 # for e in oldlist: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
897 # if e == ";": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
898 # if numps > 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
899 # if i%numps: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
900 # newstring = newstring + ";" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
901 # else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
902 # newstring = newstring + "," |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
903 # else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
904 # newstring = newstring + ";" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
905 # i +=1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
906 # else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
907 # newstring = newstring + e |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
908 result[PHOSPHORESIDUE] = newstring |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
909 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
910 # separate sequence7's by | |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
911 oldstring = result[SEQUENCE7] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
912 oldlist = oldstring |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
913 newstring = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
914 for ol in oldlist: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
915 for e in ol: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
916 if e == ";": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
917 newstring = newstring + " |" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
918 elif len(newstring) > 0 and 1 > newstring.count(e): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
919 newstring = newstring + " | " + e |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
920 elif 1 > newstring.count(e): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
921 newstring = newstring + e |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
922 result[SEQUENCE7] = newstring |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
923 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
924 return [phospho_pep, result] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
925 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
926 # Construct list of [string, dictionary] lists |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
927 # where the dictionary provides the SwissProt metadata |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
928 # for a phosphopeptide |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
929 result_list = [ |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
930 whine(pseq_to_subdict, psequence) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
931 for psequence in data_in[PHOSPHOPEPTIDE_MATCH] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
932 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
933 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
934 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
935 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
936 "%0.6f added SwissProt annotations to phosphopeptides [B]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
937 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
938 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
939 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
940 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
941 # Construct dictionary from list of lists |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
942 # ref: https://www.8bitavenue.com/how-to-convert-list-of-lists-to-dictionary-in-python/ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
943 UniProt_Info = { |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
944 result[0]: result[1] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
945 for result in result_list |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
946 if result is not None |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
947 } |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
948 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
949 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
950 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
951 "%0.6f create dictionary mapping phosphopeptide to metadata dictionary [C]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
952 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
953 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
954 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
955 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
956 # cosmetic: add N_A to phosphopeptide rows with no hits |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
957 p_peptide_list = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
958 for key in UniProt_Info: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
959 p_peptide_list.append(key) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
960 for nestedKey in UniProt_Info[key]: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
961 if UniProt_Info[key][nestedKey] == "": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
962 UniProt_Info[key][nestedKey] = N_A |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
963 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
964 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
965 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
966 "%0.6f performed cosmetic clean-up [D]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
967 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
968 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
969 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
970 # convert UniProt_Info dictionary to dataframe |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
971 uniprot_df = pandas.DataFrame.transpose( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
972 pandas.DataFrame.from_dict(UniProt_Info) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
973 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
974 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
975 # reorder columns to match expected output file |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
976 uniprot_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
977 PHOSPHOPEPTIDE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
978 ] = uniprot_df.index # make index a column too |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
979 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
980 cols = uniprot_df.columns.tolist() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
981 # cols = [cols[-1]]+cols[4:6]+[cols[1]]+[cols[2]]+[cols[6]]+[cols[0]] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
982 # uniprot_df = uniprot_df[cols] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
983 uniprot_df = uniprot_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
984 [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
985 PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
986 SEQUENCE10, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
987 SEQUENCE7, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
988 GENE_NAME, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
989 PHOSPHORESIDUE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
990 UNIPROT_ID, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
991 DESCRIPTION, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
992 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
993 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
994 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
995 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
996 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
997 "%0.6f reordered columns to match expected output file [1]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
998 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
999 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1000 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1001 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1002 # concat to split then groupby to collapse |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1003 seq7_df = pandas.concat( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1004 [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1005 pandas.Series(row[PHOSPHOPEPTIDE], row[SEQUENCE7].split(" | ")) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1006 for _, row in uniprot_df.iterrows() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1007 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1008 ).reset_index() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1009 seq7_df.columns = [SEQUENCE7, PHOSPHOPEPTIDE] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1010 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1011 # --- -------------- begin read PSP_Regulatory_sites --------------------------------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1012 # read in PhosphoSitePlus Regulatory Sites dataset |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1013 # ----------- Get PhosphoSitePlus Regulatory Sites data from SQLite database (start) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1014 conn = sql.connect(uniprot_sqlite) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1015 regsites_df = pandas.read_sql_query(PSP_REGSITE_SQL, conn) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1016 # Close SwissProt SQLite database |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1017 conn.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1018 # ... -------------- end read PSP_Regulatory_sites ------------------------------------ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1019 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1020 # keep only the human entries in dataframe |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1021 if len(species) > 0: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1022 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1023 'Limit PhosphoSitesPlus records to species "' + species + '"' |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1024 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1025 regsites_df = regsites_df[regsites_df.ORGANISM == species] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1026 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1027 # merge the seq7 df with the regsites df based off of the sequence7 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1028 merge_df = seq7_df.merge( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1029 regsites_df, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1030 left_on=SEQUENCE7, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1031 right_on=SITE_PLUSMINUS_7AA_SQL, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1032 how="left", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1033 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1034 |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
1035 # after merging df, select only the columns of interest; |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
1036 # note that PROTEIN is absent here |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1037 merge_df = merge_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1038 [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1039 PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1040 SEQUENCE7, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1041 ON_FUNCTION, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1042 ON_PROCESS, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1043 ON_PROT_INTERACT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1044 ON_OTHER_INTERACT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1045 ON_NOTES, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1046 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1047 ] |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
1048 # combine column values of interest |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
1049 # into one FUNCTION_PHOSPHORESIDUE column" |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1050 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ON_FUNCTION].str.cat( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1051 merge_df[ON_PROCESS], sep="; ", na_rep="" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1052 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1053 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1054 FUNCTION_PHOSPHORESIDUE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1055 ].str.cat(merge_df[ON_PROT_INTERACT], sep="; ", na_rep="") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1056 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1057 FUNCTION_PHOSPHORESIDUE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1058 ].str.cat(merge_df[ON_OTHER_INTERACT], sep="; ", na_rep="") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1059 merge_df[FUNCTION_PHOSPHORESIDUE] = merge_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1060 FUNCTION_PHOSPHORESIDUE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1061 ].str.cat(merge_df[ON_NOTES], sep="; ", na_rep="") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1062 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1063 # remove the columns that were combined |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1064 merge_df = merge_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1065 [PHOSPHOPEPTIDE, SEQUENCE7, FUNCTION_PHOSPHORESIDUE] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1066 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1067 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1068 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1069 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1070 "%0.6f merge regsite metadata [1a]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1071 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1072 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1073 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1074 # cosmetic changes to Function Phosphoresidue column |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1075 fp_series = pandas.Series(merge_df[FUNCTION_PHOSPHORESIDUE]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1076 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1077 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1078 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1079 "%0.6f more cosmetic changes [1b]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1080 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1081 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1082 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1083 i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1084 while i < len(fp_series): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1085 # remove the extra ";" so that it looks more professional |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1086 if fp_series[i] == "; ; ; ; ": # remove ; from empty hits |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1087 fp_series[i] = "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1088 while fp_series[i].endswith("; "): # remove ; from the ends |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1089 fp_series[i] = fp_series[i][:-2] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1090 while fp_series[i].startswith("; "): # remove ; from the beginning |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1091 fp_series[i] = fp_series[i][2:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1092 fp_series[i] = fp_series[i].replace("; ; ; ; ", "; ") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1093 fp_series[i] = fp_series[i].replace("; ; ; ", "; ") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1094 fp_series[i] = fp_series[i].replace("; ; ", "; ") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1095 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1096 # turn blanks into N_A to signify the info was searched for but cannot be found |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1097 if fp_series[i] == "": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1098 fp_series[i] = N_A |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1099 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1100 i += 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1101 merge_df[FUNCTION_PHOSPHORESIDUE] = fp_series |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1102 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1103 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1104 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1105 "%0.6f cleaned up semicolons [1c]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1106 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1107 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1108 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1109 # merge uniprot df with merge df |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1110 uniprot_regsites_merged_df = uniprot_df.merge( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1111 merge_df, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1112 left_on=PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1113 right_on=PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1114 how="left", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1115 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1116 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1117 # collapse the merged df |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1118 uniprot_regsites_collapsed_df = pandas.DataFrame( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1119 uniprot_regsites_merged_df.groupby(PHOSPHOPEPTIDE)[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1120 FUNCTION_PHOSPHORESIDUE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1121 ].apply(lambda x: ppep_join(x)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1122 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1123 # .apply(lambda x: "%s" % ' | '.join(x))) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1124 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1125 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1126 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1127 "%0.6f collapsed pandas dataframe [1d]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1128 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1129 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1130 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1131 uniprot_regsites_collapsed_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1132 PHOSPHOPEPTIDE |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1133 ] = ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1134 uniprot_regsites_collapsed_df.index |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1135 ) # add df index as its own column |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1136 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1137 # rename columns |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1138 uniprot_regsites_collapsed_df.columns = [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1139 FUNCTION_PHOSPHORESIDUE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1140 "ppp", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1141 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1142 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1143 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1144 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1145 "%0.6f selected columns to be merged to uniprot_df [1e]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1146 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1147 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1148 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1149 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1150 # add columns based on Sequence7 matching site_+/-7_AA |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1151 uniprot_regsite_df = pandas.merge( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1152 left=uniprot_df, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1153 right=uniprot_regsites_collapsed_df, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1154 how="left", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1155 left_on=PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1156 right_on="ppp", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1157 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1158 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1159 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1160 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1161 "%0.6f added columns based on Sequence7 matching site_+/-7_AA [1f]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1162 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1163 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1164 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1165 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1166 data_in.rename( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1167 {"Protein description": PHOSPHOPEPTIDE}, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1168 axis="columns", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1169 inplace=True, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1170 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1171 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1172 # data_in.sort_values(PHOSPHOPEPTIDE_MATCH, inplace=True, kind='mergesort') |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1173 res2 = sorted( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1174 data_in[PHOSPHOPEPTIDE_MATCH].tolist(), key=lambda s: s.casefold() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1175 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1176 data_in = data_in.loc[res2] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1177 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1178 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1179 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1180 "%0.6f sorting time [1f]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1181 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1182 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1183 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1184 cols = [old_cols[0]] + old_cols[col_PKCalpha - 1:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1185 upstream_data = upstream_data[cols] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1186 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1187 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1188 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1189 "%0.6f refactored columns for Upstream Map [1g]" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1190 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1191 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1192 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1193 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1194 # #rename upstream columns in new list |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1195 # new_cols = [] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1196 # for name in cols: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1197 # if "_NetworKIN" in name: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1198 # name = name.split("_")[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1199 # if " motif" in name: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1200 # name = name.split(" motif")[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1201 # if " sequence " in name: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1202 # name = name.split(" sequence")[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1203 # if "_Phosida" in name: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1204 # name = name.split("_")[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1205 # if "_PhosphoSite" in name: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1206 # name = name.split("_")[0] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1207 # new_cols.append(name) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1208 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1209 # rename upstream columns in new list |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
def col_rename(name):
    """Return a shortened version of an upstream-map column name.

    A fixed sequence of rules is applied in order, each one operating on
    the result of the previous one.  A rule fires only when its marker
    text occurs in the current name; the name is then cut just before
    the first occurrence of the rule's separator:

    * contains "_NetworKIN"   -> keep the text before the first "_"
    * contains " motif"       -> keep the text before " motif"
    * contains " sequence "   -> keep the text before " sequence"
    * contains "_Phosida"     -> keep the text before the first "_"
    * contains "_PhosphoSite" -> keep the text before the first "_"

    Names that contain none of the markers are returned unchanged.
    """
    # (marker that must be present, separator at which the name is cut)
    # NOTE: for the "_..." markers the cut is at the FIRST underscore in
    # the name, not at the marker itself.
    truncation_rules = (
        ("_NetworKIN", "_"),
        (" motif", " motif"),
        (" sequence ", " sequence"),
        ("_Phosida", "_"),
        ("_PhosphoSite", "_"),
    )
    shortened = name
    for marker, separator in truncation_rules:
        if marker not in shortened:
            continue
        # partition(...)[0] is everything before the first separator
        shortened, _, _ = shortened.partition(separator)
    return shortened
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1222 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# Run every upstream column label through col_rename and install the result
new_cols = list(map(col_rename, cols))
upstream_data.columns = new_cols

# timer: CPU time elapsed since start_time, reported on stderr
end_time = time.process_time()
print(
    "%0.6f renamed columns for Upstream Map [1h_1]" % (end_time - start_time,),
    file=sys.stderr,
)

# upstream_data_cast is a copy of upstream_data whose first column is
# relabeled "p_peptide" and then assigned upstream_data's index values.
upstream_data_cast = upstream_data.copy()
# NOTE: new_cols_cast is the same list object as new_cols (no copy is made),
# so the relabeling on the next line also changes new_cols[0].
new_cols_cast = new_cols
new_cols_cast[0] = "p_peptide"
upstream_data_cast.columns = new_cols_cast
upstream_data_cast["p_peptide"] = upstream_data.index
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1240 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# --- -------------- begin read upstream_data_melt ------------------------------------
# ----------- Get melted kinase mapping data from SQLite database (start) -----------
# Run PPEP_MELT_SQL against the SQLite file named by uniprot_sqlite.
conn = sql.connect(uniprot_sqlite)
try:
    upstream_data_melt_df = pandas.read_sql_query(PPEP_MELT_SQL, conn)
finally:
    # Close the SQLite database even when the query raises
    conn.close()

# Work on a copy with fixed column labels; the query is expected to return
# exactly three columns (assigning three labels fails otherwise).
upstream_data_melt = upstream_data_melt_df.copy()
upstream_data_melt.columns = ["p_peptide", "characterization", "X"]
# Shorten the characterization labels with the same rule used for the
# upstream-map column labels.
upstream_data_melt["characterization"] = [
    col_rename(s) for s in upstream_data_melt["characterization"]
]

# Refresh the timer so the elapsed time printed includes the database read
end_time = time.process_time()  # timer
print(
    "%0.6f upstream_data_melt_df initially has %d rows"
    % (end_time - start_time, len(upstream_data_melt.index)),
    file=sys.stderr,
)
# Keep only the rows whose X column equals "X"
# ref: https://stackoverflow.com/a/27360130/15509512
#      e.g. df.drop(df[df.score < 50].index, inplace=True)
upstream_data_melt.drop(
    upstream_data_melt[upstream_data_melt.X != "X"].index, inplace=True
)
end_time = time.process_time()  # timer
print(
    "%0.6f upstream_data_melt_df pre-dedup has %d rows"
    % (end_time - start_time, len(upstream_data_melt.index)),
    file=sys.stderr,
)
# ----------- Get melted kinase mapping data from SQLite database (finish) -----------
# ... -------------- end read upstream_data_melt --------------------------------------

end_time = time.process_time()  # timer
print(
    "%0.6f melted and minimized Upstream Map dataframe [1h_2]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer
# ... end read upstream_data_melt
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1278 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
end_time = time.process_time()  # timer
print(
    "%0.6f indexed melted Upstream Map [1h_2a]" % (end_time - start_time,),
    file=sys.stderr,
)  # timer

# Rows of the melted frame as [p_peptide, characterization, X] lists
upstream_delta_melt_LoL = upstream_data_melt.values.tolist()

# One (initially empty) list of characterizations per known phosphopeptide
melt_dict = {key: [] for key in upstream_map_p_peptide_list}

# Collect each row's characterization under its phosphopeptide; a peptide
# that is absent from upstream_map_p_peptide_list aborts the run.
for p_peptide, characterization, _x in upstream_delta_melt_LoL:
    if p_peptide not in melt_dict:
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to be defined
        sys.exit(
            'Phosphopeptide %s not found in ppep_mapping_db: "phosphopeptides" and "ppep_mapping_db" must both originate from the same run of mqppep_kinase_mapping'
            % (p_peptide,)
        )
    melt_dict[p_peptide].append(characterization)

end_time = time.process_time()  # timer
print(
    "%0.6f appended peptide characterizations [1h_2b]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer

# Replace each list with melt_join's result (melt_join is defined elsewhere
# in this file). Iterating the dict instead of the peptide list passes each
# list to melt_join exactly once, even if the peptide list repeats an entry.
# Only values are reassigned, so the dict's size does not change mid-loop.
for key in melt_dict:
    melt_dict[key] = melt_join(melt_dict[key])

end_time = time.process_time()  # timer
print(
    "%0.6f concatenated multiple characterizations [1h_2c]"
    % (end_time - start_time,),
    file=sys.stderr,
)  # timer
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1321 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
# map_dict maps each phosphopeptide to a one-entry dictionary that holds its
# melt_dict value under the PUTATIVE_UPSTREAM_DOMAINS key
map_dict = {}
for key in upstream_map_p_peptide_list:
    map_dict[key] = {PUTATIVE_UPSTREAM_DOMAINS: melt_dict[key]}

end_time = time.process_time()  # timer
print(
    "%0.6f instantiated map dictionary [2]" % (end_time - start_time,),
    file=sys.stderr,
)  # timer

# from_dict yields one column per phosphopeptide; transpose so that each
# phosphopeptide becomes a row instead
map_df = pandas.DataFrame.from_dict(map_dict).transpose()
# expose the index (the phosphopeptide) as an ordinary column as well
map_df["p-peptide"] = map_df.index
# keep the first two columns in swapped order, so "p-peptide" comes first
cols_map_df = map_df.columns.tolist()
cols_map_df = [cols_map_df[1], cols_map_df[0]]
map_df = map_df[cols_map_df]

# left-join map_df onto uniprot_regsite_df, matching the PHOSPHOPEPTIDE
# column of the latter against "p-peptide"
output_df = uniprot_regsite_df.merge(
    map_df, how="left", left_on=PHOSPHOPEPTIDE, right_on="p-peptide"
)
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1347 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1348 output_df = output_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1349 [ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1350 PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1351 SEQUENCE10, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1352 SEQUENCE7, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1353 GENE_NAME, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1354 PHOSPHORESIDUE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1355 UNIPROT_ID, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1356 DESCRIPTION, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1357 FUNCTION_PHOSPHORESIDUE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1358 PUTATIVE_UPSTREAM_DOMAINS, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1359 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1360 ] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1361 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1362 # cols_output_prelim = output_df.columns.tolist() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1363 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1364 # print("cols_output_prelim") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1365 # print(cols_output_prelim) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1366 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1367 # cols_output = cols_output_prelim[:8]+[cols_output_prelim[9]]+[cols_output_prelim[10]] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1368 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1369 # print("cols_output with p-peptide") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1370 # print(cols_output) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1371 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1372 # cols_output = [col for col in cols_output if not col == "p-peptide"] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1373 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1374 # print("cols_output") |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1375 # print(cols_output) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1376 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1377 # output_df = output_df[cols_output] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1378 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1379 # join output_df back to quantitative columns in data_in df |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1380 quant_cols = data_in.columns.tolist() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1381 quant_cols = quant_cols[1:] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1382 quant_data = data_in[quant_cols] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1383 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1384 # ----------- Write merge/filter metadata to SQLite database (start) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1385 # Open SwissProt SQLite database |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1386 conn = sql.connect(output_sqlite) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1387 cur = conn.cursor() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1388 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1389 cur.executescript(MRGFLTR_DDL) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1390 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1391 cur.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1392 CITATION_INSERT_STMT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1393 ("mrgfltr_metadata_view", CITATION_INSERT_PSP), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1394 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1395 cur.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1396 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1397 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1398 cur.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1399 CITATION_INSERT_STMT, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1400 ("mrgfltr_metadata_view", CITATION_INSERT_PSP_REF), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1401 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1402 cur.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1403 CITATION_INSERT_STMT, ("mrgfltr_metadata", CITATION_INSERT_PSP_REF) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1404 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1405 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1406 # Read ppep-to-sequence LUT |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1407 ppep_lut_df = pandas.read_sql_query(PPEP_ID_SQL, conn) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1408 # write only metadata for merged/filtered records to SQLite |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1409 mrgfltr_metadata_df = output_df.copy() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1410 # replace phosphopeptide seq with ppep.id |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1411 mrgfltr_metadata_df = ppep_lut_df.merge( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1412 mrgfltr_metadata_df, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1413 left_on="ppep_seq", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1414 right_on=PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1415 how="inner", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1416 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1417 mrgfltr_metadata_df.drop( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1418 columns=[PHOSPHOPEPTIDE, "ppep_seq"], inplace=True |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1419 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1420 # rename columns |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1421 mrgfltr_metadata_df.columns = MRGFLTR_METADATA_COLUMNS |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1422 mrgfltr_metadata_df.to_sql( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1423 "mrgfltr_metadata", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1424 con=conn, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1425 if_exists="append", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1426 index=False, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1427 method="multi", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1428 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1429 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1430 # Close SwissProt SQLite database |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1431 conn.close() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1432 # ----------- Write merge/filter metadata to SQLite database (finish) ----------- |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1433 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1434 output_df = output_df.merge( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1435 quant_data, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1436 how="right", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1437 left_on=PHOSPHOPEPTIDE, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1438 right_on=PHOSPHOPEPTIDE_MATCH, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1439 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1440 output_cols = output_df.columns.tolist() |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1441 output_cols = output_cols[:-1] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1442 output_df = output_df[output_cols] |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1443 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1444 # cosmetic changes to Upstream column |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1445 output_df[PUTATIVE_UPSTREAM_DOMAINS] = output_df[ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1446 PUTATIVE_UPSTREAM_DOMAINS |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1447 ].fillna( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1448 "" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1449 ) # fill the NaN with "" for those Phosphopeptides that got a "WARNING: Failed match for " in the upstream mapping |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1450 us_series = pandas.Series(output_df[PUTATIVE_UPSTREAM_DOMAINS]) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1451 i = 0 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1452 while i < len(us_series): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1453 # turn blanks into N_A to signify the info was searched for but cannot be found |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1454 if us_series[i] == "": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1455 us_series[i] = N_A |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1456 i += 1 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1457 output_df[PUTATIVE_UPSTREAM_DOMAINS] = us_series |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1458 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1459 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1460 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1461 "%0.6f establisheed output [3]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1462 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1463 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1464 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1465 (output_rows, output_cols) = output_df.shape |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1466 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1467 output_df = output_df.convert_dtypes(convert_integer=True) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1468 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1469 # Write the final output to both the CSV file and the tab-separated file |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1470 output_df.to_csv(output_filename_csv, index=False) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1471 output_df.to_csv( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1472 output_filename_tab, quoting=None, sep="\t", index=False |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1473 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1474 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1475 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1476 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1477 "%0.6f wrote output [4]" % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1478 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1479 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1480 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1481 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1482 "{:>10} phosphopeptides written to output".format(str(output_rows)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1483 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1484 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1485 end_time = time.process_time() # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1486 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1487 "%0.6f seconds of non-system CPU time were consumed" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1488 % (end_time - start_time,), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1489 file=sys.stderr, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1490 ) # timer |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1491 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1492 # Rev. 7/1/2016 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1493 # Rev. 7/3/2016: replace NaN in Upstream column with N/A's |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1494 # Rev. 7/3/2016: renamed Upstream column to PUTATIVE_UPSTREAM_DOMAINS |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1495 # Rev. 12/2/2021: Converted to Python from ipynb; use fast Aho-Corasick searching; \ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1496 # read from SwissProt SQLite database |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1497 # Rev. 12/9/2021: Transfer code to Galaxy tool wrapper |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1498 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1499 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1500 # copied from Excel Output Script.ipynb END # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1501 # |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1502 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1503 try: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1504 catch( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1505 mqpep_getswissprot, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1506 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1507 exit(0) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1508 except Exception as e: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1509 exit("Internal error running mqpep_getswissprot(): %s" % (e)) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1510 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1511 |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1512 if __name__ == "__main__": |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
1513 __main__() |