Mercurial > repos > eschen42 > mqppep_preproc
annotate search_ppep.py @ 22:43f1fd0ff86b draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 00881af5b1373174e5afe706add2f33b8614828c"
author | eschen42 |
---|---|
date | Wed, 13 Apr 2022 19:48:01 +0000 |
parents | ba5f14c2a4af |
children | 29775869faba |
rev | line source |
---|---|
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
1 #!/usr/bin/env python |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
3 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
4 import argparse |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
5 import os.path |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
6 import re |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
7 import sqlite3 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
8 import sys # import the sys module for exc_info |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
9 import time |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
10 import traceback # import the traceback module for format_exception |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
11 from codecs import getreader as cx_getreader |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
12 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
13 # For Aho-Corasick search for fixed set of substrings |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
14 # - add_word |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
15 # - make_automaton |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
16 # - iter |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
17 import ahocorasick |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
18 |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
19 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
20 # ref: https://stackoverflow.com/a/8915613/15509512 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
21 # answers: "How to handle exceptions in a list comprehensions" |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
22 # usage: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
23 # from math import log |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
24 # eggs = [1,3,0,3,2] |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
25 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None]) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
26 # producing: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
27 # For <built-in function log> |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
28 # with args (0,) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
29 # caught exception: math domain error |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)``, reporting any exception instead of raising.

    On success the result of ``func`` is returned unchanged.  If ``func``
    raises an ``Exception``, a diagnostic (the callable, its positional
    arguments, the exception message and the formatted stack trace) is
    printed to standard output and ``None`` is returned, so callers can
    filter failed calls out of, e.g., a list comprehension.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Retained for backward compatibility only; it is accepted
            but never called (the function used to return ``handle(e)``).
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The value returned by ``func``, or ``None`` if it raised.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(e))
        # format_exception returns a list of newline-terminated strings;
        # join them so the trace prints as readable text rather than as
        # the repr of a list with escaped newlines.
        trace_text = "".join(
            traceback.format_exception(type(e), e, e.__traceback__)
        )
        print(" stack trace:\n" + trace_text.rstrip("\n"))
        # Deliberately neither exits nor calls `handle`: a failure is
        # reported above and signalled to the caller by returning None.
        return None
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
43 |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
44 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
45 def __main__(): |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
46 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
47 DROP_TABLES_SQL = """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
48 DROP VIEW IF EXISTS ppep_gene_site_view; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
49 DROP VIEW IF EXISTS uniprot_view; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
51 DROP VIEW IF EXISTS ppep_intensity_view; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
52 DROP VIEW IF EXISTS ppep_metadata_view; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
53 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
54 DROP TABLE IF EXISTS sample; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
55 DROP TABLE IF EXISTS ppep; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
56 DROP TABLE IF EXISTS site_type; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
57 DROP TABLE IF EXISTS deppep_UniProtKB; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
58 DROP TABLE IF EXISTS deppep; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
59 DROP TABLE IF EXISTS ppep_gene_site; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
60 DROP TABLE IF EXISTS ppep_metadata; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
61 DROP TABLE IF EXISTS ppep_intensity; |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
62 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
63 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
64 CREATE_TABLES_SQL = """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
65 CREATE TABLE deppep |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
66 ( id INTEGER PRIMARY KEY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
67 , seq TEXT UNIQUE ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
68 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
69 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
70 CREATE TABLE deppep_UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
71 ( deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
72 , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
73 , pos_start INTEGER |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
74 , pos_end INTEGER |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
75 , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
76 ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
77 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
78 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
79 CREATE TABLE ppep |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
80 ( id INTEGER PRIMARY KEY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
81 , deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
82 , seq TEXT UNIQUE ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
83 , scrubbed TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
84 ); |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
85 CREATE TABLE site_type |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
86 ( id INTEGER PRIMARY KEY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
87 , type_name TEXT UNIQUE ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
88 ); |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
89 CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
90 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
91 CREATE TABLE sample |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
92 ( id INTEGER PRIMARY KEY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
93 , name TEXT UNIQUE ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
94 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
95 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
96 CREATE VIEW uniprot_view AS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
97 SELECT DISTINCT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
98 Uniprot_ID |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
99 , Description |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
100 , Organism_Name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
101 , Organism_ID |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
102 , Gene_Name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
103 , PE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
104 , SV |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
105 , Sequence |
17
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
106 , Description || |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
107 CASE WHEN Organism_Name = 'N/A' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
108 THEN '' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
109 ELSE ' OS='|| Organism_Name |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
110 END || |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
111 CASE WHEN Organism_ID = -1 |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
112 THEN '' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
113 ELSE ' OX='|| Organism_ID |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
114 END || |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
115 CASE WHEN Gene_Name = 'N/A' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
116 THEN '' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
117 ELSE ' GN='|| Gene_Name |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
118 END || |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
119 CASE WHEN PE = 'N/A' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
120 THEN '' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
121 ELSE ' PE='|| PE |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
122 END || |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
123 CASE WHEN SV = 'N/A' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
124 THEN '' |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
125 ELSE ' SV='|| SV |
ba5f14c2a4af
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit aea9ac5a98069c3c993bd7903eda732f8ae2109d"
eschen42
parents:
6
diff
changeset
|
126 END AS long_description |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
127 , Database |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
128 FROM UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
129 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
130 CREATE VIEW uniprotkb_pep_ppep_view AS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
131 SELECT deppep_UniProtKB.UniprotKB_ID AS accession |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
132 , deppep_UniProtKB.pos_start AS pos_start |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
133 , deppep_UniProtKB.pos_end AS pos_end |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
134 , deppep.seq AS peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
135 , ppep.seq AS phosphopeptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
136 , ppep.scrubbed AS scrubbed |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
137 , uniprot_view.Sequence AS sequence |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
138 , uniprot_view.Description AS description |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
139 , uniprot_view.long_description AS long_description |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
140 , ppep.id AS ppep_id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
141 FROM ppep, deppep, deppep_UniProtKB, uniprot_view |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
142 WHERE deppep.id = ppep.deppep_id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
143 AND deppep.id = deppep_UniProtKB.deppep_id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
144 AND deppep_UniProtKB.UniprotKB_ID = uniprot_view.Uniprot_ID |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
145 ORDER BY UniprotKB_ID, deppep.seq, ppep.seq |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
146 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
147 CREATE TABLE ppep_gene_site |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
148 ( ppep_id INTEGER REFERENCES ppep(id) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
149 , gene_names TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
150 , site_type_id INTEGER REFERENCES site_type(id) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
151 , kinase_map TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
152 , PRIMARY KEY (ppep_id, kinase_map) ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
153 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
154 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
155 CREATE VIEW ppep_gene_site_view AS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
156 SELECT DISTINCT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
157 ppep.seq AS phospho_peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
158 , ppep_id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
159 , gene_names |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
160 , type_name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
161 , kinase_map |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
162 FROM |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
163 ppep, ppep_gene_site, site_type |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
164 WHERE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
165 ppep_gene_site.ppep_id = ppep.id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
166 AND |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
167 ppep_gene_site.site_type_id = site_type.id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
168 ORDER BY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
169 ppep.seq |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
170 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
171 CREATE TABLE ppep_metadata |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
172 ( ppep_id INTEGER REFERENCES ppep(id) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
173 , protein_description TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
174 , gene_name TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
175 , FASTA_name TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
176 , phospho_sites TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
177 , motifs_unique TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
178 , accessions TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
179 , motifs_all_members TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
180 , domain TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
181 , ON_FUNCTION TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
182 , ON_PROCESS TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
183 , ON_PROT_INTERACT TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
184 , ON_OTHER_INTERACT TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
185 , notes TEXT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
186 , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
187 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
188 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
189 CREATE VIEW ppep_metadata_view AS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
190 SELECT DISTINCT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
191 ppep.seq AS phospho_peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
192 , protein_description |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
193 , gene_name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
194 , FASTA_name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
195 , phospho_sites |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
196 , motifs_unique |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
197 , accessions |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
198 , motifs_all_members |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
199 , domain |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
200 , ON_FUNCTION |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
201 , ON_PROCESS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
202 , ON_PROT_INTERACT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
203 , ON_OTHER_INTERACT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
204 , notes |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
205 FROM |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
206 ppep, ppep_metadata |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
207 WHERE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
208 ppep_metadata.ppep_id = ppep.id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
209 ORDER BY |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
210 ppep.seq |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
211 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
212 CREATE TABLE ppep_intensity |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
213 ( ppep_id INTEGER REFERENCES ppep(id) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
214 , sample_id INTEGER |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
215 , intensity INTEGER |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
216 , PRIMARY KEY (ppep_id, sample_id) ON CONFLICT IGNORE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
217 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
218 ; |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
219 CREATE VIEW ppep_intensity_view AS |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
220 SELECT DISTINCT |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
221 ppep.seq AS phospho_peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
222 , sample.name AS sample |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
223 , intensity |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
224 FROM |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
225 ppep, sample, ppep_intensity |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
226 WHERE |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
227 ppep_intensity.sample_id = sample.id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
228 AND |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
229 ppep_intensity.ppep_id = ppep.id |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
230 ; |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
231 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
232 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
233 UNIPROT_SEQ_AND_ID_SQL = """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
234 select Sequence, Uniprot_ID |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
235 from UniProtKB |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
236 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
237 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
238 # Parse Command Line |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
239 parser = argparse.ArgumentParser( |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
240 description="Phosphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
241 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
242 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
243 # inputs: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
244 # Phosphopeptide data for experimental results, including the intensities |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
245 # and the mapping to kinase domains, in tabular format. |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
246 parser.add_argument( |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
247 "--phosphopeptides", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
248 "-p", |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
249 nargs=1, |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
250 required=True, |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
251 dest="phosphopeptides", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
252 help="Phosphopeptide data for experimental results, generated by the Phosphoproteomic Enrichment Localization Filter tool", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
253 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
254 parser.add_argument( |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
255 "--uniprotkb", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
256 "-u", |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
257 nargs=1, |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
258 required=True, |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
259 dest="uniprotkb", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phosphoproteomic Enrichment Kinase Mapping tool", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
261 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
262 parser.add_argument( |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
263 "--schema", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
264 action="store_true", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
265 dest="db_schema", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
266 help="show updated database schema", |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
267 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
# Optional flag: report duplicated sequences.
parser.add_argument(
    "--warn-duplicates",
    action="store_true",
    dest="warn_duplicates",
    help="show warnings for duplicated sequences",
)
# Optional flag: trace progress on stdout.
parser.add_argument(
    "--verbose",
    action="store_true",
    dest="verbose",
    help="show somewhat verbose program tracing",
)
# "Make it so!" (parse the arguments)
options = parser.parse_args()
if options.verbose:
    print("options: " + str(options) + "\n")

# path to phosphopeptide (e.g., "outputfile_STEP2.txt") input tabular file
if options.phosphopeptides is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    # The option value is indexed, so it is expected to be a sequence whose
    # first element is the path (presumably declared with nargs=1 -- the
    # declaration is outside this excerpt; confirm).
    f_name = os.path.abspath(options.phosphopeptides[0])
except Exception as e:
    exit("Error parsing phosphopeptides argument: %s" % (e))

# path to SQLite input/output tabular file
if options.uniprotkb is None:
    exit('Argument "uniprotkb" is required but not supplied')
try:
    # Same convention as above: first element of the option value is the path.
    db_name = os.path.abspath(options.uniprotkb[0])
except Exception as e:
    exit("Error parsing uniprotkb argument: %s" % (e))

# Disabled debugging aids, kept for reference:
# print("options.schema is %d" % options.db_schema)

# db_name = "demo/test.sqlite"
# f_name = "demo/test_input.txt"

# Open the database; two cursors are created on the same connection
# (`ker` is not used within this excerpt).
con = sqlite3.connect(db_name)
cur = con.cursor()
ker = con.cursor()

# Remove whatever a previous run left behind before re-creating it below.
cur.executescript(DROP_TABLES_SQL)

# Disabled debugging aid: dump the schema right after the drop.
# if options.db_schema:
#     print("\nAfter dropping tables/views that are to be created, schema is:")
#     cur.execute("SELECT * FROM sqlite_schema")
#     for row in cur.fetchall():
#         if row[4] is not None:
#             print("%s;" % row[4])

cur.executescript(CREATE_TABLES_SQL)
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
320 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
if options.db_schema:
    # Dump the DDL of every schema object so the user can inspect the
    # tables/views that were just (re)created.
    print(
        "\nAfter creating tables/views that are to be created, schema is:"
    )
    # "sqlite_schema" is only recognised by SQLite >= 3.33.0; the historical
    # name "sqlite_master" is accepted by every version, so use it to avoid
    # "no such table" errors when Python is linked against an older library.
    cur.execute("SELECT * FROM sqlite_master")
    for row in cur.fetchall():
        # Column 4 is the "sql" text; it is NULL for automatically created
        # objects, which have no DDL to show.
        if row[4] is not None:
            print("%s;" % row[4])
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
329 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
def generate_ppep(f):
    """Yield the first tab-delimited field of each data row of a tabular file.

    The first line of the file is treated as a header and skipped.  For every
    following line, the text before the first tab is taken, all double-quote
    characters are removed from it, and the result is yielded.

    Args:
        f: path of the tabular input file; it is read as bytes and decoded
            as latin-1.

    Yields:
        str: the unquoted first field of each non-header line.
    """
    # get keys from upstream tabular file using readline()
    # ref: https://stackoverflow.com/a/16713581/15509512
    #      answer to "Use codecs to read file with correct encoding"
    re_tab = re.compile("^[^\t]*")
    re_quote = re.compile('"')
    # The context manager guarantees the handle is released even when the
    # consumer abandons the generator early or an exception propagates;
    # previously the close() calls were only reached after a full read.
    with open(f, "rb") as file1_encoded:
        # cx_getreader is presumably codecs.getreader (imported elsewhere in
        # this file); it wraps the byte stream in a latin-1 decoding reader.
        file1 = cx_getreader("latin-1")(file1_encoded)
        # readline() returns an empty string only at end of file.
        for count, line in enumerate(iter(file1.readline, ""), start=1):
            if count == 1:
                # header row carries column names, not a phosphopeptide
                continue
            # NOTE(review): a line without any tab is matched in full,
            # including its line terminator -- confirm inputs always have
            # at least two columns.
            yield re_quote.sub("", re_tab.match(line)[0])
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
354 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
# Build an Aho-Corasick automaton from a trie
# - ref:
#   - https://pypi.org/project/pyahocorasick/
#   - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
#   - https://en.wikipedia.org/wiki/Trie
auto = ahocorasick.Automaton()
re_phos = re.compile("p")
# scrub out unsearchable characters per section
#   "Match the p_peptides to the @sequences array:"
# of the original
#   PhosphoPeptide Upstream Kinase Mapping.pl
# which originally read
#   $tmp_p_peptide =~ s/#//g;
#   $tmp_p_peptide =~ s/\d//g;
#   $tmp_p_peptide =~ s/\_//g;
#   $tmp_p_peptide =~ s/\.//g;
#
# The pattern must be a character class: without the brackets it only
# matches the literal text "0-9_" followed by any character and "#", so
# none of the characters listed above were ever removed.
re_scrub = re.compile("[0-9_.#]")
ppep_count = 0
for ppep in generate_ppep(f_name):
    ppep_count += 1
    add_to_trie = False
    # scrubbed: phosphopeptide without digits, "_", "." and "#"
    scrubbed = re_scrub.sub("", ppep)
    # deppep: scrubbed sequence with every "p" marker removed
    deppep = re_phos.sub("", scrubbed)
    if options.verbose:
        print("deppep: %s; scrubbed: %s" % (deppep, scrubbed))
    # Store each distinct dephosphopeptide once; only a newly stored one
    # needs to be added to the trie.
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    if cur.fetchone() is None:
        add_to_trie = True
        cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,))
    # Look the id up again so it is available for both new and existing rows.
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    deppep_id = cur.fetchone()[0]
    if add_to_trie:
        # Build the trie
        auto.add_word(deppep, (deppep_id, deppep))
    # Every input phosphopeptide is recorded, linked to its dephosphopeptide.
    cur.execute(
        "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)",
        (ppep, scrubbed, deppep_id),
    )
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
397 # def generate_deppep(): |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
398 # cur.execute("SELECT seq FROM deppep") |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
399 # for row in cur.fetchall(): |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
400 # yield row[0] |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
# Number of distinct dephosphopeptide sequences stored so far.
# An aggregate count(*) query yields exactly one row, so read it directly.
cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)")
row = cur.fetchone()
deppep_count = row[0]

# Number of distinct protein sequences in the UniProtKB table.
cur.execute(
    "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)"
)
row = cur.fetchone()
sequence_count = row[0]

# Summarise what was read from the input file.
print("%d phosphopeptides were read from input" % ppep_count)
print(
    "%d corresponding dephosphopeptides are represented in input"
    % deppep_count
)
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
416 # Look for cases where both Gene_Name and Sequence are identical |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
417 cur.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
418 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
419 SELECT Uniprot_ID, Gene_Name, Sequence |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
420 FROM UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
421 WHERE Sequence IN ( |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
422 SELECT Sequence |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
423 FROM UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
424 GROUP BY Sequence, Gene_Name |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
425 HAVING count(*) > 1 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
426 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
427 ORDER BY Sequence |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
428 """ |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
429 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
430 duplicate_count = 0 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
431 old_seq = "" |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
432 for row in cur.fetchall(): |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
433 if duplicate_count == 0: |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
434 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
436 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
437 if row[2] != old_seq: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
438 old_seq = row[2] |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
439 duplicate_count += 1 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
440 if options.warn_duplicates: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
441 print("\n%s\t%s\t%s" % row) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
442 else: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
443 if options.warn_duplicates: |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
444 print("%s\t%s" % (row[0], row[1])) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
445 if duplicate_count > 0: |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
446 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
447 "\n%d sequences have duplicated accession IDs\n" % duplicate_count |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
448 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
449 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
450 print("%s accession sequences will be searched\n" % sequence_count) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
451 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
452 # print(auto.dump()) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
453 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
454 # Convert the trie to an automaton (a finite-state machine) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
455 auto.make_automaton() |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
456 |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
457 # Execute query for seqs and metadata without fetching the results yet |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
458 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
459 while 1: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
460 batch = uniprot_seq_and_id.fetchmany(size=50) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
461 if not batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
462 break |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
463 for Sequence, UniProtKB_id in batch: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
464 if Sequence is not None: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
465 for end_index, (insert_order, original_value) in auto.iter( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
466 Sequence |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
467 ): |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
468 ker.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
469 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
470 INSERT INTO deppep_UniProtKB |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
471 (deppep_id,UniProtKB_id,pos_start,pos_end) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
472 VALUES (?,?,?,?) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
473 """, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
474 ( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
475 insert_order, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
476 UniProtKB_id, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
477 1 + end_index - len(original_value), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
478 end_index, |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
479 ), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
480 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
481 else: |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
482 raise ValueError( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
484 % (UniProtKB_id,) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
485 ) |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
486 ker.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
487 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
489 FROM uniprotkb_pep_ppep_view |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
490 """ |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
491 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
492 for row in ker.fetchall(): |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
493 print(row[0]) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
494 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
495 ker.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
496 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
498 FROM ( |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
499 SELECT accession |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
500 FROM uniprotkb_pep_ppep_view |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
501 GROUP BY accession |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
502 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
503 """ |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
504 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
505 for row in ker.fetchall(): |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
506 print(row[0]) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
507 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
508 ker.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
509 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
510 SELECT count(*) || ' peptide matches were found' |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
511 FROM ( |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
512 SELECT peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
513 FROM uniprotkb_pep_ppep_view |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
514 GROUP BY peptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
515 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
516 """ |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
517 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
518 for row in ker.fetchall(): |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
519 print(row[0]) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
520 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
521 ker.execute( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
522 """ |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
524 FROM ( |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
525 SELECT phosphopeptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
526 FROM uniprotkb_pep_ppep_view |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
527 GROUP BY phosphopeptide |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
528 ) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
529 """ |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
530 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
531 for row in ker.fetchall(): |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
532 print(row[0]) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
533 |
6
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
534 # link peptides not found in sequence database to a dummy sequence-record |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
535 ker.execute( |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
536 """ |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
537 INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end) |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
538 SELECT id, 'No Uniprot_ID', 0, 0 |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
539 FROM deppep |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
540 WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB) |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
541 """ |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
542 ) |
42daf70d4ed4
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9dfb7e07a3673d7de4b0a1b7e6ce1b75a8a4f42b"
eschen42
parents:
5
diff
changeset
|
543 |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
544 con.commit() |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
545 ker.execute("vacuum") |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
546 con.close() |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
547 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
548 |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
549 if __name__ == "__main__": |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
550 wrap_start_time = time.perf_counter() |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
551 __main__() |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
552 wrap_stop_time = time.perf_counter() |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
553 # print(wrap_start_time) |
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
554 # print(wrap_stop_time) |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
555 print( |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
556 "\nThe matching process took %d milliseconds to run.\n" |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
557 % ((wrap_stop_time - wrap_start_time) * 1000), |
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
558 ) |
0
2c7e1b167736
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
eschen42
parents:
diff
changeset
|
559 |
5
b91809a18dbe
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
eschen42
parents:
0
diff
changeset
|
560 # vim: sw=4 ts=4 et ai : |