Mercurial > repos > galaxyp > mqppep_anova
annotate search_ppep.py @ 0:d9b68bedbc91 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author | galaxyp |
---|---|
date | Mon, 11 Jul 2022 19:20:41 +0000 |
parents | |
children | 2276e88d5a1f |
rev | line source |
---|---|
0
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
3 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
4 import argparse |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
5 import os.path |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
6 import re |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
7 import sqlite3 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
8 import sys # import the sys module for exc_info |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
9 import time |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
10 import traceback # import the traceback module for format_exception |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
11 from codecs import getreader as cx_getreader |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
12 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
13 # For Aho-Corasick search for fixed set of substrings |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
14 # - add_word |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
15 # - make_automaton |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
16 # - iter |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
17 import ahocorasick |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
18 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
19 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
20 # ref: https://stackoverflow.com/a/8915613/15509512 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
21 # answers: "How to handle exceptions in a list comprehensions" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
22 # usage: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
23 # from math import log |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
24 # eggs = [1,3,0,3,2] |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
25 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None]) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
26 # producing: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
27 # For <built-in function log> |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
28 # with args (0,) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
29 # caught exception: math domain error |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)`` and report, rather than raise, errors.

    Intended for use inside comprehensions, where a single failing element
    should be skipped instead of aborting the whole expression.

    Args:
        func: The callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        handle: Accepted for interface compatibility but not called; a
            failure always yields ``None``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` when ``func`` raises an
        ``Exception``. In the failure case a description of the callable,
        its positional arguments, the exception and its formatted stack
        trace is printed to standard output.
    """
    try:
        outcome = func(*args, **kwargs)
    except Exception as err:
        # Describe the failed call first, then the exception itself.
        print("For " + str(func))
        print(" with args " + str(args))
        print(" caught exception: " + str(err))
        exc_type, exc_value, exc_tb = sys.exc_info()
        # The trace is printed as the repr of the list of formatted lines.
        trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
        print(" stack trace: " + str(trace_lines))
        # Callers filter on None to drop failed elements.
        return None
    return outcome
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
43 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
44 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
45 def __main__(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
46 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
47 DROP_TABLES_SQL = """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
48 DROP VIEW IF EXISTS ppep_gene_site_view; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
49 DROP VIEW IF EXISTS uniprot_view; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
51 DROP VIEW IF EXISTS ppep_intensity_view; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
52 DROP VIEW IF EXISTS ppep_metadata_view; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
53 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
54 DROP TABLE IF EXISTS sample; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
55 DROP TABLE IF EXISTS ppep; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
56 DROP TABLE IF EXISTS site_type; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
57 DROP TABLE IF EXISTS deppep_UniProtKB; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
58 DROP TABLE IF EXISTS deppep; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
59 DROP TABLE IF EXISTS ppep_gene_site; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
60 DROP TABLE IF EXISTS ppep_metadata; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
61 DROP TABLE IF EXISTS ppep_intensity; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
62 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
63 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
64 CREATE_TABLES_SQL = """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
65 CREATE TABLE deppep |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
66 ( id INTEGER PRIMARY KEY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
67 , seq TEXT UNIQUE ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
68 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
69 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
70 CREATE TABLE deppep_UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
71 ( deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
72 , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
73 , pos_start INTEGER |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
74 , pos_end INTEGER |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
75 , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
76 ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
77 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
78 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
79 CREATE TABLE ppep |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
80 ( id INTEGER PRIMARY KEY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
81 , deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
82 , seq TEXT UNIQUE ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
83 , scrubbed TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
84 ); |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
85 CREATE TABLE site_type |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
86 ( id INTEGER PRIMARY KEY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
87 , type_name TEXT UNIQUE ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
88 ); |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
89 CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
90 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
91 CREATE TABLE sample |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
92 ( id INTEGER PRIMARY KEY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
93 , name TEXT UNIQUE ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
94 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
95 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
96 CREATE VIEW uniprot_view AS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
97 SELECT DISTINCT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
98 Uniprot_ID |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
99 , Description |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
100 , Organism_Name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
101 , Organism_ID |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
102 , Gene_Name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
103 , PE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
104 , SV |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
105 , Sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
106 , Description || |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
107 CASE WHEN Organism_Name = 'N/A' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
108 THEN '' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
109 ELSE ' OS='|| Organism_Name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
110 END || |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
111 CASE WHEN Organism_ID = -1 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
112 THEN '' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
113 ELSE ' OX='|| Organism_ID |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
114 END || |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
115 CASE WHEN Gene_Name = 'N/A' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
116 THEN '' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
117 ELSE ' GN='|| Gene_Name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
118 END || |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
119 CASE WHEN PE = 'N/A' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
120 THEN '' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
121 ELSE ' PE='|| PE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
122 END || |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
123 CASE WHEN SV = 'N/A' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
124 THEN '' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
125 ELSE ' SV='|| SV |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
126 END AS long_description |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
127 , Database |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
128 FROM UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
129 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
130 CREATE VIEW uniprotkb_pep_ppep_view AS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
131 SELECT deppep_UniProtKB.UniprotKB_ID AS accession |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
132 , deppep_UniProtKB.pos_start AS pos_start |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
133 , deppep_UniProtKB.pos_end AS pos_end |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
134 , deppep.seq AS peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
135 , ppep.seq AS phosphopeptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
136 , ppep.scrubbed AS scrubbed |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
137 , uniprot_view.Sequence AS sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
138 , uniprot_view.Description AS description |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
139 , uniprot_view.long_description AS long_description |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
140 , ppep.id AS ppep_id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
141 FROM ppep, deppep, deppep_UniProtKB, uniprot_view |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
142 WHERE deppep.id = ppep.deppep_id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
143 AND deppep.id = deppep_UniProtKB.deppep_id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
144 AND deppep_UniProtKB.UniprotKB_ID = uniprot_view.Uniprot_ID |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
145 ORDER BY UniprotKB_ID, deppep.seq, ppep.seq |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
146 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
147 CREATE TABLE ppep_gene_site |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
148 ( ppep_id INTEGER REFERENCES ppep(id) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
149 , gene_names TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
150 , site_type_id INTEGER REFERENCES site_type(id) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
151 , kinase_map TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
152 , PRIMARY KEY (ppep_id, kinase_map) ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
153 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
154 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
155 CREATE VIEW ppep_gene_site_view AS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
156 SELECT DISTINCT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
157 ppep.seq AS phospho_peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
158 , ppep_id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
159 , gene_names |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
160 , type_name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
161 , kinase_map |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
162 FROM |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
163 ppep, ppep_gene_site, site_type |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
164 WHERE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
165 ppep_gene_site.ppep_id = ppep.id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
166 AND |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
167 ppep_gene_site.site_type_id = site_type.id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
168 ORDER BY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
169 ppep.seq |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
170 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
171 CREATE TABLE ppep_metadata |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
172 ( ppep_id INTEGER REFERENCES ppep(id) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
173 , protein_description TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
174 , gene_name TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
175 , FASTA_name TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
176 , phospho_sites TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
177 , motifs_unique TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
178 , accessions TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
179 , motifs_all_members TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
180 , domain TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
181 , ON_FUNCTION TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
182 , ON_PROCESS TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
183 , ON_PROT_INTERACT TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
184 , ON_OTHER_INTERACT TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
185 , notes TEXT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
186 , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
187 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
188 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
189 CREATE VIEW ppep_metadata_view AS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
190 SELECT DISTINCT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
191 ppep.seq AS phospho_peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
192 , protein_description |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
193 , gene_name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
194 , FASTA_name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
195 , phospho_sites |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
196 , motifs_unique |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
197 , accessions |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
198 , motifs_all_members |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
199 , domain |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
200 , ON_FUNCTION |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
201 , ON_PROCESS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
202 , ON_PROT_INTERACT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
203 , ON_OTHER_INTERACT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
204 , notes |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
205 FROM |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
206 ppep, ppep_metadata |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
207 WHERE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
208 ppep_metadata.ppep_id = ppep.id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
209 ORDER BY |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
210 ppep.seq |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
211 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
212 CREATE TABLE ppep_intensity |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
213 ( ppep_id INTEGER REFERENCES ppep(id) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
214 , sample_id INTEGER |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
215 , intensity INTEGER |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
216 , PRIMARY KEY (ppep_id, sample_id) ON CONFLICT IGNORE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
217 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
218 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
219 CREATE VIEW ppep_intensity_view AS |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
220 SELECT DISTINCT |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
221 ppep.seq AS phospho_peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
222 , sample.name AS sample |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
223 , intensity |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
224 FROM |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
225 ppep, sample, ppep_intensity |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
226 WHERE |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
227 ppep_intensity.sample_id = sample.id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
228 AND |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
229 ppep_intensity.ppep_id = ppep.id |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
230 ; |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
231 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
232 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
233 UNIPROT_SEQ_AND_ID_SQL = """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
234 select Sequence, Uniprot_ID |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
235 from UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
236 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
237 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
238 # Parse Command Line |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
239 parser = argparse.ArgumentParser( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
240 description="Phosphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
241 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
242 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
243 # inputs: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
244 # Phosphopeptide data for experimental results, including the intensities |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
245 # and the mapping to kinase domains, in tabular format. |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
246 parser.add_argument( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
247 "--phosphopeptides", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
248 "-p", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
249 nargs=1, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
250 required=True, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
251 dest="phosphopeptides", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
252 help="Phosphopeptide data for experimental results, generated by the Phosphoproteomic Enrichment Localization Filter tool", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
253 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
254 parser.add_argument( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
255 "--uniprotkb", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
256 "-u", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
257 nargs=1, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
258 required=True, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
259 dest="uniprotkb", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phosphoproteomic Enrichment Kinase Mapping tool", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
261 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
262 parser.add_argument( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
263 "--schema", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
264 action="store_true", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
265 dest="db_schema", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
266 help="show updated database schema", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
267 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
268 parser.add_argument( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
269 "--warn-duplicates", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
270 action="store_true", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
271 dest="warn_duplicates", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
272 help="show warnings for duplicated sequences", |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
273 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Optional flag that switches on program tracing output
parser.add_argument(
    "--verbose",
    dest="verbose",
    action="store_true",
    help="show somewhat verbose program tracing",
)

# "Make it so!" (parse the arguments); echo them back when tracing is on
options = parser.parse_args()
if options.verbose:
    print("options: %s\n" % str(options))
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
284 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Absolute path to the phosphopeptide (e.g., "outputfile_STEP2.txt") input
# tabular file; the program exits when the argument was not supplied
if options.phosphopeptides is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    ppep_arg = options.phosphopeptides[0]
    f_name = os.path.abspath(ppep_arg)
except Exception as e:
    exit("Error parsing phosphopeptides argument: %s" % (e,))

# Absolute path to the SQLite input/output file; the program exits when the
# argument was not supplied
if options.uniprotkb is None:
    exit('Argument "uniprotkb" is required but not supplied')
try:
    db_arg = options.uniprotkb[0]
    db_name = os.path.abspath(db_arg)
except Exception as e:
    exit("Error parsing uniprotkb argument: %s" % (e,))
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
300 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
301 # print("options.schema is %d" % options.db_schema) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
302 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
303 # db_name = "demo/test.sqlite" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
304 # f_name = "demo/test_input.txt" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
305 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Open the SQLite database.  `cur` runs the statements below; a second
# cursor, `ker`, is created on the same connection.
con = sqlite3.connect(db_name)
cur = con.cursor()
ker = con.cursor()

# Run the DROP_TABLES_SQL script and then the CREATE_TABLES_SQL script
# (both are defined elsewhere in this file).
cur.executescript(DROP_TABLES_SQL)
cur.executescript(CREATE_TABLES_SQL)

if options.db_schema:
    print(
        "\nAfter creating tables/views that are to be created, schema is:"
    )
    # Read the schema table through its historical name, sqlite_master:
    # the sqlite_schema alias is only recognized by SQLite 3.33.0 and
    # later, whereas sqlite_master works with every version.
    cur.execute("SELECT * FROM sqlite_master")
    for row in cur.fetchall():
        # The fifth column of the schema table is the SQL text that
        # created the object; skip rows for which it is NULL.
        if row[4] is not None:
            print("%s;" % row[4])
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
329 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
def generate_ppep(f):
    """Yield the first tab-delimited field of each data line of a file.

    The file at path ``f`` is read as bytes and decoded as latin-1.  The
    first line is treated as a header and is skipped.  For every following
    line, the text that precedes the first tab (or the whole line, line
    ending included, when the line holds no tab) is yielded with all
    double-quote characters removed.

    The file is opened in a ``with`` block so that it is closed when the
    generator finishes, is closed early by the caller, or raises.
    """
    re_tab = re.compile("^[^\t]*")
    re_quote = re.compile('"')
    # get keys from upstream tabular file using readline()
    # ref: https://stackoverflow.com/a/16713581/15509512
    #      answer to "Use codecs to read file with correct encoding"
    with open(f, "rb") as file1_encoded:
        file1 = cx_getreader("latin-1")(file1_encoded)
        # readline() returns the empty string only at end of file
        lines = iter(file1.readline, "")
        # discard the header line (if the file has one)
        next(lines, None)
        for line in lines:
            # re_tab can always match (possibly the empty string)
            yield re_quote.sub("", re_tab.match(line).group(0))
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
354 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Build an Aho-Corasick automaton from a trie
# - ref:
#   - https://pypi.org/project/pyahocorasick/
#   - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
#   - https://en.wikipedia.org/wiki/Trie
auto = ahocorasick.Automaton()
re_phos = re.compile("p")
# Scrub out unsearchable characters per section
#   "Match the p_peptides to the @sequences array:"
# of the original
#   PhosphoPeptide Upstream Kinase Mapping.pl
# which originally read
#   $tmp_p_peptide =~ s/#//g;
#   $tmp_p_peptide =~ s/\d//g;
#   $tmp_p_peptide =~ s/\_//g;
#   $tmp_p_peptide =~ s/\.//g;
#
# The pattern has to be a character class so that each digit, underscore,
# period, and hash sign is removed individually; without the brackets it
# would only match the literal text "0-9_", any one character, then "#".
re_scrub = re.compile("[0-9_.#]")
ppep_count = 0
for ppep in generate_ppep(f_name):
    ppep_count += 1
    scrubbed = re_scrub.sub("", ppep)
    # remove every "p" from the scrubbed sequence
    deppep = re_phos.sub("", scrubbed)
    if options.verbose:
        print("deppep: %s; scrubbed: %s" % (deppep, scrubbed))
    # A sequence is inserted into deppep, and added to the trie, only the
    # first time it is seen.
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    add_to_trie = cur.fetchone() is None
    if add_to_trie:
        cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,))
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    deppep_id = cur.fetchone()[0]
    if add_to_trie:
        # Build the trie
        auto.add_word(deppep, (deppep_id, deppep))
    cur.execute(
        "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)",
        (ppep, scrubbed, deppep_id),
    )
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
397 # def generate_deppep(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
398 # cur.execute("SELECT seq FROM deppep") |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
399 # for row in cur.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
400 # yield row[0] |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
# Count the distinct sequences in the deppep table; the aggregate query
# produces exactly one single-column row
cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)")
[(deppep_count,)] = cur.fetchall()

# Count the distinct sequences in the UniProtKB table in the same way
cur.execute(
    "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)"
)
[(sequence_count,)] = cur.fetchall()

# Summarize what was read from the input
print("%d phosphopeptides were read from input" % ppep_count)
summary = "%d corresponding dephosphopeptides are represented in input"
print(summary % deppep_count)
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
416 # Look for cases where both Gene_Name and Sequence are identical |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
417 cur.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
418 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
419 SELECT Uniprot_ID, Gene_Name, Sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
420 FROM UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
421 WHERE Sequence IN ( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
422 SELECT Sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
423 FROM UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
424 GROUP BY Sequence, Gene_Name |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
425 HAVING count(*) > 1 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
426 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
427 ORDER BY Sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
428 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
429 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
430 duplicate_count = 0 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
431 old_seq = "" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
432 for row in cur.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
433 if duplicate_count == 0: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
434 print( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
436 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
437 if row[2] != old_seq: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
438 old_seq = row[2] |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
439 duplicate_count += 1 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
440 if options.warn_duplicates: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
441 print("\n%s\t%s\t%s" % row) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
442 else: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
443 if options.warn_duplicates: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
444 print("%s\t%s" % (row[0], row[1])) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
445 if duplicate_count > 0: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
446 print( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
447 "\n%d sequences have duplicated accession IDs\n" % duplicate_count |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
448 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
449 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
450 print("%s accession sequences will be searched\n" % sequence_count) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
451 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
452 # print(auto.dump()) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
453 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
454 # Convert the trie to an automaton (a finite-state machine) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
455 auto.make_automaton() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
456 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
457 # Execute query for seqs and metadata without fetching the results yet |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
458 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
459 while 1: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
460 batch = uniprot_seq_and_id.fetchmany(size=50) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
461 if not batch: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
462 break |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
463 for Sequence, UniProtKB_id in batch: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
464 if Sequence is not None: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
465 for end_index, (insert_order, original_value) in auto.iter( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
466 Sequence |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
467 ): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
468 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
469 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
470 INSERT INTO deppep_UniProtKB |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
471 (deppep_id,UniProtKB_id,pos_start,pos_end) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
472 VALUES (?,?,?,?) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
473 """, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
474 ( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
475 insert_order, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
476 UniProtKB_id, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
477 1 + end_index - len(original_value), |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
478 end_index, |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
479 ), |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
480 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
481 else: |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
482 raise ValueError( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
484 % (UniProtKB_id,) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
485 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
486 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
487 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
489 FROM uniprotkb_pep_ppep_view |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
490 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
491 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
492 for row in ker.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
493 print(row[0]) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
494 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
495 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
496 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
498 FROM ( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
499 SELECT accession |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
500 FROM uniprotkb_pep_ppep_view |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
501 GROUP BY accession |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
502 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
503 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
504 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
505 for row in ker.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
506 print(row[0]) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
507 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
508 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
509 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
510 SELECT count(*) || ' peptide matches were found' |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
511 FROM ( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
512 SELECT peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
513 FROM uniprotkb_pep_ppep_view |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
514 GROUP BY peptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
515 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
516 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
517 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
518 for row in ker.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
519 print(row[0]) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
520 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
521 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
522 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
524 FROM ( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
525 SELECT phosphopeptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
526 FROM uniprotkb_pep_ppep_view |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
527 GROUP BY phosphopeptide |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
528 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
529 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
530 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
531 for row in ker.fetchall(): |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
532 print(row[0]) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
533 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
534 # link peptides not found in sequence database to a dummy sequence-record |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
535 ker.execute( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
536 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
537 INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
538 SELECT id, 'No Uniprot_ID', 0, 0 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
539 FROM deppep |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
540 WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
541 """ |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
542 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
543 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
544 con.commit() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
545 ker.execute("vacuum") |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
546 con.close() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
547 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
548 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
549 if __name__ == "__main__": |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
550 wrap_start_time = time.perf_counter() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
551 __main__() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
552 wrap_stop_time = time.perf_counter() |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
553 # print(wrap_start_time) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
554 # print(wrap_stop_time) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
555 print( |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
556 "\nThe matching process took %d milliseconds to run.\n" |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
557 % ((wrap_stop_time - wrap_start_time) * 1000), |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
558 ) |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
559 |
d9b68bedbc91
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
galaxyp
parents:
diff
changeset
|
560 # vim: sw=4 ts=4 et ai : |