Mercurial > repos > eschen42 > mqppep_anova
annotate search_ppep.py @ 23:3911581e639a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
author | eschen42 |
---|---|
date | Mon, 11 Jul 2022 13:51:14 +0000 |
parents | |
children | 5b8e15b2a67c |
rev | line source |
---|---|
23
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
1 #!/usr/bin/env python |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
3 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
4 import argparse |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
5 import os.path |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
6 import re |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
7 import sqlite3 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
8 import sys # import the sys module for exc_info |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
9 import time |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
10 import traceback # import the traceback module for format_exception |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
11 from codecs import getreader as cx_getreader |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
12 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
13 # For Aho-Corasick search for fixed set of substrings |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
14 # - add_word |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
15 # - make_automaton |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
16 # - iter |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
17 import ahocorasick |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
18 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
19 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
20 # ref: https://stackoverflow.com/a/8915613/15509512 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
21 # answers: "How to handle exceptions in a list comprehensions" |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
22 # usage: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
23 # from math import log |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
24 # eggs = [1,3,0,3,2] |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
25 # print([x for x in [catch(log, egg) for egg in eggs] if x is not None]) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
26 # producing: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
27 # For <built-in function log> |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
28 # with args (0,) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
29 # caught exception: math domain error |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
def catch(func, *args, handle=lambda e: e, **kwargs):
    """Call ``func(*args, **kwargs)`` and report, rather than raise, failures.

    When the call succeeds, its result is returned unchanged.  When it
    raises an ``Exception``, a report is printed to standard output (the
    callable, its positional arguments, the exception text, and the
    formatted stack trace) and ``None`` is returned.

    ``handle`` is accepted so existing call sites keep working, but it is
    never invoked: a failed call always yields ``None``.
    """
    try:
        outcome = func(*args, **kwargs)
    except Exception as err:
        print("For %s" % str(func))
        print(" with args %s" % str(args))
        print(" caught exception: %s" % str(err))
        # Gather the active exception only after the summary lines are out.
        exc_type, exc_value, exc_tb = sys.exc_info()
        trace_lines = traceback.format_exception(exc_type, exc_value, exc_tb)
        print(" stack trace: " + str(trace_lines))
        # Failures are reported, not propagated; callers filter out None.
        outcome = None
    return outcome
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
43 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
44 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
45 def __main__(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
46 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
47 DROP_TABLES_SQL = """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
48 DROP VIEW IF EXISTS ppep_gene_site_view; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
49 DROP VIEW IF EXISTS uniprot_view; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
51 DROP VIEW IF EXISTS ppep_intensity_view; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
52 DROP VIEW IF EXISTS ppep_metadata_view; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
53 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
54 DROP TABLE IF EXISTS sample; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
55 DROP TABLE IF EXISTS ppep; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
56 DROP TABLE IF EXISTS site_type; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
57 DROP TABLE IF EXISTS deppep_UniProtKB; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
58 DROP TABLE IF EXISTS deppep; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
59 DROP TABLE IF EXISTS ppep_gene_site; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
60 DROP TABLE IF EXISTS ppep_metadata; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
61 DROP TABLE IF EXISTS ppep_intensity; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
62 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
63 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
64 CREATE_TABLES_SQL = """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
65 CREATE TABLE deppep |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
66 ( id INTEGER PRIMARY KEY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
67 , seq TEXT UNIQUE ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
68 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
69 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
70 CREATE TABLE deppep_UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
71 ( deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
72 , UniProtKB_id TEXT REFERENCES UniProtKB(id) ON DELETE CASCADE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
73 , pos_start INTEGER |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
74 , pos_end INTEGER |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
75 , PRIMARY KEY (deppep_id, UniProtKB_id, pos_start, pos_end) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
76 ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
77 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
78 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
79 CREATE TABLE ppep |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
80 ( id INTEGER PRIMARY KEY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
81 , deppep_id INTEGER REFERENCES deppep(id) ON DELETE CASCADE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
82 , seq TEXT UNIQUE ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
83 , scrubbed TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
84 ); |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
85 CREATE TABLE site_type |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
86 ( id INTEGER PRIMARY KEY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
87 , type_name TEXT UNIQUE ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
88 ); |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
89 CREATE INDEX idx_ppep_scrubbed on ppep(scrubbed) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
90 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
91 CREATE TABLE sample |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
92 ( id INTEGER PRIMARY KEY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
93 , name TEXT UNIQUE ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
94 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
95 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
96 CREATE VIEW uniprot_view AS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
97 SELECT DISTINCT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
98 Uniprot_ID |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
99 , Description |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
100 , Organism_Name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
101 , Organism_ID |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
102 , Gene_Name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
103 , PE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
104 , SV |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
105 , Sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
106 , Description || |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
107 CASE WHEN Organism_Name = 'N/A' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
108 THEN '' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
109 ELSE ' OS='|| Organism_Name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
110 END || |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
111 CASE WHEN Organism_ID = -1 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
112 THEN '' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
113 ELSE ' OX='|| Organism_ID |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
114 END || |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
115 CASE WHEN Gene_Name = 'N/A' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
116 THEN '' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
117 ELSE ' GN='|| Gene_Name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
118 END || |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
119 CASE WHEN PE = 'N/A' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
120 THEN '' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
121 ELSE ' PE='|| PE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
122 END || |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
123 CASE WHEN SV = 'N/A' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
124 THEN '' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
125 ELSE ' SV='|| SV |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
126 END AS long_description |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
127 , Database |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
128 FROM UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
129 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
130 CREATE VIEW uniprotkb_pep_ppep_view AS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
131 SELECT deppep_UniProtKB.UniprotKB_ID AS accession |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
132 , deppep_UniProtKB.pos_start AS pos_start |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
133 , deppep_UniProtKB.pos_end AS pos_end |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
134 , deppep.seq AS peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
135 , ppep.seq AS phosphopeptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
136 , ppep.scrubbed AS scrubbed |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
137 , uniprot_view.Sequence AS sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
138 , uniprot_view.Description AS description |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
139 , uniprot_view.long_description AS long_description |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
140 , ppep.id AS ppep_id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
141 FROM ppep, deppep, deppep_UniProtKB, uniprot_view |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
142 WHERE deppep.id = ppep.deppep_id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
143 AND deppep.id = deppep_UniProtKB.deppep_id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
144 AND deppep_UniProtKB.UniprotKB_ID = uniprot_view.Uniprot_ID |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
145 ORDER BY UniprotKB_ID, deppep.seq, ppep.seq |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
146 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
147 CREATE TABLE ppep_gene_site |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
148 ( ppep_id INTEGER REFERENCES ppep(id) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
149 , gene_names TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
150 , site_type_id INTEGER REFERENCES site_type(id) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
151 , kinase_map TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
152 , PRIMARY KEY (ppep_id, kinase_map) ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
153 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
154 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
155 CREATE VIEW ppep_gene_site_view AS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
156 SELECT DISTINCT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
157 ppep.seq AS phospho_peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
158 , ppep_id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
159 , gene_names |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
160 , type_name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
161 , kinase_map |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
162 FROM |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
163 ppep, ppep_gene_site, site_type |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
164 WHERE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
165 ppep_gene_site.ppep_id = ppep.id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
166 AND |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
167 ppep_gene_site.site_type_id = site_type.id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
168 ORDER BY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
169 ppep.seq |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
170 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
171 CREATE TABLE ppep_metadata |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
172 ( ppep_id INTEGER REFERENCES ppep(id) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
173 , protein_description TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
174 , gene_name TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
175 , FASTA_name TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
176 , phospho_sites TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
177 , motifs_unique TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
178 , accessions TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
179 , motifs_all_members TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
180 , domain TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
181 , ON_FUNCTION TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
182 , ON_PROCESS TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
183 , ON_PROT_INTERACT TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
184 , ON_OTHER_INTERACT TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
185 , notes TEXT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
186 , PRIMARY KEY (ppep_id) ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
187 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
188 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
189 CREATE VIEW ppep_metadata_view AS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
190 SELECT DISTINCT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
191 ppep.seq AS phospho_peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
192 , protein_description |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
193 , gene_name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
194 , FASTA_name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
195 , phospho_sites |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
196 , motifs_unique |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
197 , accessions |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
198 , motifs_all_members |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
199 , domain |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
200 , ON_FUNCTION |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
201 , ON_PROCESS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
202 , ON_PROT_INTERACT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
203 , ON_OTHER_INTERACT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
204 , notes |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
205 FROM |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
206 ppep, ppep_metadata |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
207 WHERE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
208 ppep_metadata.ppep_id = ppep.id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
209 ORDER BY |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
210 ppep.seq |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
211 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
212 CREATE TABLE ppep_intensity |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
213 ( ppep_id INTEGER REFERENCES ppep(id) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
214 , sample_id INTEGER |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
215 , intensity INTEGER |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
216 , PRIMARY KEY (ppep_id, sample_id) ON CONFLICT IGNORE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
217 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
218 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
219 CREATE VIEW ppep_intensity_view AS |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
220 SELECT DISTINCT |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
221 ppep.seq AS phospho_peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
222 , sample.name AS sample |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
223 , intensity |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
224 FROM |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
225 ppep, sample, ppep_intensity |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
226 WHERE |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
227 ppep_intensity.sample_id = sample.id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
228 AND |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
229 ppep_intensity.ppep_id = ppep.id |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
230 ; |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
231 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
232 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
233 UNIPROT_SEQ_AND_ID_SQL = """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
234 select Sequence, Uniprot_ID |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
235 from UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
236 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
237 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
238 # Parse Command Line |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
239 parser = argparse.ArgumentParser( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
240 description="Phosphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
241 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
242 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
243 # inputs: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
244 # Phosphopeptide data for experimental results, including the intensities |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
245 # and the mapping to kinase domains, in tabular format. |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
246 parser.add_argument( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
247 "--phosphopeptides", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
248 "-p", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
249 nargs=1, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
250 required=True, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
251 dest="phosphopeptides", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
252 help="Phosphopeptide data for experimental results, generated by the Phosphoproteomic Enrichment Localization Filter tool", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
253 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
254 parser.add_argument( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
255 "--uniprotkb", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
256 "-u", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
257 nargs=1, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
258 required=True, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
259 dest="uniprotkb", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
260 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phosphoproteomic Enrichment Kinase Mapping tool", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
261 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
262 parser.add_argument( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
263 "--schema", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
264 action="store_true", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
265 dest="db_schema", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
266 help="show updated database schema", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
267 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
268 parser.add_argument( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
269 "--warn-duplicates", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
270 action="store_true", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
271 dest="warn_duplicates", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
272 help="show warnings for duplicated sequences", |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
273 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
parser.add_argument(
    "--verbose",
    action="store_true",
    dest="verbose",
    help="show somewhat verbose program tracing",
)
# "Make it so!" (parse the arguments)
options = parser.parse_args()
if options.verbose:
    print("options: " + str(options) + "\n")

# Path to the phosphopeptide (e.g., "outputfile_STEP2.txt") input tabular
# file.  The option value is indexed with [0], so it is expected to be a
# sequence holding the path as its first element.
if options.phosphopeptides is None:
    exit('Argument "phosphopeptides" is required but not supplied')
try:
    f_name = os.path.abspath(options.phosphopeptides[0])
except Exception as e:
    exit("Error parsing phosphopeptides argument: %s" % (e))

# Path to the SQLite database file that is both read and written here.
if options.uniprotkb is None:
    exit('Argument "uniprotkb" is required but not supplied')
try:
    db_name = os.path.abspath(options.uniprotkb[0])
except Exception as e:
    exit("Error parsing uniprotkb argument: %s" % (e))

# For ad-hoc debugging the two paths can be overridden here, e.g.:
#   db_name = "demo/test.sqlite"
#   f_name = "demo/test_input.txt"

# Two cursors are opened on the same connection; "ker" is not used in this
# part of the script and is kept for the code that follows.
con = sqlite3.connect(db_name)
cur = con.cursor()
ker = con.cursor()

# Start from a clean slate: drop, then re-create, the tables/views that this
# script owns (the SQL text is defined elsewhere in the file).
cur.executescript(DROP_TABLES_SQL)
cur.executescript(CREATE_TABLES_SQL)

if options.db_schema:
    print(
        "\nAfter creating tables/views that are to be created, schema is:"
    )
    # "sqlite_schema" is only recognized by SQLite 3.33.0 and later;
    # "sqlite_master" is the original name of the same table and is accepted
    # by every SQLite version, so the dump also works with older libraries.
    cur.execute("SELECT sql FROM sqlite_master")
    for (ddl,) in cur.fetchall():
        # Entries without SQL text (NULL in the "sql" column) are skipped.
        if ddl is not None:
            print("%s;" % ddl)
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
329 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
def generate_ppep(f):
    """Yield the first tab-delimited field of every data line in file ``f``.

    The file is opened in binary mode and decoded as latin-1.  The first
    line is treated as a header and skipped.  For each remaining line, the
    text up to (not including) the first tab is taken, every double-quote
    character is removed from it, and the result is yielded.

    Note: when a line contains no tab, the whole line -- including its line
    terminator -- is the "first field", because the pattern only stops at a
    tab.

    The file is closed when the generator is exhausted, closed early, or
    interrupted by an exception.

    Args:
        f: path of the tabular input file.

    Yields:
        str: the unquoted first field of each line after the header.
    """
    # ref: https://stackoverflow.com/a/16713581/15509512
    #      answer to "Use codecs to read file with correct encoding"
    re_tab = re.compile("^[^\t]*")
    re_quote = re.compile('"')
    with open(f, "rb") as file1_encoded:
        # cx_getreader is imported elsewhere in this file (presumably
        # codecs.getreader -- confirm against the import block).
        file1 = cx_getreader("latin-1")(file1_encoded)
        # readline() returns an empty string only at end of file.
        for count, line in enumerate(iter(file1.readline, ""), start=1):
            if count == 1:
                # header line
                continue
            yield re_quote.sub("", re_tab.match(line)[0])
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
354 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
# Build an Aho-Corasick automaton from a trie
# - ref:
#   - https://pypi.org/project/pyahocorasick/
#   - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
#   - https://en.wikipedia.org/wiki/Trie
auto = ahocorasick.Automaton()
re_phos = re.compile("p")
# Scrub out unsearchable characters per section
#   "Match the p_peptides to the @sequences array:"
# of the original
#   PhosphoPeptide Upstream Kinase Mapping.pl
# which originally read
#   $tmp_p_peptide =~ s/#//g;
#   $tmp_p_peptide =~ s/\d//g;
#   $tmp_p_peptide =~ s/\_//g;
#   $tmp_p_peptide =~ s/\.//g;
#
# The pattern must be a character class: without the brackets it matches
# only the literal text "0-9_" followed by any character and "#", so nothing
# would actually be scrubbed.
re_scrub = re.compile(r"[0-9_.#]")
ppep_count = 0
for ppep in generate_ppep(f_name):
    ppep_count += 1
    # "scrubbed" keeps the "p" markers; "deppep" additionally drops every
    # "p" and is the string that is stored in deppep and added to the trie.
    scrubbed = re_scrub.sub("", ppep)
    deppep = re_phos.sub("", scrubbed)
    if options.verbose:
        print("deppep: %s; scrubbed: %s" % (deppep, scrubbed))
    # Look up the id of this dephosphopeptide, inserting it first when it has
    # not been seen before.  The fetched row is kept so that the id is read
    # from a row that is known to exist.
    cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
    deppep_row = cur.fetchone()
    add_to_trie = deppep_row is None
    if add_to_trie:
        cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,))
        cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,))
        deppep_row = cur.fetchone()
    deppep_id = deppep_row[0]
    if add_to_trie:
        # Build the trie: each distinct dephosphopeptide is added once, with
        # its database id and sequence as the associated value.
        auto.add_word(deppep, (deppep_id, deppep))
    # Every input phosphopeptide is recorded, linked to its dephosphopeptide.
    cur.execute(
        "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)",
        (ppep, scrubbed, deppep_id),
    )
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
397 # def generate_deppep(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
398 # cur.execute("SELECT seq FROM deppep") |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
399 # for row in cur.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
400 # yield row[0] |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
# Number of distinct dephosphopeptide sequences stored in deppep.  An
# aggregate query without an outer GROUP BY yields exactly one row, so the
# value is read directly from that row.
cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)")
deppep_count = cur.fetchone()[0]

# Number of distinct protein sequences stored in UniProtKB.
cur.execute(
    "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)"
)
sequence_count = cur.fetchone()[0]

# Summarize what was read from the input file.
print("%d phosphopeptides were read from input" % ppep_count)
deppep_message = "%d corresponding dephosphopeptides are represented in input"
print(deppep_message % deppep_count)
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
416 # Look for cases where both Gene_Name and Sequence are identical |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
417 cur.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
418 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
419 SELECT Uniprot_ID, Gene_Name, Sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
420 FROM UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
421 WHERE Sequence IN ( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
422 SELECT Sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
423 FROM UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
424 GROUP BY Sequence, Gene_Name |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
425 HAVING count(*) > 1 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
426 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
427 ORDER BY Sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
428 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
429 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
430 duplicate_count = 0 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
431 old_seq = "" |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
432 for row in cur.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
433 if duplicate_count == 0: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
434 print( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
435 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
436 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
437 if row[2] != old_seq: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
438 old_seq = row[2] |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
439 duplicate_count += 1 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
440 if options.warn_duplicates: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
441 print("\n%s\t%s\t%s" % row) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
442 else: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
443 if options.warn_duplicates: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
444 print("%s\t%s" % (row[0], row[1])) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
445 if duplicate_count > 0: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
446 print( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
447 "\n%d sequences have duplicated accession IDs\n" % duplicate_count |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
448 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
449 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
450 print("%s accession sequences will be searched\n" % sequence_count) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
451 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
452 # print(auto.dump()) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
453 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
454 # Convert the trie to an automaton (a finite-state machine) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
455 auto.make_automaton() |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
456 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
457 # Execute query for seqs and metadata without fetching the results yet |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
458 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
459 while 1: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
460 batch = uniprot_seq_and_id.fetchmany(size=50) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
461 if not batch: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
462 break |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
463 for Sequence, UniProtKB_id in batch: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
464 if Sequence is not None: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
465 for end_index, (insert_order, original_value) in auto.iter( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
466 Sequence |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
467 ): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
468 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
469 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
470 INSERT INTO deppep_UniProtKB |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
471 (deppep_id,UniProtKB_id,pos_start,pos_end) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
472 VALUES (?,?,?,?) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
473 """, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
474 ( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
475 insert_order, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
476 UniProtKB_id, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
477 1 + end_index - len(original_value), |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
478 end_index, |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
479 ), |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
480 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
481 else: |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
482 raise ValueError( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
483 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
484 % (UniProtKB_id,) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
485 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
486 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
487 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
488 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
489 FROM uniprotkb_pep_ppep_view |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
490 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
491 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
492 for row in ker.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
493 print(row[0]) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
494 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
495 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
496 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
497 SELECT count(*) || ' accession matches were found', count(*) AS accession_count |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
498 FROM ( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
499 SELECT accession |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
500 FROM uniprotkb_pep_ppep_view |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
501 GROUP BY accession |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
502 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
503 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
504 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
505 for row in ker.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
506 print(row[0]) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
507 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
508 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
509 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
510 SELECT count(*) || ' peptide matches were found' |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
511 FROM ( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
512 SELECT peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
513 FROM uniprotkb_pep_ppep_view |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
514 GROUP BY peptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
515 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
516 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
517 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
518 for row in ker.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
519 print(row[0]) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
520 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
521 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
522 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
523 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
524 FROM ( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
525 SELECT phosphopeptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
526 FROM uniprotkb_pep_ppep_view |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
527 GROUP BY phosphopeptide |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
528 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
529 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
530 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
531 for row in ker.fetchall(): |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
532 print(row[0]) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
533 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
534 # link peptides not found in sequence database to a dummy sequence-record |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
535 ker.execute( |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
536 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
537 INSERT INTO deppep_UniProtKB(deppep_id,UniProtKB_id,pos_start,pos_end) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
538 SELECT id, 'No Uniprot_ID', 0, 0 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
539 FROM deppep |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
540 WHERE id NOT IN (SELECT deppep_id FROM deppep_UniProtKB) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
541 """ |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
542 ) |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
543 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
544 con.commit() |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
545 ker.execute("vacuum") |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
546 con.close() |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
547 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
548 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: run __main__() and report how long the whole call
    # took, measured with time.perf_counter().
    wrap_start_time = time.perf_counter()
    __main__()
    wrap_stop_time = time.perf_counter()

    # perf_counter() returns fractional seconds; scale the difference to
    # milliseconds.  The %d conversion truncates it to a whole number.
    elapsed_ms = (wrap_stop_time - wrap_start_time) * 1000
    print("\nThe matching process took %d milliseconds to run.\n" % elapsed_ms)
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
559 |
3911581e639a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit b18713d25e7b260d6cdaf99fb216a4d1b3014c47
eschen42
parents:
diff
changeset
|
560 # vim: sw=4 ts=4 et ai : |