Mercurial > repos > eschen42 > mqppep_anova
comparison search_ppep.py @ 5:d4d531006735 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 92e8ab6fc27a1f02583742715d644bc96418fbdf"
author | eschen42 |
---|---|
date | Thu, 10 Mar 2022 23:42:48 +0000 |
parents | c1403d18c189 |
children | 922d309640db |
comparison
equal
deleted
inserted
replaced
4:cfc65ae227f8 | 5:d4d531006735 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB | 2 # Search and memoize phosphopeptides in Swiss-Prot SQLite table UniProtKB |
3 | 3 |
4 import argparse | 4 import argparse |
5 import os.path | 5 import os.path |
6 import re | |
6 import sqlite3 | 7 import sqlite3 |
7 import re | 8 import sys # import the sys module for exc_info |
9 import time | |
10 import traceback # import the traceback module for format_exception | |
8 from codecs import getreader as cx_getreader | 11 from codecs import getreader as cx_getreader |
9 import time | |
10 | 12 |
11 # For Aho-Corasick search for fixed set of substrings | 13 # For Aho-Corasick search for fixed set of substrings |
12 # - add_word | 14 # - add_word |
13 # - make_automaton | 15 # - make_automaton |
14 # - iter | 16 # - iter |
15 import ahocorasick | 17 import ahocorasick |
16 # Support map over auto.iter(...) | 18 |
17 # - itemgetter | |
18 import operator | |
19 #import hashlib | |
20 | 19 |
21 # ref: https://stackoverflow.com/a/8915613/15509512 | 20 # ref: https://stackoverflow.com/a/8915613/15509512 |
22 # answers: "How to handle exceptions in a list comprehensions" | 21 # answers: "How to handle exceptions in a list comprehensions" |
23 # usage: | 22 # usage: |
24 # from math import log | 23 # from math import log |
27 # producing: | 26 # producing: |
28 # for <built-in function log> | 27 # for <built-in function log> |
29 # with args (0,) | 28 # with args (0,) |
30 # exception: math domain error | 29 # exception: math domain error |
31 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] | 30 # [0.0, 1.0986122886681098, 1.0986122886681098, 0.6931471805599453] |
32 def catch(func, *args, handle=lambda e : e, **kwargs): | 31 def catch(func, *args, handle=lambda e: e, **kwargs): |
32 | |
33 try: | 33 try: |
34 return func(*args, **kwargs) | 34 return func(*args, **kwargs) |
35 except Exception as e: | 35 except Exception as e: |
36 print("For %s" % str(func)) | 36 print("For %s" % str(func)) |
37 print(" with args %s" % str(args)) | 37 print(" with args %s" % str(args)) |
38 print(" caught exception: %s" % str(e)) | 38 print(" caught exception: %s" % str(e)) |
39 (ty, va, tb) = sys.exc_info() | 39 (ty, va, tb) = sys.exc_info() |
40 print(" stack trace: " + str(traceback.format_exception(ty, va, tb))) | 40 print(" stack trace: " + str(traceback.format_exception(ty, va, tb))) |
41 #exit(-1) | 41 # exit(-1) |
42 return None # was handle(e) | 42 return None # was handle(e) |
43 | |
43 | 44 |
44 def __main__(): | 45 def __main__(): |
45 ITEM_GETTER = operator.itemgetter(1) | 46 |
46 | 47 DROP_TABLES_SQL = """ |
47 DROP_TABLES_SQL = ''' | |
48 DROP VIEW IF EXISTS ppep_gene_site_view; | 48 DROP VIEW IF EXISTS ppep_gene_site_view; |
49 DROP VIEW IF EXISTS uniprot_view; | 49 DROP VIEW IF EXISTS uniprot_view; |
50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; | 50 DROP VIEW IF EXISTS uniprotkb_pep_ppep_view; |
51 DROP VIEW IF EXISTS ppep_intensity_view; | 51 DROP VIEW IF EXISTS ppep_intensity_view; |
52 DROP VIEW IF EXISTS ppep_metadata_view; | 52 DROP VIEW IF EXISTS ppep_metadata_view; |
57 DROP TABLE IF EXISTS deppep_UniProtKB; | 57 DROP TABLE IF EXISTS deppep_UniProtKB; |
58 DROP TABLE IF EXISTS deppep; | 58 DROP TABLE IF EXISTS deppep; |
59 DROP TABLE IF EXISTS ppep_gene_site; | 59 DROP TABLE IF EXISTS ppep_gene_site; |
60 DROP TABLE IF EXISTS ppep_metadata; | 60 DROP TABLE IF EXISTS ppep_metadata; |
61 DROP TABLE IF EXISTS ppep_intensity; | 61 DROP TABLE IF EXISTS ppep_intensity; |
62 ''' | 62 """ |
63 | 63 |
64 CREATE_TABLES_SQL = ''' | 64 CREATE_TABLES_SQL = """ |
65 CREATE TABLE deppep | 65 CREATE TABLE deppep |
66 ( id INTEGER PRIMARY KEY | 66 ( id INTEGER PRIMARY KEY |
67 , seq TEXT UNIQUE ON CONFLICT IGNORE | 67 , seq TEXT UNIQUE ON CONFLICT IGNORE |
68 ) | 68 ) |
69 ; | 69 ; |
211 WHERE | 211 WHERE |
212 ppep_intensity.sample_id = sample.id | 212 ppep_intensity.sample_id = sample.id |
213 AND | 213 AND |
214 ppep_intensity.ppep_id = ppep.id | 214 ppep_intensity.ppep_id = ppep.id |
215 ; | 215 ; |
216 ''' | 216 """ |
217 | 217 |
218 UNIPROT_SEQ_AND_ID_SQL = ''' | 218 UNIPROT_SEQ_AND_ID_SQL = """ |
219 select Sequence, Uniprot_ID | 219 select Sequence, Uniprot_ID |
220 from UniProtKB | 220 from UniProtKB |
221 ''' | 221 """ |
222 | 222 |
223 # Parse Command Line | 223 # Parse Command Line |
224 parser = argparse.ArgumentParser( | 224 parser = argparse.ArgumentParser( |
225 description='Phopsphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB).' | 225 description="Phopsphoproteomic Enrichment phosphopeptide SwissProt search (in place in SQLite DB)." |
226 ) | 226 ) |
227 | 227 |
228 # inputs: | 228 # inputs: |
229 # Phosphopeptide data for experimental results, including the intensities | 229 # Phosphopeptide data for experimental results, including the intensities |
230 # and the mapping to kinase domains, in tabular format. | 230 # and the mapping to kinase domains, in tabular format. |
231 parser.add_argument( | 231 parser.add_argument( |
232 '--phosphopeptides', '-p', | 232 "--phosphopeptides", |
233 "-p", | |
233 nargs=1, | 234 nargs=1, |
234 required=True, | 235 required=True, |
235 dest='phosphopeptides', | 236 dest="phosphopeptides", |
236 help='Phosphopeptide data for experimental results, generated by the Phopsphoproteomic Enrichment Localization Filter tool' | 237 help="Phosphopeptide data for experimental results, generated by the Phopsphoproteomic Enrichment Localization Filter tool", |
237 ) | 238 ) |
238 parser.add_argument( | 239 parser.add_argument( |
239 '--uniprotkb', '-u', | 240 "--uniprotkb", |
241 "-u", | |
240 nargs=1, | 242 nargs=1, |
241 required=True, | 243 required=True, |
242 dest='uniprotkb', | 244 dest="uniprotkb", |
243 help='UniProtKB/Swiss-Prot data, converted from FASTA format by the Phopsphoproteomic Enrichment Kinase Mapping tool' | 245 help="UniProtKB/Swiss-Prot data, converted from FASTA format by the Phopsphoproteomic Enrichment Kinase Mapping tool", |
244 ) | 246 ) |
245 parser.add_argument( | 247 parser.add_argument( |
246 '--schema', | 248 "--schema", |
247 action='store_true', | 249 action="store_true", |
248 dest='db_schema', | 250 dest="db_schema", |
249 help='show updated database schema' | 251 help="show updated database schema", |
250 ) | 252 ) |
251 parser.add_argument( | 253 parser.add_argument( |
252 '--warn-duplicates', | 254 "--warn-duplicates", |
253 action='store_true', | 255 action="store_true", |
254 dest='warn_duplicates', | 256 dest="warn_duplicates", |
255 help='show warnings for duplicated sequences' | 257 help="show warnings for duplicated sequences", |
256 ) | 258 ) |
257 parser.add_argument( | 259 parser.add_argument( |
258 '--verbose', | 260 "--verbose", |
259 action='store_true', | 261 action="store_true", |
260 dest='verbose', | 262 dest="verbose", |
261 help='show somewhat verbose program tracing' | 263 help="show somewhat verbose program tracing", |
262 ) | 264 ) |
263 # "Make it so!" (parse the arguments) | 265 # "Make it so!" (parse the arguments) |
264 options = parser.parse_args() | 266 options = parser.parse_args() |
265 if options.verbose: | 267 if options.verbose: |
266 print("options: " + str(options) + "\n") | 268 print("options: " + str(options) + "\n") |
267 | 269 |
268 # path to phosphopeptide (e.g., "outputfile_STEP2.txt") input tabular file | 270 # path to phosphopeptide (e.g., "outputfile_STEP2.txt") input tabular file |
269 if options.phosphopeptides is None: | 271 if options.phosphopeptides is None: |
270 exit('Argument "phosphopeptides" is required but not supplied') | 272 exit('Argument "phosphopeptides" is required but not supplied') |
271 try: | 273 try: |
272 f_name = os.path.abspath(options.phosphopeptides[0]) | 274 f_name = os.path.abspath(options.phosphopeptides[0]) |
273 except Exception as e: | 275 except Exception as e: |
274 exit('Error parsing phosphopeptides argument: %s' % (e)) | 276 exit("Error parsing phosphopeptides argument: %s" % (e)) |
275 | 277 |
276 # path to SQLite input/output tabular file | 278 # path to SQLite input/output tabular file |
277 if options.uniprotkb is None: | 279 if options.uniprotkb is None: |
278 exit('Argument "uniprotkb" is required but not supplied') | 280 exit('Argument "uniprotkb" is required but not supplied') |
279 try: | 281 try: |
280 db_name = os.path.abspath(options.uniprotkb[0]) | 282 db_name = os.path.abspath(options.uniprotkb[0]) |
281 except Exception as e: | 283 except Exception as e: |
282 exit('Error parsing uniprotkb argument: %s' % (e)) | 284 exit("Error parsing uniprotkb argument: %s" % (e)) |
283 | 285 |
284 # print("options.schema is %d" % options.db_schema) | 286 # print("options.schema is %d" % options.db_schema) |
285 | 287 |
286 # db_name = "demo/test.sqlite" | 288 # db_name = "demo/test.sqlite" |
287 # f_name = "demo/test_input.txt" | 289 # f_name = "demo/test_input.txt" |
300 # print("%s;" % row[4]) | 302 # print("%s;" % row[4]) |
301 | 303 |
302 cur.executescript(CREATE_TABLES_SQL) | 304 cur.executescript(CREATE_TABLES_SQL) |
303 | 305 |
304 if options.db_schema: | 306 if options.db_schema: |
305 print("\nAfter creating tables/views that are to be created, schema is:") | 307 print( |
308 "\nAfter creating tables/views that are to be created, schema is:" | |
309 ) | |
306 cur.execute("SELECT * FROM sqlite_schema") | 310 cur.execute("SELECT * FROM sqlite_schema") |
307 for row in cur.fetchall(): | 311 for row in cur.fetchall(): |
308 if row[4] is not None: | 312 if row[4] is not None: |
309 print("%s;" % row[4]) | 313 print("%s;" % row[4]) |
310 | 314 |
311 def generate_ppep(f): | 315 def generate_ppep(f): |
312 #get keys from upstream tabular file using readline() | 316 # get keys from upstream tabular file using readline() |
313 # ref: https://stackoverflow.com/a/16713581/15509512 | 317 # ref: https://stackoverflow.com/a/16713581/15509512 |
314 # answer to "Use codecs to read file with correct encoding" | 318 # answer to "Use codecs to read file with correct encoding" |
315 file1_encoded = open(f, 'rb') | 319 file1_encoded = open(f, "rb") |
316 file1 = cx_getreader("latin-1")(file1_encoded) | 320 file1 = cx_getreader("latin-1")(file1_encoded) |
317 | 321 |
318 count = 0 | 322 count = 0 |
319 re_tab = re.compile('^[^\t]*') | 323 re_tab = re.compile("^[^\t]*") |
320 re_quote = re.compile('"') | 324 re_quote = re.compile('"') |
321 while True: | 325 while True: |
322 count += 1 | 326 count += 1 |
323 # Get next line from file | 327 # Get next line from file |
324 line = file1.readline() | 328 line = file1.readline() |
326 # end of file is reached | 330 # end of file is reached |
327 if not line: | 331 if not line: |
328 break | 332 break |
329 if count > 1: | 333 if count > 1: |
330 m = re_tab.match(line) | 334 m = re_tab.match(line) |
331 m = re_quote.sub('',m[0]) | 335 m = re_quote.sub("", m[0]) |
332 yield m | 336 yield m |
333 file1.close() | 337 file1.close() |
334 file1_encoded.close() | 338 file1_encoded.close() |
335 | 339 |
336 # Build an Aho-Corasick automaton from a trie | 340 # Build an Aho-Corasick automaton from a trie |
337 # - ref: | 341 # - ref: |
338 # - https://pypi.org/project/pyahocorasick/ | 342 # - https://pypi.org/project/pyahocorasick/ |
339 # - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm | 343 # - https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm |
340 # - https://en.wikipedia.org/wiki/Trie | 344 # - https://en.wikipedia.org/wiki/Trie |
341 auto = ahocorasick.Automaton() | 345 auto = ahocorasick.Automaton() |
342 re_phos = re.compile('p') | 346 re_phos = re.compile("p") |
343 # scrub out unsearchable characters per section | 347 # scrub out unsearchable characters per section |
344 # "Match the p_peptides to the @sequences array:" | 348 # "Match the p_peptides to the @sequences array:" |
345 # of the original | 349 # of the original |
346 # PhosphoPeptide Upstream Kinase Mapping.pl | 350 # PhosphoPeptide Upstream Kinase Mapping.pl |
347 # which originally read | 351 # which originally read |
348 # $tmp_p_peptide =~ s/#//g; | 352 # $tmp_p_peptide =~ s/#//g; |
349 # $tmp_p_peptide =~ s/\d//g; | 353 # $tmp_p_peptide =~ s/\d//g; |
350 # $tmp_p_peptide =~ s/\_//g; | 354 # $tmp_p_peptide =~ s/\_//g; |
351 # $tmp_p_peptide =~ s/\.//g; | 355 # $tmp_p_peptide =~ s/\.//g; |
352 # | 356 # |
353 re_scrub = re.compile('0-9_.#') | 357 re_scrub = re.compile("0-9_.#") |
354 ppep_count = 0 | 358 ppep_count = 0 |
355 for ppep in generate_ppep(f_name): | 359 for ppep in generate_ppep(f_name): |
356 ppep_count += 1 | 360 ppep_count += 1 |
357 add_to_trie = False | 361 add_to_trie = False |
358 #print(ppep) | 362 # print(ppep) |
359 scrubbed = re_scrub.sub('',ppep) | 363 scrubbed = re_scrub.sub("", ppep) |
360 deppep = re_phos.sub('',scrubbed) | 364 deppep = re_phos.sub("", scrubbed) |
361 if options.verbose: | 365 if options.verbose: |
362 print("deppep: %s; scrubbed: %s" % (deppep,scrubbed)) | 366 print("deppep: %s; scrubbed: %s" % (deppep, scrubbed)) |
363 #print(deppep) | 367 # print(deppep) |
364 cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,)) | 368 cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,)) |
365 if cur.fetchone() is None: | 369 if cur.fetchone() is None: |
366 add_to_trie = True | 370 add_to_trie = True |
367 cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,)) | 371 cur.execute("INSERT INTO deppep(seq) VALUES (?)", (deppep,)) |
368 cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,)) | 372 cur.execute("SELECT id FROM deppep WHERE seq = (?)", (deppep,)) |
369 deppep_id = cur.fetchone()[0] | 373 deppep_id = cur.fetchone()[0] |
370 if add_to_trie: | 374 if add_to_trie: |
371 #print((deppep_id, deppep)) | 375 # print((deppep_id, deppep)) |
372 # Build the trie | 376 # Build the trie |
373 auto.add_word(deppep, (deppep_id, deppep)) | 377 auto.add_word(deppep, (deppep_id, deppep)) |
374 cur.execute( | 378 cur.execute( |
375 "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)", | 379 "INSERT INTO ppep(seq, scrubbed, deppep_id) VALUES (?,?,?)", |
376 (ppep, scrubbed, deppep_id) | 380 (ppep, scrubbed, deppep_id), |
377 ) | 381 ) |
378 # def generate_deppep(): | 382 # def generate_deppep(): |
379 # cur.execute("SELECT seq FROM deppep") | 383 # cur.execute("SELECT seq FROM deppep") |
380 # for row in cur.fetchall(): | 384 # for row in cur.fetchall(): |
381 # yield row[0] | 385 # yield row[0] |
382 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)") | 386 cur.execute("SELECT count(*) FROM (SELECT seq FROM deppep GROUP BY seq)") |
383 for row in cur.fetchall(): | 387 for row in cur.fetchall(): |
384 deppep_count = row[0] | 388 deppep_count = row[0] |
385 | 389 |
386 cur.execute("SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)") | 390 cur.execute( |
391 "SELECT count(*) FROM (SELECT Sequence FROM UniProtKB GROUP BY Sequence)" | |
392 ) | |
387 for row in cur.fetchall(): | 393 for row in cur.fetchall(): |
388 sequence_count = row[0] | 394 sequence_count = row[0] |
389 | 395 |
396 print("%d phosphopeptides were read from input" % ppep_count) | |
390 print( | 397 print( |
391 "%d phosphopeptides were read from input" % ppep_count | 398 "%d corresponding dephosphopeptides are represented in input" |
392 ) | 399 % deppep_count |
393 print( | 400 ) |
394 "%d corresponding dephosphopeptides are represented in input" % deppep_count | |
395 ) | |
396 # Look for cases where both Gene_Name and Sequence are identical | 401 # Look for cases where both Gene_Name and Sequence are identical |
397 cur.execute(''' | 402 cur.execute( |
403 """ | |
398 SELECT Uniprot_ID, Gene_Name, Sequence | 404 SELECT Uniprot_ID, Gene_Name, Sequence |
399 FROM UniProtKB | 405 FROM UniProtKB |
400 WHERE Sequence IN ( | 406 WHERE Sequence IN ( |
401 SELECT Sequence | 407 SELECT Sequence |
402 FROM UniProtKB | 408 FROM UniProtKB |
403 GROUP BY Sequence, Gene_Name | 409 GROUP BY Sequence, Gene_Name |
404 HAVING count(*) > 1 | 410 HAVING count(*) > 1 |
405 ) | 411 ) |
406 ORDER BY Sequence | 412 ORDER BY Sequence |
407 ''') | 413 """ |
414 ) | |
408 duplicate_count = 0 | 415 duplicate_count = 0 |
409 old_seq = '' | 416 old_seq = "" |
410 for row in cur.fetchall(): | 417 for row in cur.fetchall(): |
411 if duplicate_count == 0: | 418 if duplicate_count == 0: |
412 print("\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column).") | 419 print( |
420 "\nEach of the following sequences is associated with several accession IDs (which are listed in the first column) but the same gene ID (which is listed in the second column)." | |
421 ) | |
413 if row[2] != old_seq: | 422 if row[2] != old_seq: |
414 old_seq = row[2] | 423 old_seq = row[2] |
415 duplicate_count += 1 | 424 duplicate_count += 1 |
416 if options.warn_duplicates: | 425 if options.warn_duplicates: |
417 print("\n%s\t%s\t%s" % row) | 426 print("\n%s\t%s\t%s" % row) |
418 else: | 427 else: |
419 if options.warn_duplicates: | 428 if options.warn_duplicates: |
420 print("%s\t%s" % (row[0], row[1])) | 429 print("%s\t%s" % (row[0], row[1])) |
421 if duplicate_count > 0: | 430 if duplicate_count > 0: |
422 print("\n%d sequences have duplicated accession IDs\n" % duplicate_count) | 431 print( |
423 | 432 "\n%d sequences have duplicated accession IDs\n" % duplicate_count |
424 print( | 433 ) |
425 "%s accession sequences will be searched\n" % sequence_count | 434 |
426 ) | 435 print("%s accession sequences will be searched\n" % sequence_count) |
427 | 436 |
428 #print(auto.dump()) | 437 # print(auto.dump()) |
429 | 438 |
430 # Convert the trie to an automaton (a finite-state machine) | 439 # Convert the trie to an automaton (a finite-state machine) |
431 auto.make_automaton() | 440 auto.make_automaton() |
432 | 441 |
433 # Execute query for seqs and metadata without fetching the results yet | 442 # Execute query for seqs and metadata without fetching the results yet |
434 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) | 443 uniprot_seq_and_id = cur.execute(UNIPROT_SEQ_AND_ID_SQL) |
435 while batch := uniprot_seq_and_id.fetchmany(size=50): | 444 while 1: |
436 if None == batch: | 445 batch = uniprot_seq_and_id.fetchmany(size=50) |
437 break | 446 if not batch: |
438 for Sequence, UniProtKB_id in batch: | 447 break |
439 if Sequence is not None: | 448 for Sequence, UniProtKB_id in batch: |
440 for end_index, (insert_order, original_value) in auto.iter(Sequence): | 449 if Sequence is not None: |
441 ker.execute(''' | 450 for end_index, (insert_order, original_value) in auto.iter( |
451 Sequence | |
452 ): | |
453 ker.execute( | |
454 """ | |
442 INSERT INTO deppep_UniProtKB | 455 INSERT INTO deppep_UniProtKB |
443 (deppep_id,UniProtKB_id,pos_start,pos_end) | 456 (deppep_id,UniProtKB_id,pos_start,pos_end) |
444 VALUES (?,?,?,?) | 457 VALUES (?,?,?,?) |
445 ''', ( | 458 """, |
446 insert_order, | 459 ( |
447 UniProtKB_id, | 460 insert_order, |
448 1 + end_index - len(original_value), | 461 UniProtKB_id, |
449 end_index | 462 1 + end_index - len(original_value), |
450 ) | 463 end_index, |
451 ) | 464 ), |
452 else: | 465 ) |
453 raise ValueError("UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" % (UniProtKB_id,)) | 466 else: |
454 ker.execute(""" | 467 raise ValueError( |
468 "UniProtKB_id %s, but Sequence is None: Check whether SwissProt file is missing sequence for this ID" | |
469 % (UniProtKB_id,) | |
470 ) | |
471 ker.execute( | |
472 """ | |
455 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' | 473 SELECT count(*) || ' accession-peptide-phosphopeptide combinations were found' |
456 FROM uniprotkb_pep_ppep_view | 474 FROM uniprotkb_pep_ppep_view |
457 """ | 475 """ |
458 ) | 476 ) |
459 for row in ker.fetchall(): | 477 for row in ker.fetchall(): |
460 print(row[0]) | 478 print(row[0]) |
461 | 479 |
462 ker.execute(""" | 480 ker.execute( |
481 """ | |
463 SELECT count(*) || ' accession matches were found', count(*) AS accession_count | 482 SELECT count(*) || ' accession matches were found', count(*) AS accession_count |
464 FROM ( | 483 FROM ( |
465 SELECT accession | 484 SELECT accession |
466 FROM uniprotkb_pep_ppep_view | 485 FROM uniprotkb_pep_ppep_view |
467 GROUP BY accession | 486 GROUP BY accession |
468 ) | 487 ) |
469 """ | 488 """ |
470 ) | 489 ) |
471 for row in ker.fetchall(): | 490 for row in ker.fetchall(): |
472 print(row[0]) | 491 print(row[0]) |
473 accession_count = row[1] | 492 |
474 | 493 ker.execute( |
475 ker.execute(""" | 494 """ |
476 SELECT count(*) || ' peptide matches were found' | 495 SELECT count(*) || ' peptide matches were found' |
477 FROM ( | 496 FROM ( |
478 SELECT peptide | 497 SELECT peptide |
479 FROM uniprotkb_pep_ppep_view | 498 FROM uniprotkb_pep_ppep_view |
480 GROUP BY peptide | 499 GROUP BY peptide |
481 ) | 500 ) |
482 """ | 501 """ |
483 ) | 502 ) |
484 for row in ker.fetchall(): | 503 for row in ker.fetchall(): |
485 print(row[0]) | 504 print(row[0]) |
486 | 505 |
487 ker.execute(""" | 506 ker.execute( |
507 """ | |
488 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count | 508 SELECT count(*) || ' phosphopeptide matches were found', count(*) AS phosphopeptide_count |
489 FROM ( | 509 FROM ( |
490 SELECT phosphopeptide | 510 SELECT phosphopeptide |
491 FROM uniprotkb_pep_ppep_view | 511 FROM uniprotkb_pep_ppep_view |
492 GROUP BY phosphopeptide | 512 GROUP BY phosphopeptide |
493 ) | 513 ) |
494 """ | 514 """ |
495 ) | 515 ) |
496 for row in ker.fetchall(): | 516 for row in ker.fetchall(): |
497 print(row[0]) | 517 print(row[0]) |
498 phosphopeptide_count = row[1] | |
499 | 518 |
500 con.commit() | 519 con.commit() |
501 ker.execute('vacuum') | 520 ker.execute("vacuum") |
502 con.close() | 521 con.close() |
522 | |
503 | 523 |
504 if __name__ == "__main__": | 524 if __name__ == "__main__": |
505 wrap_start_time = time.perf_counter() | 525 wrap_start_time = time.perf_counter() |
506 __main__() | 526 __main__() |
507 wrap_stop_time = time.perf_counter() | 527 wrap_stop_time = time.perf_counter() |
508 # print(wrap_start_time) | 528 # print(wrap_start_time) |
509 # print(wrap_stop_time) | 529 # print(wrap_stop_time) |
510 print("\nThe matching process took %d milliseconds to run.\n" % ((wrap_stop_time - wrap_start_time)*1000),) | 530 print( |
511 | 531 "\nThe matching process took %d milliseconds to run.\n" |
512 # vim: sw=4 ts=4 et ai : | 532 % ((wrap_stop_time - wrap_start_time) * 1000), |
533 ) | |
534 | |
535 # vim: sw=4 ts=4 et ai : |