comparison gene_family_scaffold_loader.py @ 5:cb986be6355e draft

Uploaded
author greg
date Tue, 05 Jun 2018 10:56:33 -0400
parents 3c6514d810ae
children 9a4b0ae3d408
comparison
equal deleted inserted replaced
4:3c6514d810ae 5:cb986be6355e
348 # then we'll add a row only to the gene_scaffold_orthogroup_association table. 348 # then we'll add a row only to the gene_scaffold_orthogroup_association table.
349 # Get the taxon_id for the species_name from the plant_tribes_taxon table. 349 # Get the taxon_id for the species_name from the plant_tribes_taxon table.
350 sql = "SELECT id FROM plant_tribes_taxon WHERE species_name = '%s';" % species_name 350 sql = "SELECT id FROM plant_tribes_taxon WHERE species_name = '%s';" % species_name
351 cur = self.conn.cursor() 351 cur = self.conn.cursor()
352 cur.execute(sql) 352 cur.execute(sql)
353 taxon_id = cur.fetchone()[0] 353 taxon_id_db = cur.fetchone()[0]
354 # If the plant_tribes_gene table contains a row that has the gene_id, 354 # If the plant_tribes_gene table contains a row that has the gene_id,
355 # then we'll add a row only to the gene_scaffold_orthogroup_association table. 355 # then we'll add a row only to the gene_scaffold_orthogroup_association table.
356 sql = "SELECT id FROM plant_tribes_gene WHERE gene_id = '%s';" % gene_id 356 sql = "SELECT id FROM plant_tribes_gene WHERE gene_id = '%s';" % gene_id
357 cur = self.conn.cursor() 357 cur = self.conn.cursor()
358 cur.execute(sql) 358 cur.execute(sql)
359 try: 359 try:
360 gene_id_db = cur.fetchone()[0] 360 gene_id_db = cur.fetchone()[0]
361 except: 361 except:
362 # Insert a row into the plant_tribes_gene table. 362 # Insert a row into the plant_tribes_gene table.
363 args = [gene_id, taxon_id, dna_sequence, aa_sequence] 363 args = [gene_id, dna_sequence, aa_sequence]
364 sql = """ 364 sql = """
365 INSERT INTO plant_tribes_gene 365 INSERT INTO plant_tribes_gene
366 VALUES (nextval('plant_tribes_gene_id_seq'), %s, %s, %s, %s) 366 VALUES (nextval('plant_tribes_gene_id_seq'), %s, %s, %s)
367 RETURNING id; 367 RETURNING id;
368 """ 368 """
369 cur = self.update(sql, tuple(args)) 369 cur = self.update(sql, tuple(args))
370 self.flush() 370 self.flush()
371 gene_id_db = cur.fetchone()[0] 371 gene_id_db = cur.fetchone()[0]
376 # Get the scaffold_rec for the current scaffold_id and clustering_method. 376 # Get the scaffold_rec for the current scaffold_id and clustering_method.
377 # The list is [<scaffold_id_db>, <scaffold_id>, <clustering_method>] 377 # The list is [<scaffold_id_db>, <scaffold_id>, <clustering_method>]
378 for scaffold_rec in self.scaffold_recs: 378 for scaffold_rec in self.scaffold_recs:
379 if scaffold_id in scaffold_rec and clustering_method in scaffold_rec: 379 if scaffold_id in scaffold_rec and clustering_method in scaffold_rec:
380 scaffold_id_db = scaffold_rec[0] 380 scaffold_id_db = scaffold_rec[0]
381 args = [gene_id_db, scaffold_id_db, orthogroup_id_db] 381 args = [gene_id_db, scaffold_id_db, orthogroup_id_db, taxon_id_db]
382 sql = """ 382 sql = """
383 INSERT INTO gene_scaffold_orthogroup_association 383 INSERT INTO gene_scaffold_orthogroup_taxon_association
384 VALUES (nextval('gene_scaffold_orthogroup_association_id_seq'), %s, %s, %s); 384 VALUES (nextval('gene_scaffold_orthogroup_taxon_association_id_seq'), %s, %s, %s, %s);
385 """ 385 """
386 cur = self.update(sql, tuple(args)) 386 cur = self.update(sql, tuple(args))
387 self.flush() 387 self.flush()
388 if gsoai % 1000 == 0: 388 if gsoai % 1000 == 0:
389 self.log("Inserted 1000 more rows into the gene_scaffold_orthogroup_association table.") 389 self.log("Inserted 1000 more rows into the gene_scaffold_orthogroup_association table.")