Mercurial > repos > greg > plant_tribes_add_scaffold
changeset 9:3706dea6c2f1 draft default tip
Uploaded
author | greg |
---|---|
date | Tue, 22 May 2018 10:49:03 -0400 |
parents | 9b2ede3e7100 |
children | |
files | add_scaffold.py |
diffstat | 1 files changed, 17 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/add_scaffold.py Tue May 22 10:17:21 2018 -0400 +++ b/add_scaffold.py Tue May 22 10:49:03 2018 -0400 @@ -35,15 +35,9 @@ parser.add_argument('--scaffold_path', dest='scaffold_path', help='Full path to PlantTribes scaffold directory') self.args = parser.parse_args() - def stop_err(self, msg): - sys.stderr.write(msg) - self.fh.flush() - self.fh.close() - sys.exit(1) - def connect_db(self): url = make_url(self.args.database_connection_string) - self.fh.write('Connecting to database with URL: %s' % url) + self.log('Connecting to database with URL: %s' % url) args = url.translate_connect_args(username='user') args.update(url.query) assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.' @@ -64,6 +58,16 @@ self.stop_err(msg) return cur + def stop_err(self, msg): + sys.stderr.write(msg) + self.fh.flush() + self.fh.close() + sys.exit(1) + + def log(self, msg): + self.fh.write("%s\n" % msg) + self.fh.flush() + @property def can_add_scaffold(self): """ @@ -116,7 +120,7 @@ if clustering_method not in self.clustering_methods: self.clustering_methods.append(clustering_method) # Insert a row in to the plant_tribes_scaffold table. - self.fh.write("Inserting a row into the plant_tribes_scaffold table for scaffold %s and clustering method %s..." % (scaffold_id, clustering_method)) + self.log("Inserting a row into the plant_tribes_scaffold table for scaffold %s and clustering method %s..." % (scaffold_id, clustering_method)) args = [scaffold_id, clustering_method] sql = """ INSERT INTO plant_tribes_scaffold @@ -146,7 +150,7 @@ num_species += 1 num_genes += j_int # Insert a row into the plant_tribes_orthogroup table. - self.fh.write("Inserting a row into the plant_tribes_orthogroup table...") + self.log("Inserting a row into the plant_tribes_orthogroup table...") args = [orthogroup_id, scaffold_id_db, num_species, num_genes] for k in range(super_ortho_start_index, len(items)): args.append('%s' % str(items[k])) @@ -183,7 +187,7 @@ """ scaffold_id = os.path.basename(self.args.scaffold_path) file_name = os.path.join(self.args.scaffold_path, '%s.rootingOrder.config' % scaffold_id) - self.fh.write("Processing rooting order config: %s" % str(file_name)) + self.log("Processing rooting order config: %s" % str(file_name)) # Populate self.species_ids_dict. with open(file_name, "r") as fh: for i, line in enumerate(fh): @@ -224,7 +228,7 @@ self.species_genes_dict[species_genes_dict_key] = [species_name, 1] # Populate the plant_tribes_taxon table. file_name = os.path.join(self.args.scaffold_path, '%s.taxaLineage.config' % scaffold_id) - self.fh.write("Processing taxa lineage config: %s" % str(file_name)) + self.log("Processing taxa lineage config: %s" % str(file_name)) with open(file_name, "r") as fh: for i, line in enumerate(fh): line = line.strip() @@ -234,7 +238,7 @@ # Example line: Populus trichocarpa\tSalicaceae\tMalpighiales\tRosids\tCore Eudicots items = line.split("\t") species_name = items[0] - self.fh.write("Calculating the number of genes for species_name: %s" % str(species_name)) + self.log("Calculating the number of genes for species_name: %s" % str(species_name)) for species_genes_dict_key in sorted(self.species_genes_dict.keys()): # The format of species_genes_dict_key is <clustering_method>^^<species_code>. species_genes_dict_key_items = species_genes_dict_key.split("^^") @@ -312,7 +316,7 @@ orthogroup_id = items[0] clustering_method = items[1] gene_id = items[2] - self.fh.write("Populating the plant_tribes_gene and gene_scaffold_orthogroup_association tables with gene %s, scaffold %s and orthogroup %s..." % (gene_id, scaffold_id, orthogroup_id)) + self.log("Populating the plant_tribes_gene and gene_scaffold_orthogroup_association tables with gene %s, scaffold %s and orthogroup %s..." % (gene_id, scaffold_id, orthogroup_id)) # The value will be a list containing both # clustering_method and the dna string. dna_sequence = dna_dict[combined_id]