changeset 9:3706dea6c2f1 draft default tip

Uploaded
author greg
date Tue, 22 May 2018 10:49:03 -0400
parents 9b2ede3e7100
children
files add_scaffold.py
diffstat 1 files changed, 17 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/add_scaffold.py	Tue May 22 10:17:21 2018 -0400
+++ b/add_scaffold.py	Tue May 22 10:49:03 2018 -0400
@@ -35,15 +35,9 @@
         parser.add_argument('--scaffold_path', dest='scaffold_path', help='Full path to PlantTribes scaffold directory')
         self.args = parser.parse_args()
 
-    def stop_err(self, msg):
-        sys.stderr.write(msg)
-        self.fh.flush()
-        self.fh.close()
-        sys.exit(1)
-
     def connect_db(self):
         url = make_url(self.args.database_connection_string)
-        self.fh.write('Connecting to database with URL: %s' % url)
+        self.log('Connecting to database with URL: %s' % url)
         args = url.translate_connect_args(username='user')
         args.update(url.query)
         assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
@@ -64,6 +58,16 @@
             self.stop_err(msg)
         return cur
 
+    def stop_err(self, msg):
+        sys.stderr.write(msg)
+        self.fh.flush()
+        self.fh.close()
+        sys.exit(1)
+
+    def log(self, msg):
+        self.fh.write("%s\n" % msg)
+        self.fh.flush()
+
     @property
     def can_add_scaffold(self):
         """
@@ -116,7 +120,7 @@
             if clustering_method not in self.clustering_methods:
                 self.clustering_methods.append(clustering_method)
             # Insert a row in to the plant_tribes_scaffold table.
-            self.fh.write("Inserting a row into the plant_tribes_scaffold table for scaffold %s and clustering method %s..." % (scaffold_id, clustering_method))
+            self.log("Inserting a row into the plant_tribes_scaffold table for scaffold %s and clustering method %s..." % (scaffold_id, clustering_method))
             args = [scaffold_id, clustering_method]
             sql = """
                 INSERT INTO plant_tribes_scaffold
@@ -146,7 +150,7 @@
                             num_species += 1
                             num_genes += j_int
                     # Insert a row into the plant_tribes_orthogroup table.
-                    self.fh.write("Inserting a row into the plant_tribes_orthogroup table...")
+                    self.log("Inserting a row into the plant_tribes_orthogroup table...")
                     args = [orthogroup_id, scaffold_id_db, num_species, num_genes]
                     for k in range(super_ortho_start_index, len(items)):
                         args.append('%s' % str(items[k]))
@@ -183,7 +187,7 @@
         """
         scaffold_id = os.path.basename(self.args.scaffold_path)
         file_name = os.path.join(self.args.scaffold_path, '%s.rootingOrder.config' % scaffold_id)
-        self.fh.write("Processing rooting order config: %s" % str(file_name))
+        self.log("Processing rooting order config: %s" % str(file_name))
         # Populate self.species_ids_dict.
         with open(file_name, "r") as fh:
             for i, line in enumerate(fh):
@@ -224,7 +228,7 @@
                     self.species_genes_dict[species_genes_dict_key] = [species_name, 1]
         # Populate the plant_tribes_taxon table.
         file_name = os.path.join(self.args.scaffold_path, '%s.taxaLineage.config' % scaffold_id)
-        self.fh.write("Processing taxa lineage config: %s" % str(file_name))
+        self.log("Processing taxa lineage config: %s" % str(file_name))
         with open(file_name, "r") as fh:
             for i, line in enumerate(fh):
                 line = line.strip()
@@ -234,7 +238,7 @@
                 # Example line: Populus trichocarpa\tSalicaceae\tMalpighiales\tRosids\tCore Eudicots
                 items = line.split("\t")
                 species_name = items[0]
-                self.fh.write("Calculating the number of genes for species_name: %s" % str(species_name))
+                self.log("Calculating the number of genes for species_name: %s" % str(species_name))
                 for species_genes_dict_key in sorted(self.species_genes_dict.keys()):
                     # The format of species_genes_dict_key is <clustering_method>^^<species_code>.
                     species_genes_dict_key_items = species_genes_dict_key.split("^^")
@@ -312,7 +316,7 @@
             orthogroup_id = items[0]
             clustering_method = items[1]
             gene_id = items[2]
-            self.fh.write("Populating the plant_tribes_gene and gene_scaffold_orthogroup_association tables with gene %s, scaffold %s and orthogroup %s..." % (gene_id, scaffold_id, orthogroup_id))
+            self.log("Populating the plant_tribes_gene and gene_scaffold_orthogroup_association tables with gene %s, scaffold %s and orthogroup %s..." % (gene_id, scaffold_id, orthogroup_id))
             # The value will be a list containing both
             # clustering_method and the dna string.
             dna_sequence = dna_dict[combined_id]