changeset 3:04ad7b5d22dd draft

Uploaded
author greg
date Tue, 22 May 2018 10:01:20 -0400
parents 38b2da3cac1e
children fa0822e74ed3
files add_scaffold.py
diffstat 1 files changed, 27 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/add_scaffold.py	Tue May 22 09:47:13 2018 -0400
+++ b/add_scaffold.py	Tue May 22 10:01:20 2018 -0400
@@ -18,17 +18,17 @@
         self.args = None
         self.clustering_methods = []
         self.conn = None
-        self.fh = None
         self.gene_sequences_dict = {}
         self.scaffold_genes_dict = {}
         self.scaffold_recs = []
         self.species_genes_dict = {}
         self.species_ids_dict = {}
         self.taxa_lineage_config = None
-        self.__parse_args()
-        self.__connect_db()
+        self.parse_args()
+        self.fh = open(self.args.output, "w")
+        self.connect_db()
 
-    def __parse_args(self):
+    def parse_args(self):
         parser = argparse.ArgumentParser()
         parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
         parser.add_argument('--output', dest='output', help='Output dataset'),
@@ -37,9 +37,11 @@
 
     def stop_err(msg):
         sys.stderr.write(msg)
+        self.fh.flush()
+        self.fh.close()
         sys.exit(1)
 
-    def __connect_db(self):
+    def connect_db(self):
         url = make_url(self.args.database_connection_string)
         self.fh.write('Connecting to database with URL: %s' % url)
         args = url.translate_connect_args(username='user')
@@ -47,13 +49,13 @@
         assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
         self.conn = psycopg2.connect(**args)
 
-    def _flush(self):
+    def flush(self):
         self.conn.commit()
 
-    def _shutdown(self):
+    def shutdown(self):
         self.conn.close()
 
-    def _update(self, sql, args):
+    def update(self, sql, args):
         try:
             cur = self.conn.cursor()
             cur.execute(sql, args)
@@ -77,16 +79,12 @@
             # The scaffold has not yet been added.
             pass
 
-    def _run(self):
+    def run(self):
         self.check_scaffold()
-        with open(self.args.output, "w") as fh:
-            self.fh = fh
-            self.process_annot_dir(self.fh)
-            self.fh.flush()
-            self.process_scaffold_config_files(fh)
-            self.fh.flush()
-            self.process_orthogroup_fasta_files(fh)
-            self.fh.flush()
+        self.process_annot_dir()
+        self.process_scaffold_config_files()
+        self.process_orthogroup_fasta_files()
+        self.fh.flush()
         self.fh.close()
 
     def process_annot_dir(self):
@@ -121,8 +119,8 @@
                      VALUES (nextval('plant_tribes_scaffold_id_seq'), %s, %s)
                      RETURNING id;
             """
-            cur = self._update(sql, tuple(args))
-            self._flush()
+            cur = self.update(sql, tuple(args))
+            self.flush()
             scaffold_id_db = cur.fetchone()[0]
             self.scaffold_recs.append([scaffold_id_db, scaffold_id, clustering_method])
             with open(file_name, "r") as fh:
@@ -152,8 +150,8 @@
                         INSERT INTO plant_tribes_orthogroup
                              VALUES (nextval('plant_tribes_orthogroup_id_seq'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                     """
-                    cur = self._update(sql, tuple(args))
-                    self._flush()
+                    cur = self.update(sql, tuple(args))
+                    self.flush()
         for file_name in glob.glob(os.path.join(file_dir, "*list")):
             items = os.path.basename(file_name).split(".")
             clustering_method = items[0]
@@ -255,8 +253,8 @@
                         INSERT INTO plant_tribes_taxon
                              VALUES (nextval('plant_tribes_taxon_id_seq'), %s, %s, %s, %s, %s, %s, %s);
                     """
-                    self._update(sql, tuple(args))
-                    self._flush()
+                    self.update(sql, tuple(args))
+                    self.flush()
 
     def process_orthogroup_fasta_files(self):
         scaffold_id = os.path.basename(self.args.scaffold_path)
@@ -349,8 +347,8 @@
                          VALUES (nextval('plant_tribes_gene_id_seq'), %s, %s, %s, %s)
                          RETURNING id;
                 """
-                cur = self._update(sql, tuple(args))
-                self._flush()
+                cur = self.update(sql, tuple(args))
+                self.flush()
                 gene_id_db = cur.fetchone()[0]
             # Insert a row into the gene_scaffold_orthogroup_association table.
             # Get the scaffold_rec for the current scaffold_id and clustering_method.
@@ -363,11 +361,11 @@
                 INSERT INTO gene_scaffold_orthogroup_association
                      VALUES (nextval('gene_scaffold_orthogroup_association_id_seq'), %s, %s, %s);
             """
-            cur = self._update(sql, tuple(args))
-            self._flush()
+            cur = self.update(sql, tuple(args))
+            self.flush()
 
 
 if __name__ == '__main__':
     add_scaffold = AddScaffold()
-    add_scaffold._run()
-    add_scaffold._shutdown()
+    add_scaffold.run()
+    add_scaffold.shutdown()