Mercurial > repos > greg > plant_tribes_add_scaffold
changeset 3:04ad7b5d22dd draft
Uploaded
author | greg |
---|---|
date | Tue, 22 May 2018 10:01:20 -0400 |
parents | 38b2da3cac1e |
children | fa0822e74ed3 |
files | add_scaffold.py |
diffstat | 1 files changed, 27 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/add_scaffold.py Tue May 22 09:47:13 2018 -0400
+++ b/add_scaffold.py Tue May 22 10:01:20 2018 -0400
@@ -18,17 +18,17 @@
         self.args = None
         self.clustering_methods = []
         self.conn = None
-        self.fh = None
         self.gene_sequences_dict = {}
         self.scaffold_genes_dict = {}
         self.scaffold_recs = []
         self.species_genes_dict = {}
         self.species_ids_dict = {}
         self.taxa_lineage_config = None
-        self.__parse_args()
-        self.__connect_db()
+        self.parse_args()
+        self.fh = open(self.args.output, "w")
+        self.connect_db()
 
-    def __parse_args(self):
+    def parse_args(self):
         parser = argparse.ArgumentParser()
         parser.add_argument('--database_connection_string', dest='database_connection_string', help='Postgres database connection string'),
         parser.add_argument('--output', dest='output', help='Output dataset'),
@@ -37,9 +37,11 @@
 
     def stop_err(msg):
         sys.stderr.write(msg)
+        self.fh.flush()
+        self.fh.close()
         sys.exit(1)
 
-    def __connect_db(self):
+    def connect_db(self):
         url = make_url(self.args.database_connection_string)
         self.fh.write('Connecting to database with URL: %s' % url)
         args = url.translate_connect_args(username='user')
@@ -47,13 +49,13 @@
         assert url.get_dialect().name == 'postgresql', 'This script can only be used with PostgreSQL.'
         self.conn = psycopg2.connect(**args)
 
-    def _flush(self):
+    def flush(self):
         self.conn.commit()
 
-    def _shutdown(self):
+    def shutdown(self):
         self.conn.close()
 
-    def _update(self, sql, args):
+    def update(self, sql, args):
         try:
             cur = self.conn.cursor()
             cur.execute(sql, args)
@@ -77,16 +79,12 @@
             # The scaffold has not yet been added.
             pass
 
-    def _run(self):
+    def run(self):
         self.check_scaffold()
-        with open(self.args.output, "w") as fh:
-            self.fh = fh
-            self.process_annot_dir(self.fh)
-            self.fh.flush()
-            self.process_scaffold_config_files(fh)
-            self.fh.flush()
-            self.process_orthogroup_fasta_files(fh)
-            self.fh.flush()
+        self.process_annot_dir()
+        self.process_scaffold_config_files()
+        self.process_orthogroup_fasta_files()
+        self.fh.flush()
         self.fh.close()
 
     def process_annot_dir(self):
@@ -121,8 +119,8 @@
                 VALUES (nextval('plant_tribes_scaffold_id_seq'), %s, %s)
                 RETURNING id;
             """
-            cur = self._update(sql, tuple(args))
-            self._flush()
+            cur = self.update(sql, tuple(args))
+            self.flush()
             scaffold_id_db = cur.fetchone()[0]
             self.scaffold_recs.append([scaffold_id_db, scaffold_id, clustering_method])
             with open(file_name, "r") as fh:
@@ -152,8 +150,8 @@
                             INSERT INTO plant_tribes_orthogroup
                             VALUES (nextval('plant_tribes_orthogroup_id_seq'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                         """
-                        cur = self._update(sql, tuple(args))
-                        self._flush()
+                        cur = self.update(sql, tuple(args))
+                        self.flush()
         for file_name in glob.glob(os.path.join(file_dir, "*list")):
             items = os.path.basename(file_name).split(".")
             clustering_method = items[0]
@@ -255,8 +253,8 @@
                         INSERT INTO plant_tribes_taxon
                         VALUES (nextval('plant_tribes_taxon_id_seq'), %s, %s, %s, %s, %s, %s, %s);
                     """
-                    self._update(sql, tuple(args))
-                    self._flush()
+                    self.update(sql, tuple(args))
+                    self.flush()
 
     def process_orthogroup_fasta_files(self):
         scaffold_id = os.path.basename(self.args.scaffold_path)
@@ -349,8 +347,8 @@
                             VALUES (nextval('plant_tribes_gene_id_seq'), %s, %s, %s, %s)
                             RETURNING id;
                         """
-                        cur = self._update(sql, tuple(args))
-                        self._flush()
+                        cur = self.update(sql, tuple(args))
+                        self.flush()
                         gene_id_db = cur.fetchone()[0]
                         # Insert a row into the gene_scaffold_orthogroup_association table.
                         # Get the scaffold_rec for the current scaffold_id and clustering_method.
@@ -363,11 +361,11 @@
                                     INSERT INTO gene_scaffold_orthogroup_association
                                     VALUES (nextval('gene_scaffold_orthogroup_association_id_seq'), %s, %s, %s);
                                 """
-                                cur = self._update(sql, tuple(args))
-                                self._flush()
+                                cur = self.update(sql, tuple(args))
+                                self.flush()
 
 
 if __name__ == '__main__':
     add_scaffold = AddScaffold()
-    add_scaffold._run()
-    add_scaffold._shutdown()
+    add_scaffold.run()
+    add_scaffold.shutdown()
```