# HG changeset patch
# User greg
# Date 1491502899 14400
# Node ID 656614635ebfa9224fe3ac66f223602a8049b601
# Parent fb3feee2638d71d14f8bca81bbe6f7d6b6387f32
Uploaded
diff -r fb3feee2638d -r 656614635ebf gene_family_classifier.py
--- a/gene_family_classifier.py Wed Mar 22 08:45:31 2017 -0400
+++ b/gene_family_classifier.py Thu Apr 06 14:21:39 2017 -0400
@@ -3,8 +3,9 @@
import os
import shutil
import subprocess
-import sys
-import tempfile
+
+import utils
+
BUFF_SIZE = 1048576
OUTPUT_DIR = 'geneFamilyClassification_dir'
@@ -34,79 +35,6 @@
args = parser.parse_args()
-def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False):
- tmp_stderr.close()
- """
- Return a stderr string of reasonable size.
- """
- # Get stderr, allowing for case where it's very large.
- tmp_stderr = open(tmp_err, 'rb')
- stderr_str = ''
- buffsize = BUFF_SIZE
- try:
- while True:
- stderr_str += tmp_stderr.read(buffsize)
- if not stderr_str or len(stderr_str) % buffsize != 0:
- break
- except OverflowError:
- pass
- tmp_stderr.close()
- if include_stdout:
- tmp_stdout = open(tmp_out, 'rb')
- stdout_str = ''
- buffsize = BUFF_SIZE
- try:
- while True:
- stdout_str += tmp_stdout.read(buffsize)
- if not stdout_str or len(stdout_str) % buffsize != 0:
- break
- except OverflowError:
- pass
- tmp_stdout.close()
- if include_stdout:
- return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str)
- return stderr_str
-
-
-def move_directory_files(source_dir, destination_dir):
- source_directory = os.path.abspath(source_dir)
- destination_directory = os.path.abspath(destination_dir)
- if not os.path.isdir(destination_directory):
- os.makedirs(destination_directory)
- for dir_entry in os.listdir(source_directory):
- source_entry = os.path.join(source_directory, dir_entry)
- shutil.move(source_entry, destination_directory)
-
-
-def stop_err(msg):
- sys.stderr.write(msg)
- sys.exit(1)
-
-
-def write_html_output(output, title, dir):
- with open(output, 'w') as fh:
- fh.write('
%s
\n' % title)
- fh.write('\n')
- fh.write('| Size | Name |
\n')
- for index, fname in enumerate(sorted(os.listdir(dir))):
- if index % 2 == 0:
- bgcolor = '#D8D8D8'
- else:
- bgcolor = '#FFFFFF'
- try:
- size = str(os.path.getsize(os.path.join(dir, fname)))
- except:
- size = 'unknown'
- link = '%s\n' % (fname, fname)
- fh.write('| %s | %s |
\n' % (bgcolor, size, link))
- fh.write('
\n')
-
-
-# Define command response buffers.
-tmp_out = tempfile.NamedTemporaryFile().name
-tmp_stdout = open(tmp_out, 'wb')
-tmp_err = tempfile.NamedTemporaryFile().name
-tmp_stderr = open(tmp_err, 'wb')
# Build the command line.
cmd = 'GeneFamilyClassifier'
cmd += ' --proteins %s' % args.input
@@ -134,12 +62,9 @@
create_corresponding_coding_sequences = True
cmd += ' --coding_sequences %s' % args.coding_sequences
# Run the command.
-proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True)
+proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
rc = proc.wait()
-# Handle execution errors.
-if rc != 0:
- error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
- stop_err( error_message )
+utils.check_execution_errors(rc, proc.stderr)
# Handle hmmscan.log output.
if args.classifier in ['hmmscan', 'both']:
src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log')
@@ -159,12 +84,12 @@
orthogroups_fasta_dest_dir = args.output_ptortho_dir
title = 'Orthogroups files'
orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
- move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
- write_html_output(out_file, title, orthogroups_fasta_dest_dir)
+ utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
+ utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
# Handle single copy orthogroup outputs.
if args.output_ptsco is not None:
single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
single_copy_fasta_dest_dir = args.output_ptsco_dir
title = 'Single copy orthogroups files'
- move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
- write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
+ utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
+ utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
diff -r fb3feee2638d -r 656614635ebf gene_family_classifier.xml
--- a/gene_family_classifier.xml Wed Mar 22 08:45:31 2017 -0400
+++ b/gene_family_classifier.xml Thu Apr 06 14:21:39 2017 -0400
@@ -1,16 +1,10 @@
classifies gene sequences into pre-computed orthologous plant gene family clusters
-
- plant_tribes_gene_family_classifier
-
-
-
-
-
-
-
-
-
+
+ macros.xml
+
+
+
-
- @misc{None,
- journal = {None},
- author = {1. Wafula EK},
- title = {Manuscript in preparation},
- year = {None},
- url = {https://github.com/dePamphilis/PlantTribes},}
-
-
- @article{Sasidharan2012,
- journal = {Nucleic Acids Research},
- author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
- title = {GFam: a platform for automatic annotation of gene families},
- year = {2012},
- pages = {gks631},}
-
-
- @article{Li2003,
- journal = {Genome Research}
- author = {3. Li L, Stoeckert CJ, Roos DS},
- title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
- year = {2003},
- volume = {13},
- number = {9},
- pages = {2178-2189},}
-
-
- @article{Emms2015,
- journal = {Genome Biology}
- author = {4. Emms DM, Kelly S},
- title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
- year = {2015},
- volume = {16},
- number = {1},
- pages = {157},}
-
+
+
@article{Altschul1990,
journal = {Journal of molecular biology}
diff -r fb3feee2638d -r 656614635ebf macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Apr 06 14:21:39 2017 -0400
@@ -0,0 +1,85 @@
+
+
+
+
+ plant_tribes_assembly_post_processor
+
+
+
+
+ plant_tribes_gene_family_classifier
+
+
+
+
+ plant_tribes_gene_family_integrator
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{None,
+ journal = {None},
+ author = {1. Wafula EK},
+ title = {Manuscript in preparation},
+ year = {None},
+ url = {https://github.com/dePamphilis/PlantTribes},}
+
+
+
+
+ @article{Sasidharan2012,
+ journal = {Nucleic Acids Research},
+ author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+ title = {GFam: a platform for automatic annotation of gene families},
+ year = {2012},
+ pages = {gks631},}
+
+
+ @article{Li2003,
+ journal = {Genome Research},
+ author = {3. Li L, Stoeckert CJ, Roos DS},
+ title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+ year = {2003},
+ volume = {13},
+ number = {9},
+ pages = {2178-2189},}
+
+
+ @article{Emms2015,
+ journal = {Genome Biology},
+ author = {4. Emms DM, Kelly S},
+ title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+ year = {2015},
+ volume = {16},
+ number = {1},
+ pages = {157},}
+
+
+
diff -r fb3feee2638d -r 656614635ebf utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Thu Apr 06 14:21:39 2017 -0400
@@ -0,0 +1,42 @@
+import os
+import shutil
+import sys
+
+
+def check_execution_errors(rc, stderr):
+    """
+    Abort the script when a command reported failure.
+
+    rc is the command's return code; stderr is a readable file-like
+    object holding the command's error output.  A zero return code is a
+    no-op.  Otherwise everything readable from stderr is handed to
+    stop_err(), which does not return.
+    """
+    if rc == 0:
+        return
+    stop_err(stderr.read())
+
+
+def move_directory_files(source_dir, destination_dir):
+    """
+    Move every entry of source_dir into destination_dir.
+
+    Both paths are made absolute first, and the destination directory is
+    created when it does not exist yet.
+    """
+    src_root = os.path.abspath(source_dir)
+    dest_root = os.path.abspath(destination_dir)
+    if not os.path.isdir(dest_root):
+        os.makedirs(dest_root)
+    for entry_name in os.listdir(src_root):
+        shutil.move(os.path.join(src_root, entry_name), dest_root)
+
+
+def stop_err(msg):
+    """
+    Write msg to standard error and terminate with exit status 1.
+    """
+    sys.stderr.write(msg)
+    # Same effect as sys.exit(1), which raises SystemExit(1).
+    raise SystemExit(1)
+
+
+def write_html_output(output, title, dir):
+    """
+    Write an HTML page to output that lists the files found in dir.
+
+    The page starts with title as its heading, followed by a table with
+    one row per directory entry (sorted by name) showing the entry's size
+    in bytes and a link to the entry.  Rows alternate between two
+    background colors.
+
+    NOTE(review): the markup in this hunk had been stripped, leaving
+    broken string literals and fewer %s placeholders than arguments.  The
+    tags below are a minimal reconstruction that matches the argument
+    counts; confirm the exact attributes against the upstream utils.py.
+    """
+    with open(output, 'w') as fh:
+        fh.write('<html><head><h3>%s</h3></head>\n' % title)
+        fh.write('<body><table>\n')
+        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
+        for index, fname in enumerate(sorted(os.listdir(dir))):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            try:
+                size = str(os.path.getsize(os.path.join(dir, fname)))
+            except OSError:
+                # Best effort: an entry that cannot be stat'ed is still
+                # listed.  Only OSError is caught so that interrupts and
+                # exits are not swallowed.
+                size = 'unknown'
+            link = '<a href="%s">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
+        fh.write('</table></body></html>\n')