comparison gene_family_classifier.py @ 131:656614635ebf draft

Uploaded
author greg
date Thu, 06 Apr 2017 14:21:39 -0400
parents 7789adec8707
children b2ae23e484e8
comparison
equal deleted inserted replaced
130:fb3feee2638d 131:656614635ebf
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 import argparse 2 import argparse
3 import os 3 import os
4 import shutil 4 import shutil
5 import subprocess 5 import subprocess
6 import sys 6
7 import tempfile 7 import utils
8
8 9
9 BUFF_SIZE = 1048576 10 BUFF_SIZE = 1048576
10 OUTPUT_DIR = 'geneFamilyClassification_dir' 11 OUTPUT_DIR = 'geneFamilyClassification_dir'
11 12
12 parser = argparse.ArgumentParser() 13 parser = argparse.ArgumentParser()
32 parser.add_argument('--output_ptsco_dir', dest='output_ptsco_dir', default=None, help='output_ptsco.files_path') 33 parser.add_argument('--output_ptsco_dir', dest='output_ptsco_dir', default=None, help='output_ptsco.files_path')
33 34
34 args = parser.parse_args() 35 args = parser.parse_args()
35 36
36 37
def _read_captured_output(path):
    """
    Return the full contents of the capture file at `path` as a native string.

    The file is read in BUFF_SIZE chunks until read() returns an empty
    chunk, which is the only reliable end-of-file signal.
    """
    chunks = []
    with open(path, 'rb') as fh:
        try:
            while True:
                chunk = fh.read(BUFF_SIZE)
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Kept from the original implementation: treat an overflow while
            # reading as "output truncated" and return what was read so far.
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # On Python 3 bytes is not str; decode so callers can format the
        # result and write it to sys.stderr.  On Python 2 the data is
        # already a native str and is returned unchanged.
        data = data.decode('utf-8', 'replace')
    return data


def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False):
    """
    Return the captured stderr of a command, optionally preceded by stdout.

    tmp_err -- path of the file the command's stderr was written to.
    tmp_stderr -- open write handle on tmp_err; it is closed here before the
        file is read back.
    tmp_out -- path of the file the command's stdout was written to.
    tmp_stdout -- open write handle on tmp_out; accepted for interface
        compatibility and not used.
    include_stdout -- when True the result is a
        'STDOUT\\n...\\n\\nSTDERR\\n...\\n' report instead of stderr alone.
    """
    tmp_stderr.close()
    stderr_str = _read_captured_output(tmp_err)
    if include_stdout:
        stdout_str = _read_captured_output(tmp_out)
        return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str)
    return stderr_str
69
70
def move_directory_files(source_dir, destination_dir):
    """
    Move every entry found in source_dir into destination_dir.

    Both paths are made absolute first.  destination_dir (including any
    missing parent directories) is created when it does not exist yet.
    Errors raised by os.listdir() or shutil.move() propagate to the caller.
    """
    source_directory = os.path.abspath(source_dir)
    destination_directory = os.path.abspath(destination_dir)
    try:
        os.makedirs(destination_directory)
    except OSError:
        # Creating first and checking afterwards avoids the race between an
        # isdir() test and makedirs(); only a failure that leaves no
        # directory behind is a real error.
        if not os.path.isdir(destination_directory):
            raise
    for dir_entry in os.listdir(source_directory):
        source_entry = os.path.join(source_directory, dir_entry)
        shutil.move(source_entry, destination_directory)
79
80
def stop_err(msg):
    """
    Write msg to standard error and terminate with exit status 1.
    """
    sys.stderr.write(msg)
    raise SystemExit(1)
84
85
def write_html_output(output, title, dir):
    """
    Write an HTML page to `output` listing the entries of directory `dir`.

    output -- path of the HTML file to write.
    title -- heading text placed at the top of the page.
    dir -- directory whose entries are listed, sorted by name, one table
        row per entry showing its size in bytes and a link to it.  (The
        name shadows the builtin but is kept so keyword callers still work.)
    """
    with open(output, 'w') as fh:
        fh.write('<html><head><h3>%s</h3></head>\n' % title)
        fh.write('<body><p/><table cellpadding="2">\n')
        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
        for index, fname in enumerate(sorted(os.listdir(dir))):
            # Alternate the row background so the table is easier to scan.
            if index % 2 == 0:
                bgcolor = '#D8D8D8'
            else:
                bgcolor = '#FFFFFF'
            try:
                size = str(os.path.getsize(os.path.join(dir, fname)))
            except OSError:
                # Only filesystem failures are reported as 'unknown'; a bare
                # except would also swallow KeyboardInterrupt and SystemExit.
                size = 'unknown'
            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
        fh.write('</table></body></html>\n')
103
104
105 # Define command response buffers.
106 tmp_out = tempfile.NamedTemporaryFile().name
107 tmp_stdout = open(tmp_out, 'wb')
108 tmp_err = tempfile.NamedTemporaryFile().name
109 tmp_stderr = open(tmp_err, 'wb')
110 # Build the command line. 38 # Build the command line.
111 cmd = 'GeneFamilyClassifier' 39 cmd = 'GeneFamilyClassifier'
112 cmd += ' --proteins %s' % args.input 40 cmd += ' --proteins %s' % args.input
113 cmd += ' --scaffold %s' % args.scaffold 41 cmd += ' --scaffold %s' % args.scaffold
114 cmd += ' --method %s' % args.method 42 cmd += ' --method %s' % args.method
132 create_corresponding_coding_sequences = False 60 create_corresponding_coding_sequences = False
133 else: 61 else:
134 create_corresponding_coding_sequences = True 62 create_corresponding_coding_sequences = True
135 cmd += ' --coding_sequences %s' % args.coding_sequences 63 cmd += ' --coding_sequences %s' % args.coding_sequences
136 # Run the command. 64 # Run the command.
137 proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True) 65 proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
138 rc = proc.wait() 66 rc = proc.wait()
139 # Handle execution errors. 67 utils.check_execution_errors(rc, proc.stderr)
140 if rc != 0:
141 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
142 stop_err( error_message )
143 # Handle hmmscan.log output. 68 # Handle hmmscan.log output.
144 if args.classifier in ['hmmscan', 'both']: 69 if args.classifier in ['hmmscan', 'both']:
145 src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log') 70 src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log')
146 if os.path.exists(src_hmmscan_log): 71 if os.path.exists(src_hmmscan_log):
147 if args.save_hmmscan_log is None: 72 if args.save_hmmscan_log is None:
157 else: 82 else:
158 out_file = args.output_ptortho 83 out_file = args.output_ptortho
159 orthogroups_fasta_dest_dir = args.output_ptortho_dir 84 orthogroups_fasta_dest_dir = args.output_ptortho_dir
160 title = 'Orthogroups files' 85 title = 'Orthogroups files'
161 orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta') 86 orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
162 move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir) 87 utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
163 write_html_output(out_file, title, orthogroups_fasta_dest_dir) 88 utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
164 # Handle single copy orthogroup outputs. 89 # Handle single copy orthogroup outputs.
165 if args.output_ptsco is not None: 90 if args.output_ptsco is not None:
166 single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta') 91 single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
167 single_copy_fasta_dest_dir = args.output_ptsco_dir 92 single_copy_fasta_dest_dir = args.output_ptsco_dir
168 title = 'Single copy orthogroups files' 93 title = 'Single copy orthogroups files'
169 move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir) 94 utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
170 write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir) 95 utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)