Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/promoter2.py @ 29:3cb02adf4326 draft
v0.2.9 Python style improvements
| author | peterjc | 
|---|---|
| date | Wed, 01 Feb 2017 09:46:14 -0500 | 
| parents | 22e71e53f534 | 
| children | 6d9d7cdf00fc | 
   comparison
  equal
  deleted
  inserted
  replaced
| 28:22e71e53f534 | 29:3cb02adf4326 | 
|---|---|
| 28 """ | 28 """ | 
| 29 import sys | 29 import sys | 
| 30 import os | 30 import os | 
| 31 import commands | 31 import commands | 
| 32 import tempfile | 32 import tempfile | 
| 33 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count | 33 from seq_analysis_utils import split_fasta, run_jobs, thread_count | 
| 34 | 34 | 
| 35 FASTA_CHUNK = 500 | 35 FASTA_CHUNK = 500 | 
| 36 | 36 | 
| 37 if "-v" in sys.argv or "--version" in sys.argv: | 37 if "-v" in sys.argv or "--version" in sys.argv: | 
| 38 sys.exit(os.system("promoter -V")) | 38 sys.exit(os.system("promoter -V")) | 
| 39 | 39 | 
| 40 if len(sys.argv) != 4: | 40 if len(sys.argv) != 4: | 
| 41 sys_exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " | 41 sys.exit("Require three arguments, number of threads (int), input DNA FASTA file & output tabular file. " | 
| 42 "Got %i arguments." % (len(sys.argv)-1)) | 42 "Got %i arguments." % (len(sys.argv) - 1)) | 
| 43 | 43 | 
| 44 num_threads = thread_count(sys.argv[3],default=4) | 44 num_threads = thread_count(sys.argv[3], default=4) | 
| 45 fasta_file = os.path.abspath(sys.argv[2]) | 45 fasta_file = os.path.abspath(sys.argv[2]) | 
| 46 tabular_file = os.path.abspath(sys.argv[3]) | 46 tabular_file = os.path.abspath(sys.argv[3]) | 
| 47 | 47 | 
| 48 tmp_dir = tempfile.mkdtemp() | 48 tmp_dir = tempfile.mkdtemp() | 
| 49 | 49 | 
| 50 | |
| 50 def get_path_and_binary(): | 51 def get_path_and_binary(): | 
| 51 platform = commands.getoutput("uname") #e.g. Linux | 52 platform = commands.getoutput("uname") # e.g. Linux | 
| 52 shell_script = commands.getoutput("which promoter") | 53 shell_script = commands.getoutput("which promoter") | 
| 53 if not os.path.isfile(shell_script): | 54 if not os.path.isfile(shell_script): | 
| 54 sys_exit("ERROR: Missing promoter executable shell script") | 55 sys.exit("ERROR: Missing promoter executable shell script") | 
| 55 path = None | 56 path = None | 
| 56 for line in open(shell_script): | 57 for line in open(shell_script): | 
| 57 if line.startswith("setenv"): #could then be tab or space! | 58 if line.startswith("setenv"): # could then be tab or space! | 
| 58 parts = line.rstrip().split(None, 2) | 59 parts = line.rstrip().split(None, 2) | 
| 59 if parts[0] == "setenv" and parts[1] == "PROM": | 60 if parts[0] == "setenv" and parts[1] == "PROM": | 
| 60 path = parts[2] | 61 path = parts[2] | 
| 61 if not path: | 62 if not path: | 
| 62 sys_exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script) | 63 sys.exit("ERROR: Could not find promoter path (PROM) in %r" % shell_script) | 
| 63 if not os.path.isdir(path): | 64 if not os.path.isdir(path): | 
| 64 sys_exit("ERROR: %r is not a directory" % path) | 65 sys.exit("ERROR: %r is not a directory" % path) | 
| 65 bin = "%s/bin/promoter_%s" % (path, platform) | 66 bin = "%s/bin/promoter_%s" % (path, platform) | 
| 66 if not os.path.isfile(bin): | 67 if not os.path.isfile(bin): | 
| 67 sys_exit("ERROR: Missing promoter binary %r" % bin) | 68 sys.exit("ERROR: Missing promoter binary %r" % bin) | 
| 68 return path, bin | 69 return path, bin | 
| 70 | |
| 69 | 71 | 
| 70 def make_tabular(raw_handle, out_handle): | 72 def make_tabular(raw_handle, out_handle): | 
| 71 """Parse text output into tabular, return query count.""" | 73 """Parse text output into tabular, return query count.""" | 
| 72 identifier = None | 74 identifier = None | 
| 73 queries = 0 | 75 queries = 0 | 
| 74 for line in raw_handle: | 76 for line in raw_handle: | 
| 75 #print repr(line) | 77 # print repr(line) | 
| 76 if not line.strip() or line == "Promoter prediction:\n": | 78 if not line.strip() or line == "Promoter prediction:\n": | 
| 77 pass | 79 pass | 
| 78 elif line[0] != " ": | 80 elif line[0] != " ": | 
| 79 identifier = line.strip().replace("\t", " ").split(None,1)[0] | 81 identifier = line.strip().replace("\t", " ").split(None, 1)[0] | 
| 80 queries += 1 | 82 queries += 1 | 
| 81 elif line == " No promoter predicted\n": | 83 elif line == " No promoter predicted\n": | 
| 82 #End of a record | 84 # End of a record | 
| 83 identifier = None | 85 identifier = None | 
| 84 elif line == " Position Score Likelihood\n": | 86 elif line == " Position Score Likelihood\n": | 
| 85 assert identifier | 87 assert identifier | 
| 86 else: | 88 else: | 
| 87 try: | 89 try: | 
| 88 position, score, likelihood = line.strip().split(None,2) | 90 position, score, likelihood = line.strip().split(None, 2) | 
| 89 except ValueError: | 91 except ValueError: | 
| 90 print "WARNING: Problem with line: %r" % line | 92 print "WARNING: Problem with line: %r" % line | 
| 91 continue | 93 continue | 
| 92 #sys_exit("ERROR: Problem with line: %r" % line) | 94 # sys.exit("ERROR: Problem with line: %r" % line) | 
| 93 if likelihood not in ["ignored", | 95 if likelihood not in ["ignored", | 
| 94 "Marginal prediction", | 96 "Marginal prediction", | 
| 95 "Medium likely prediction", | 97 "Medium likely prediction", | 
| 96 "Highly likely prediction"]: | 98 "Highly likely prediction"]: | 
| 97 sys_exit("ERROR: Problem with line: %r" % line) | 99 sys.exit("ERROR: Problem with line: %r" % line) | 
| 98 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) | 100 out_handle.write("%s\t%s\t%s\t%s\n" % (identifier, position, score, likelihood)) | 
| 99 return queries | 101 return queries | 
| 100 | 102 | 
| 101 working_dir, bin = get_path_and_binary() | 103 working_dir, bin = get_path_and_binary() | 
| 102 | 104 | 
| 103 if not os.path.isfile(fasta_file): | 105 if not os.path.isfile(fasta_file): | 
| 104 sys_exit("ERROR: Missing input FASTA file %r" % fasta_file) | 106 sys.exit("ERROR: Missing input FASTA file %r" % fasta_file) | 
| 105 | 107 | 
| 106 #Note that if the input FASTA file contains no sequences, | 108 # Note that if the input FASTA file contains no sequences, | 
| 107 #split_fasta returns an empty list (i.e. zero temp files). | 109 # split_fasta returns an empty list (i.e. zero temp files). | 
| 108 #We deliberately omit the FASTA descriptions to avoid a | 110 # We deliberately omit the FASTA descriptions to avoid a | 
| 109 #bug in promoter2 with descriptions over 200 characters. | 111 # bug in promoter2 with descriptions over 200 characters. | 
| 110 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "promoter"), FASTA_CHUNK, keep_descr=False) | 112 fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "promoter"), FASTA_CHUNK, keep_descr=False) | 
| 111 temp_files = [f+".out" for f in fasta_files] | 113 temp_files = [f + ".out" for f in fasta_files] | 
| 112 jobs = ["%s %s > %s" % (bin, fasta, temp) | 114 jobs = ["%s %s > %s" % (bin, fasta, temp) | 
| 113 for fasta, temp in zip(fasta_files, temp_files)] | 115 for fasta, temp in zip(fasta_files, temp_files)] | 
| 116 | |
| 114 | 117 | 
| 115 def clean_up(file_list): | 118 def clean_up(file_list): | 
| 116 for f in file_list: | 119 for f in file_list: | 
| 117 if os.path.isfile(f): | 120 if os.path.isfile(f): | 
| 118 os.remove(f) | 121 os.remove(f) | 
| 119 try: | 122 try: | 
| 120 os.rmdir(tmp_dir) | 123 os.rmdir(tmp_dir) | 
| 121 except: | 124 except Exception: | 
| 122 pass | 125 pass | 
| 123 | 126 | 
| 124 if len(jobs) > 1 and num_threads > 1: | 127 if len(jobs) > 1 and num_threads > 1: | 
| 125 #A small "info" message for Galaxy to show the user. | 128 # A small "info" message for Galaxy to show the user. | 
| 126 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 129 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 
| 127 cur_dir = os.path.abspath(os.curdir) | 130 cur_dir = os.path.abspath(os.curdir) | 
| 128 os.chdir(working_dir) | 131 os.chdir(working_dir) | 
| 129 results = run_jobs(jobs, num_threads) | 132 results = run_jobs(jobs, num_threads) | 
| 130 os.chdir(cur_dir) | 133 os.chdir(cur_dir) | 
| 134 try: | 137 try: | 
| 135 output = open(temp).readline() | 138 output = open(temp).readline() | 
| 136 except IOError: | 139 except IOError: | 
| 137 output = "" | 140 output = "" | 
| 138 clean_up(fasta_files + temp_files) | 141 clean_up(fasta_files + temp_files) | 
| 139 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 142 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 
| 140 error_level) | 143 error_level) | 
| 141 | 144 | 
| 142 del results | 145 del results | 
| 143 del jobs | 146 del jobs | 
| 144 | 147 | 
| 149 data_handle = open(temp) | 152 data_handle = open(temp) | 
| 150 count = make_tabular(data_handle, out_handle) | 153 count = make_tabular(data_handle, out_handle) | 
| 151 data_handle.close() | 154 data_handle.close() | 
| 152 if not count: | 155 if not count: | 
| 153 clean_up(fasta_files + temp_files) | 156 clean_up(fasta_files + temp_files) | 
| 154 sys_exit("No output from promoter2") | 157 sys.exit("No output from promoter2") | 
| 155 queries += count | 158 queries += count | 
| 156 out_handle.close() | 159 out_handle.close() | 
| 157 | 160 | 
| 158 clean_up(fasta_files + temp_files) | 161 clean_up(fasta_files + temp_files) | 
| 159 print "Results for %i queries" % queries | 162 print "Results for %i queries" % queries | 
