| 0 | 1 #!/usr/bin/env python | 
|  | 2 import optparse | 
|  | 3 import os | 
|  | 4 import sys | 
|  | 5 import tempfile | 
|  | 6 import shutil | 
|  | 7 import subprocess | 
|  | 8 import re | 
|  | 9 import logging | 
|  | 10 | 
# The script needs at least Python 2.6.
assert (2, 6) <= sys.version_info[:2]

log = logging.getLogger(__name__)
working_directory = os.getcwd()
# Only the generated *names* are kept: each NamedTemporaryFile object is
# dropped as soon as its name has been read.  execute() opens these paths
# itself when it captures the output of a command.
tmp_stderr_name, tmp_stdout_name = (
    tempfile.NamedTemporaryFile(dir=working_directory, suffix=extension).name
    for extension in ('.stderr', '.stdout')
)
|  | 17 | 
|  | 18 | 
def stop_err(msg):
    """Report a fatal error and terminate the script.

    ``msg`` is written to stderr followed by a newline, then SystemExit is
    raised with exit status 1 so that the failure is also visible through
    the process exit code (a bare ``sys.exit()`` would report success).
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
|  | 22 | 
|  | 23 | 
def read_stderr():
    """Return the text captured in the temporary stderr file.

    Returns an empty string when the file named by ``tmp_stderr_name`` does
    not exist.  The file is read in fixed-size chunks until a read returns
    no data, so a file whose size is an exact multiple of the chunk size
    terminates correctly.  The result is always the native ``str`` type:
    on Python 3 the bytes are decoded as UTF-8 with undecodable bytes
    replaced.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    chunk_size = 1048576
    chunks = []
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(chunk_size)
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Best effort: return whatever was read before the failure.
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: bytes -> str.  On Python 2 bytes already is str.
        data = data.decode('utf-8', 'replace')
    return data
|  | 37 | 
|  | 38 | 
def execute(command, stdin=None):
    """Run ``command`` through the shell and echo its captured output.

    stdout and stderr of the child process are redirected to the files
    named by ``tmp_stdout_name`` and ``tmp_stderr_name``.  Whatever happens,
    the captured stderr and then stdout are printed afterwards.

    ``command`` is handed to the shell as a single string (``shell=True``),
    so callers are responsible for quoting any file names they embed.

    Raises:
        Exception: if the command exits with a non-zero status; the message
            contains the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(
                    args=command,
                    shell=True,
                    stderr=tmp_stderr.fileno(),
                    stdout=tmp_stdout.fileno(),
                    stdin=stdin,
                    env=os.environ,
                )
                returncode = proc.wait()
                if returncode != 0:
                    raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the captured streams.  A capture file can be missing when
        # opening it failed above; skipping it keeps that original error
        # visible instead of replacing it with a second IOError.
        for captured_path in (tmp_stderr_name, tmp_stdout_name):
            if os.path.exists(captured_path):
                with open(captured_path, "r") as captured:
                    print(captured.read())
|  | 50 | 
|  | 51 | 
def delete_file(path):
    """Remove the file at ``path`` if it exists (best effort).

    Filesystem errors raised by ``os.remove`` are ignored on purpose; other
    exceptions (for example KeyboardInterrupt) are no longer swallowed.
    """
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup is best effort: a file that cannot be removed is left behind.
        pass
|  | 58 | 
|  | 59 | 
def delete_directory(directory):
    """Recursively remove ``directory`` if it exists (best effort).

    Filesystem errors raised while removing the tree are ignored on
    purpose; other exceptions (for example KeyboardInterrupt) are no longer
    swallowed.
    """
    if not os.path.exists(directory):
        return
    try:
        shutil.rmtree(directory)
    except OSError:
        # Cleanup is best effort: whatever could not be removed is left behind.
        pass
|  | 66 | 
|  | 67 | 
def symlink(source, link_name):
    """Make ``link_name`` refer to ``source``.

    On non-Windows platforms this is a plain ``os.symlink``.  On Windows the
    pywin32 ``win32file`` module is tried first; if it is unavailable or
    the call fails, ``source`` is copied to ``link_name`` instead.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # NOTE(review): pywin32 appears to document this call as
        # CreateSymbolicLink(SymlinkFileName, TargetFileName, Flags), with
        # flag 1 meaning "directory" -- the argument order and flag used here
        # should be confirmed.  Behaviour is left unchanged; any failure
        # falls back to copying below.
        win32file.CreateSymbolicLink(source, link_name, 1)
    except Exception:
        # Missing pywin32, insufficient privileges, etc.: fall back to a copy.
        shutil.copy(source, link_name)
|  | 78 | 
|  | 79 | 
def copy_to_working_directory(data_file, relative_path):
    """Expose ``data_file`` under ``relative_path`` and return that path.

    Nothing is linked when both arguments resolve to the same absolute
    path; otherwise ``symlink`` is used to create ``relative_path``.
    """
    source_location = os.path.abspath(data_file)
    target_location = os.path.abspath(relative_path)
    if source_location == target_location:
        return relative_path
    symlink(data_file, relative_path)
    return relative_path
|  | 84 | 
|  | 85 | 
def __main__():
    """Script entry point: hand control over to ``run_script``."""
    run_script()
|  | 88 | 
|  | 89 #ENDTEMPLATE | 
|  | 90 | 
# Accepted values for --toextension.  The "unindexed_" variants select the
# same format with msconvert's --noindex flag (see _build_base_cmd).
to_extensions = [
    'mzML',
    'mzXML',
    'unindexed_mzML',
    'unindexed_mzXML',
    'mgf',
    'txt',
    'ms2',
    'cms2',
]
|  | 92 | 
|  | 93 | 
def str_to_bool(v):
    """Interpret the string ``v`` as a boolean flag.

    The comparison is case-insensitive; only "yes", "true", "t" and "1"
    count as true.  Adapted from
    http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    """
    truthy_spellings = ("yes", "true", "t", "1")
    normalized = v.lower()
    return normalized in truthy_spellings
|  | 97 | 
|  | 98 | 
def _add_filter(filters_file, contents):
    """Append one ``filter=<contents>`` line to the open ``filters_file``."""
    filter_line = "filter=%s\n" % contents
    filters_file.write(filter_line)
|  | 101 | 
|  | 102 | 
def _skip_line(options, file_num, line_parts):
    """Tell whether a table row belongs to a different input file.

    When ``options.filter_table_file_column`` is unset no row is skipped.
    Otherwise the (1-based) column it names is compared, after stripping
    whitespace, with the string form of ``file_num``; the row is skipped
    when the two differ.
    """
    column_spec = options.filter_table_file_column
    if not column_spec:
        return False
    wanted = str(file_num).strip()
    found = line_parts[int(column_spec) - 1].strip()
    return found != wanted
|  | 112 | 
|  | 113 | 
def _read_table_numbers(path, options, file_num=None):
    """Collect the distinct integers listed in a filter table.

    Each line of the file at ``path`` is stripped; blank lines and lines
    starting with ``#`` are ignored.  When ``options.filter_table_column``
    is None the whole line is used, otherwise the line is split on tabs
    and the (1-based) column it names is used -- unless ``_skip_line``
    says the row belongs to a different ``file_num``.  The run of digits
    at the start of the selected text, if any, is converted to an int.

    Returns:
        A set of ints.
    """
    unique_numbers = set()
    column_num = options.filter_table_column
    leading_digits = re.compile(r"\d+")
    with open(path, "r") as table:
        for raw_line in table:
            line = raw_line.strip()
            # Test for emptiness *after* stripping: a raw line always holds
            # at least its newline, so blank rows would otherwise reach the
            # column lookup below and raise IndexError.
            if not line or line.startswith("#"):
                continue
            if column_num is None:
                column = line
            else:
                line_parts = line.split("\t")
                if _skip_line(options, file_num, line_parts):
                    continue
                column = line_parts[int(column_num) - 1]
            match = leading_digits.match(column)
            if match:
                unique_numbers.add(int(match.group()))
    return unique_numbers
|  | 138 | 
|  | 139 | 
def shellquote(s):
    """Wrap ``s`` in double quotes for use inside a shell command string.

    On POSIX systems the characters that remain special inside double
    quotes -- backslash, double quote, dollar sign and backtick -- are
    backslash-escaped, so a file name cannot trigger variable or command
    substitution.  Strings without those characters are quoted exactly as
    before.  On Windows the previous behaviour (only ``"`` is escaped) is
    kept, because its shell follows different quoting rules.
    """
    if os.name == 'nt':
        return '"' + s.replace('"', '\\"') + '"'
    escaped = re.sub(r'([\\"$`])', r'\\\1', s)
    return '"' + escaped + '"'
|  | 142 | 
|  | 143 | 
def _add_filter_line_from_file(filter_file, options, file_num=None):
    """Add a scan-selection filter built from ``options.filter_table``.

    Does nothing when no filter table was given.  Otherwise the numbers
    read by ``_read_table_numbers`` are written, in ascending order and
    separated by spaces, as one filter line: a ``scanNumber`` filter when
    ``options.filter_table_type`` is ``'number'``, an ``index`` filter for
    any other value.
    """
    table_path = options.filter_table
    if not table_path:
        return
    numbers = _read_table_numbers(table_path, options, file_num)
    # Sort so the generated filters file does not depend on set ordering.
    msconvert_int_set = " ".join(str(number) for number in sorted(numbers))
    if options.filter_table_type == 'number':
        filter_prefix = 'scanNumber'
    else:
        filter_prefix = 'index'
    _add_filter(filter_file, "%s %s" % (filter_prefix, msconvert_int_set))
|  | 156 | 
|  | 157 | 
def _create_filters_file(options, file_num=None, debug=False):
    """Write the msconvert filters/config file and return its path.

    The file is created in the current directory as ``filters`` (or
    ``filters<file_num>`` when a truthy ``file_num`` is given).  It holds,
    in order: the contents of ``options.filters_file`` if set, one filter
    line per entry of ``options.filter``, and the table-derived filter
    added by ``_add_filter_line_from_file``.  With ``debug`` the finished
    file is printed.
    """
    suffix = "" if not file_num else str(file_num)
    filters_file_path = "filters%s" % suffix
    with open(filters_file_path, "w") as filters_file:
        if options.filters_file:
            with open(options.filters_file, "r") as base_filters:
                base_contents = base_filters.read()
            filters_file.write(base_contents)
            # Keep the first appended filter on its own line even when the
            # supplied file does not end with a newline.
            if base_contents and not base_contents.endswith("\n"):
                filters_file.write("\n")
        for filter_spec in options.filter:
            _add_filter(filters_file, filter_spec)
        _add_filter_line_from_file(filters_file, options, file_num=file_num)

    if debug:
        with open(filters_file_path, "r") as written:
            print(written.read())
    return filters_file_path
|  | 172 | 
|  | 173 | 
def _build_base_cmd(options):
    """Assemble the msconvert command prefix shared by every invocation.

    The output format comes from ``options.toextension``; an
    ``unindexed_`` prefix is removed from it and turned into ``--noindex``.
    ``--zlib`` and the binary/mz/intensity encoding flags are appended
    when the corresponding options are set.
    """
    unindexed_prefix = "unindexed_"
    requested_format = options.toextension
    if requested_format.startswith(unindexed_prefix):
        output_format = requested_format[len(unindexed_prefix):]
        index_flag = "--noindex"
    else:
        output_format = requested_format
        index_flag = ""
    # index_flag stays in the list even when empty so that the spacing of
    # the resulting command string is unchanged.
    parts = ["msconvert", "--%s" % output_format, index_flag]
    if str_to_bool(options.zlib):
        parts.append("--zlib")
    if options.binaryencoding:
        parts.append("--%s" % options.binaryencoding)
    if options.mzencoding:
        parts.append("--mz%s" % options.mzencoding)
    if options.intensityencoding:
        parts.append("--inten%s" % options.intensityencoding)
    return " ".join(parts)
|  | 191 | 
|  | 192 | 
def _run(base_cmd, output_dir='output', inputs=None, debug=False):
    """Run msconvert on ``inputs`` and return the path of its output file.

    ``output_dir`` is created with ``os.mkdir`` (so it must not exist yet),
    appended to ``base_cmd`` as the ``-o`` argument together with the
    shell-quoted input paths, and the command is run through ``execute``.
    With ``debug`` the full command line is printed first.

    Returns:
        The path of the single file found in ``output_dir`` afterwards.

    Raises:
        Exception: if ``output_dir`` does not contain exactly one file once
            the command has finished.
    """
    if inputs is None:
        inputs = []
    inputs_as_str = " ".join(shellquote(input_path) for input_path in inputs)
    os.mkdir(output_dir)
    cmd = "%s -o %s %s" % (base_cmd, shellquote(output_dir), inputs_as_str)
    if debug:
        print(cmd)
    execute(cmd)
    output_files = os.listdir(output_dir)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if len(output_files) != 1:
        raise Exception("Expected exactly one output file in %s, found %d" % (output_dir, len(output_files)))
    return os.path.join(output_dir, output_files[0])
|  | 204 | 
|  | 205 | 
def run_script():
    """Parse the command line, run msconvert and copy the result to --output.

    Every ``--input`` is linked into the working directory under its
    ``--input_name`` (or ``input<i>``), with ``--fromextension`` appended
    when the name does not already end with it.  Without
    ``--filter_table_file_column`` a single filters file is applied to all
    inputs in one msconvert run.  With it, each input is first converted
    on its own with a per-file filters file, and the results become the
    inputs of the final run.  ``--merge`` is added to the final run when
    it has more than one input.

    Invalid or missing arguments are reported through ``stop_err``.
    """
    parser = optparse.OptionParser()
    parser.add_option('--input', dest='inputs', action='append', default=[])
    parser.add_option('--input_name', dest='input_names', action='append', default=[])
    parser.add_option('--output', dest='output')
    parser.add_option('--fromextension', dest='fromextension')
    parser.add_option('--toextension', dest='toextension', default='mzML', choices=to_extensions)
    parser.add_option('--binaryencoding', dest='binaryencoding', choices=['32', '64'])
    parser.add_option('--mzencoding', dest='mzencoding', choices=['32', '64'])
    parser.add_option('--intensityencoding', dest='intensityencoding', choices=['32', '64'])
    parser.add_option('--zlib', dest='zlib', default="false")
    parser.add_option('--filter', dest='filter', action='append', default=[])
    parser.add_option('--filters_file', dest='filters_file', default=None)
    parser.add_option('--filter_table', default=None)
    parser.add_option('--filter_table_type', default='index', choices=['index', 'number'])
    parser.add_option('--filter_table_column', default=None)
    parser.add_option('--filter_table_file_column', default=None)
    parser.add_option('--debug', dest='debug', action='store_true', default=False)

    (options, _) = parser.parse_args()
    if len(options.inputs) < 1:
        stop_err("No input files to msconvert specified")
    if len(options.input_names) > 0 and len(options.input_names) != len(options.inputs):
        stop_err("Number(s) of supplied input names and input files do not match")
    if not options.output:
        stop_err("Must specify output location")
    # The extension is dereferenced below; without this check a missing
    # --fromextension surfaced as an AttributeError on None.
    if not options.fromextension:
        stop_err("Must specify the extension of the input files (--fromextension)")
    from_extension = options.fromextension

    input_files = []
    for i, input_path in enumerate(options.inputs):
        input_base = None
        if len(options.input_names) > i:
            input_base = options.input_names[i]
        if not input_base:
            input_base = 'input%s' % i
        if input_base.lower().endswith(from_extension.lower()):
            input_file = input_base
        else:
            input_file = '%s.%s' % (input_base, from_extension)
        copy_to_working_directory(input_path, input_file)
        input_files.append(input_file)

    cmd = _build_base_cmd(options)
    file_column = options.filter_table_file_column
    if not file_column:
        # Apply the same filters to all files: create one universal filters
        # file and run msconvert once.
        filters_file_path = _create_filters_file(options, debug=options.debug)
        cmd = "%s -c %s" % (cmd, filters_file_path)
    else:
        # Dispatching on a column to filter different files differently:
        # filter each input once with msconvert, then merge once.
        filtered_files = []
        for index, input_file in enumerate(input_files):
            filters_file_path = _create_filters_file(options, index + 1, debug=options.debug)
            filter_cmd = "%s -c %s" % (cmd, filters_file_path)
            filtered_output_file = _run(filter_cmd, output_dir='output%d' % index, inputs=[input_file], debug=options.debug)
            filtered_files.append(filtered_output_file)
        input_files = filtered_files
    if len(input_files) > 1:
        cmd = "%s --merge" % cmd
    output_file = _run(cmd, output_dir='output', inputs=input_files, debug=options.debug)
    shutil.copy(output_file, options.output)
|  | 268 | 
|  | 269 | 
# Run the tool only when this file is executed as a script, not on import.
if "__main__" == __name__:
    __main__()