Mercurial > repos > galaxyp > msconvert_nix
comparison msconvert_wrapper.py @ 0:5f8848c7d7b7 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msconvert commit dc0103e36b921e9bcb86f9e9a402d8e6b4c47345-dirty
| author | galaxyp |
|---|---|
| date | Fri, 08 Apr 2016 15:46:25 -0400 |
| parents | |
| children | b00023193f6e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5f8848c7d7b7 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import optparse | |
| 3 import os | |
| 4 import sys | |
| 5 import tempfile | |
| 6 import shutil | |
| 7 import subprocess | |
| 8 import re | |
| 9 import logging | |
| 10 | |
# Refuse to run on interpreters older than Python 2.6.
assert sys.version_info[:2] >= (2, 6)

log = logging.getLogger(__name__)

# Directory the script was started from; the capture files below live here.
working_directory = os.getcwd()


def _reserve_tmp_name(suffix):
    """Return a fresh temporary file name inside ``working_directory``.

    Only the generated *name* is kept: the NamedTemporaryFile object is not
    retained, so the name is merely reserved for later use by the caller.
    """
    return tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix).name


# Names of the files used to capture the child process' stderr and stdout.
tmp_stderr_name = _reserve_tmp_name('.stderr')
tmp_stdout_name = _reserve_tmp_name('.stdout')
| 17 | |
| 18 | |
def stop_err(msg):
    """Write ``msg`` to stderr and terminate the script with a failure status.

    The message is followed by a newline. The process exits with status 1 so
    that callers relying on the exit code (rather than on stderr content) can
    detect the failure; a bare ``sys.exit()`` would report success (status 0).
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
| 22 | |
| 23 | |
def read_stderr():
    """Return the captured stderr of the last executed command as text.

    Reads the file named by the module-level ``tmp_stderr_name``. Returns an
    empty string when that file does not exist.

    The file is read in binary mode and decoded explicitly so the function
    returns ``str`` on both Python 2 and Python 3 (concatenating the raw bytes
    onto a ``str`` raises TypeError on Python 3). Reading the whole file in
    one call also avoids the chunked loop that never terminated when the file
    size was a non-zero exact multiple of the chunk size.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        raw = tmp_stderr.read()
    if isinstance(raw, str):
        # Python 2: bytes and str are the same type, nothing to decode.
        return raw
    # Undecodable bytes must not hide the real error message.
    return raw.decode('utf-8', 'replace')
| 37 | |
| 38 | |
def execute(command, stdin=None):
    """Run ``command`` through the shell, capturing its stdout and stderr.

    The child's stdout and stderr are redirected to the files named by the
    module-level ``tmp_stdout_name`` and ``tmp_stderr_name``. Whatever was
    captured is echoed to this process' stdout afterwards (stderr first),
    whether or not the command succeeded.

    Parameters:
        command: shell command line (a single string). It is executed with
            ``shell=True``, so every argument embedded in it must already be
            quoted by the caller.
        stdin: optional file object / descriptor handed to the child as stdin.

    Raises:
        Exception: if the command exits with a non-zero status; the message
            contains the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(
                    args=command,
                    shell=True,
                    stderr=tmp_stderr.fileno(),
                    stdout=tmp_stdout.fileno(),
                    stdin=stdin,
                    env=os.environ,
                )
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the captured streams. Skip files that were never created so a
        # failure to open them does not mask the original exception, and
        # close every handle that is opened here.
        for captured_name in (tmp_stderr_name, tmp_stdout_name):
            if os.path.exists(captured_name):
                with open(captured_name, "r") as captured:
                    print(captured.read())
| 50 | |
| 51 | |
def delete_file(path):
    """Remove the file at ``path`` if it exists (best effort).

    Filesystem errors raised by the removal are deliberately ignored. Only
    ``OSError`` is swallowed, so unrelated exceptions such as
    KeyboardInterrupt are no longer silently discarded.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            # Best effort: a file that cannot be removed is left in place.
            pass
| 58 | |
| 59 | |
def delete_directory(directory):
    """Recursively remove ``directory`` if it exists (best effort).

    Filesystem errors raised by the removal are deliberately ignored. Only
    ``OSError`` is swallowed, so unrelated exceptions such as
    KeyboardInterrupt are no longer silently discarded.
    """
    if os.path.exists(directory):
        try:
            shutil.rmtree(directory)
        except OSError:
            # Best effort: whatever could not be removed is left in place.
            pass
| 66 | |
| 67 | |
def symlink(source, link_name):
    """Create ``link_name`` as a symbolic link pointing at ``source``.

    On Windows the link is created through pywin32's ``win32file`` module;
    if that fails for any reason (module missing, insufficient privileges,
    ...) ``source`` is copied to ``link_name`` instead. On every other
    platform ``os.symlink`` is used and its errors propagate to the caller.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # pywin32 expects (SymlinkFileName, TargetFileName, Flags): the link
        # to create comes first. Flag 1 marks a directory link, 0 a file link.
        flags = 1 if os.path.isdir(source) else 0
        win32file.CreateSymbolicLink(link_name, source, flags)
    except Exception:
        # Fall back to a plain copy when a symbolic link cannot be created.
        shutil.copy(source, link_name)
| 78 | |
| 79 | |
def copy_to_working_directory(data_file, relative_path):
    """Make ``data_file`` available under ``relative_path`` and return that path.

    Despite the name, nothing is copied here: a link is created via
    ``symlink``. No link is created when both arguments already resolve to
    the same absolute path.
    """
    source_location = os.path.abspath(data_file)
    target_location = os.path.abspath(relative_path)
    already_in_place = source_location == target_location
    if not already_in_place:
        symlink(data_file, relative_path)
    return relative_path
| 84 | |
| 85 | |
def __main__():
    """Entry point of the wrapper: delegate all work to ``run_script``.

    NOTE: a function with the same name is defined again near the end of
    this file; that later definition is the one bound when the script runs.
    """
    run_script()
| 88 | |
| 89 #ENDTEMPLATE | |
| 90 | |
# Values accepted by the --toextension option. The "unindexed_" variants are
# translated into the plain format plus --noindex when the command is built.
to_extensions = [
    'mzML',
    'mzXML',
    'unindexed_mzML',
    'unindexed_mzXML',
    'mgf',
    'mz5',
    'txt',
    'ms2',
    'cms2',
]
| 92 | |
| 93 | |
def str_to_bool(v):
    """Interpret the string ``v`` as a boolean.

    Returns True when ``v``, compared case-insensitively, is one of "yes",
    "true", "t" or "1"; every other string yields False.

    Adapted from
    http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    """
    truthy_spellings = ("yes", "true", "t", "1")
    normalized = v.lower()
    return normalized in truthy_spellings
| 97 | |
| 98 | |
| 99 def _add_filter(filters_file, contents): | |
| 100 filters_file.write("filter=%s\n" % contents) | |
| 101 | |
| 102 | |
| 103 def _skip_line(options, file_num, line_parts): | |
| 104 file_num_column = options.filter_table_file_column | |
| 105 if not file_num_column: | |
| 106 return False | |
| 107 else: | |
| 108 target_file_num_val = str(file_num).strip() | |
| 109 query_file_num_val = line_parts[int(file_num_column) - 1].strip() | |
| 110 #print "target %s, query %s" % (target_file_num_val, query_file_num_val) | |
| 111 return target_file_num_val != query_file_num_val | |
| 112 | |
| 113 | |
| 114 def _read_table_numbers(path, options, file_num=None): | |
| 115 unique_numbers = set([]) | |
| 116 column_num = options.filter_table_column | |
| 117 input = open(path, "r") | |
| 118 first_line = True | |
| 119 for line in input: | |
| 120 if not line: | |
| 121 continue | |
| 122 line = line.strip() | |
| 123 if line.startswith("#"): | |
| 124 first_line = False | |
| 125 continue | |
| 126 if column_num == None: | |
| 127 column = line | |
| 128 else: | |
| 129 line_parts = line.split("\t") | |
| 130 if _skip_line(options, file_num, line_parts): | |
| 131 continue | |
| 132 column = line_parts[int(column_num) - 1] | |
| 133 match = re.match("\d+", column) | |
| 134 if match: | |
| 135 unique_numbers.add(int(match.group())) | |
| 136 first_line = False | |
| 137 return unique_numbers | |
| 138 | |
| 139 | |
def shellquote(s):
    """Return ``s`` wrapped in double quotes for use on a POSIX shell command line.

    Inside double quotes the shell still treats the backslash, the double
    quote, ``$`` and the backtick specially, so each of them is escaped with
    a backslash. Escaping only the double quote would leave variable and
    command substitution active for names such as ``$(cmd)``.

    Strings containing none of these characters are quoted exactly as
    before.
    """
    return '"' + re.sub(r'([\\"$`])', r'\\\1', s) + '"'
| 142 | |
| 143 | |
| 144 def _add_filter_line_from_file(filter_file, options, file_num=None): | |
| 145 file = options.filter_table | |
| 146 if not file: | |
| 147 return | |
| 148 numbers = _read_table_numbers(file, options, file_num) | |
| 149 msconvert_int_set = " ".join([str(number) for number in numbers]) | |
| 150 filter_type = options.filter_table_type | |
| 151 if filter_type == 'number': | |
| 152 filter_prefix = 'scanNumber' | |
| 153 else: | |
| 154 filter_prefix = 'index' | |
| 155 _add_filter(filter_file, "%s %s" % (filter_prefix, msconvert_int_set)) | |
| 156 | |
| 157 | |
def _create_filters_file(options, file_num=None, debug=False):
    """Write the msconvert configuration file holding all requested filters.

    The file is created in the current directory and named ``filters``
    followed by ``file_num`` when one is given. It contains, in order: the
    verbatim contents of ``options.filters_file`` (if set), one ``filter=``
    line per entry of ``options.filter``, and the filter derived from the
    filter table (if any).

    Parameters:
        options: parsed command-line options.
        file_num: number of the input file the filters are for, or None when
            one file serves all inputs.
        debug: when true, echo the generated file to stdout.

    Returns:
        The path of the file that was written.
    """
    suffix = str(file_num) if file_num else ""
    filters_file_path = "filters%s" % suffix
    # Context managers close both files even if writing fails part-way.
    with open(filters_file_path, "w") as filters_file:
        if options.filters_file:
            with open(options.filters_file, "r") as user_filters:
                user_contents = user_filters.read()
            filters_file.write(user_contents)
            # Keep a missing final newline from fusing the user's last line
            # with the first generated "filter=" line.
            if user_contents and not user_contents.endswith("\n"):
                filters_file.write("\n")
        for filter_spec in options.filter:
            _add_filter(filters_file, filter_spec)
        _add_filter_line_from_file(filters_file, options, file_num=file_num)
    if debug:
        with open(filters_file_path, "r") as written:
            print(written.read())
    return filters_file_path
| 172 | |
| 173 | |
def _build_base_cmd(options,args=None):
    """Assemble the msconvert command line shared by every invocation.

    The output format comes from ``options.toextension``; an ``unindexed_``
    prefix is removed and turned into ``--noindex``. Any extra ``args`` are
    appended as given, followed by ``--zlib`` and the binary / m/z /
    intensity encoding switches when the corresponding options are set.

    Returns the command as a single string, without inputs or output
    directory.
    """
    unindexed_prefix = "unindexed_"
    to_extension = options.toextension
    to_params = ""
    if to_extension.startswith(unindexed_prefix):
        to_extension = to_extension[len(unindexed_prefix):]
        to_params = "--noindex"

    # Every later piece is joined to the previous ones with a single space.
    pieces = ["msconvert --%s %s" % (to_extension, to_params)]
    if args:
        pieces.append(' '.join(args))
    if str_to_bool(options.zlib):
        pieces.append("--zlib")
    encoding_switches = (
        ("--%s", options.binaryencoding),
        ("--mz%s", options.mzencoding),
        ("--inten%s", options.intensityencoding),
    )
    for template, value in encoding_switches:
        if value:
            pieces.append(template % value)
    return " ".join(pieces)
| 193 | |
| 194 | |
def _run(base_cmd, output_dir='output', inputs=None, debug=False):
    """Run msconvert on ``inputs`` and return the path of the file it produced.

    Parameters:
        base_cmd: command line without inputs and output directory.
        output_dir: directory to create for the output; it must not exist yet
            (``os.mkdir`` raises otherwise).
        inputs: iterable of input file paths; None means no inputs.
        debug: when true, print the full command before running it.

    Returns:
        The path (``output_dir`` joined with the file name) of the single
        file found in ``output_dir`` after the run.

    Raises:
        Exception: if the command fails (raised by ``execute``) or if the
            output directory does not contain exactly one file afterwards.
    """
    # A None default avoids sharing one mutable list between calls.
    input_paths = [] if inputs is None else inputs
    inputs_as_str = " ".join(shellquote(path) for path in input_paths)
    os.mkdir(output_dir)
    cmd = "%s -o %s %s" % (base_cmd, shellquote(output_dir), inputs_as_str)
    if debug:
        print(cmd)
    execute(cmd)
    output_files = os.listdir(output_dir)
    # Checked explicitly rather than with ``assert`` so the validation still
    # happens when Python runs with -O.
    if len(output_files) != 1:
        raise Exception("Expected exactly one output file in %s, found %d: %s" % (output_dir, len(output_files), ", ".join(output_files)))
    return os.path.join(output_dir, output_files[0])
| 206 | |
| 207 | |
def _staged_input_name(options, position, input_path):
    """Return the working-directory file name to use for one input.

    The name is taken from ``options.input_names`` (with single and double
    quote characters removed) when one exists at ``position``, and defaults
    to ``input<position>`` otherwise. ``options.fromextension`` is appended
    unless the name already ends with it (case-insensitively), the input is
    listed in ``options.implicits``, or no source extension was given.
    """
    input_base = None
    if len(options.input_names) > position:
        input_base = options.input_names[position]
        input_base = input_base.replace("'", "").replace("\"", "")
    if not input_base:
        input_base = 'input%s' % position
    from_extension = options.fromextension
    needs_extension = bool(
        from_extension
        and input_path not in options.implicits
        and not input_base.lower().endswith('.%s' % from_extension.lower())
    )
    if needs_extension:
        input_file = '%s.%s' % (input_base, from_extension)
    else:
        input_file = input_base
    if options.debug:
        print("input_base: %s" % input_base)
        print("input_file: %s" % input_file)
    return input_file


def run_script():
    """Parse the command line, stage the inputs and run msconvert.

    Steps:
      1. Parse and validate the options (at least one --input, matching
         --input_name count when names are given, an --output location, and
         a name for every --ident).
      2. Link every input and ident file into the working directory under
         its resolved name; inputs listed as --implicit are staged but kept
         off the msconvert command line.
      3. Without --filter_table_file_column, write one filters file and run
         msconvert once on all inputs. With it, filter every input
         separately first and use the filtered files as inputs of the final
         run.
      4. Add --merge when more than one input remains, copy the resulting
         file to --output and, when --refinement is given, copy the first
         ``*.mzRefinement.tsv`` file found in the working directory there.

    Validation failures are reported through ``stop_err``.
    """
    parser = optparse.OptionParser()
    parser.add_option('--input', dest='inputs', action='append', default=[])
    parser.add_option('--input_name', dest='input_names', action='append', default=[])
    parser.add_option('--implicit', dest='implicits', action='append', default=[], help='input files that should NOT be on the msconvert command line.')
    parser.add_option('--ident', dest='idents', action='append', default=[])
    parser.add_option('--ident_name', dest='ident_names', action='append', default=[])
    parser.add_option('--output', dest='output')
    parser.add_option('--refinement', dest='refinement')
    parser.add_option('--fromextension', dest='fromextension')
    parser.add_option('--toextension', dest='toextension', default='mzML', choices=to_extensions)
    parser.add_option('--binaryencoding', dest='binaryencoding', choices=['32', '64'])
    parser.add_option('--mzencoding', dest='mzencoding', choices=['32', '64'])
    parser.add_option('--intensityencoding', dest='intensityencoding', choices=['32', '64'])
    parser.add_option('--zlib', dest='zlib', default="false")
    parser.add_option('--filter', dest='filter', action='append', default=[])
    parser.add_option('--filters_file', dest='filters_file', default=None)
    parser.add_option('--filter_table', default=None)
    parser.add_option('--filter_table_type', default='index', choices=['index', 'number'])
    parser.add_option('--filter_table_column', default=None)
    parser.add_option('--filter_table_file_column', default=None)
    parser.add_option('--debug', dest='debug', action='store_true', default=False)

    (options, args) = parser.parse_args()
    if len(options.inputs) < 1:
        stop_err("No input files to msconvert specified")
    if len(options.input_names) > 0 and len(options.input_names) != len(options.inputs):
        stop_err("Number(s) of supplied input names and input files do not match")
    if not options.output:
        stop_err("Must specify output location")
    # Every ident file needs a name to be staged under; report a missing one
    # clearly instead of failing later with an IndexError.
    if len(options.ident_names) < len(options.idents):
        stop_err("Number(s) of supplied ident names and ident files do not match")

    input_files = []
    for i, input_path in enumerate(options.inputs):
        input_file = _staged_input_name(options, i, input_path)
        copy_to_working_directory(input_path, input_file)
        if input_path in options.implicits:
            # Staged next to the other inputs but not passed to msconvert.
            continue
        input_files.append(input_file)
    for ident, ident_file in zip(options.idents, options.ident_names):
        copy_to_working_directory(ident, ident_file)

    cmd = _build_base_cmd(options, args=args)
    file_column = options.filter_table_file_column
    if not file_column:
        # Apply the same filters to all files: just create one universal
        # filters file and run msconvert once.
        filters_file_path = _create_filters_file(options, debug=options.debug)
        cmd = "%s -c %s" % (cmd, filters_file_path)
    else:
        # Dispatching on a column to filter different files differently: need
        # to filter each input once with msconvert and then merge once.
        filtered_files = []
        for index, input_file in enumerate(input_files):
            filters_file_path = _create_filters_file(options, index + 1, debug=options.debug)
            filter_cmd = "%s -c %s" % (cmd, filters_file_path)
            filtered_output_file = _run(filter_cmd, output_dir='output%d' % index, inputs=[input_file], debug=options.debug)
            filtered_files.append(filtered_output_file)
        input_files = filtered_files
    if len(input_files) > 1:
        cmd = "%s --merge" % cmd
    output_file = _run(cmd, output_dir='output', inputs=input_files, debug=options.debug)
    shutil.copy(output_file, options.output)
    if options.refinement:
        # Only the first *.mzRefinement.tsv file found in the working
        # directory is copied.
        for fname in os.listdir(os.getcwd()):
            if fname.endswith('.mzRefinement.tsv'):
                shutil.copy(fname, options.refinement)
                break
| 293 | |
def __main__():
    """Entry point of the wrapper: delegate all work to ``run_script``."""
    run_script()


# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()
