ParseDb.py @ 0:dda9b2e72e2b (draft)

Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 03 May 2016 09:52:21 -0400 |
```python
#!/usr/bin/env python3
"""
Parses tab delimited database files
"""
# Info
__author__ = 'Jason Anthony Vander Heiden'
from changeo import __version__, __date__

# Imports
import csv
import os
import re
from argparse import ArgumentParser
from collections import OrderedDict
from textwrap import dedent
from time import time
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# Presto and changeo imports
from presto.Defaults import default_delimiter, default_out_args
from presto.Annotation import flattenAnnotation
from presto.IO import getOutputHandle, printLog, printProgress, printMessage
from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs
from changeo.IO import getDbWriter, readDbFile, countDbFile

# Defaults
default_id_field = 'SEQUENCE_ID'
default_seq_field = 'SEQUENCE_IMGT'
default_germ_field = 'GERMLINE_IMGT_D_MASK'
default_index_field = 'INDEX'

# TODO: convert SQL-ish operations to modify_func() as per ParseHeaders


def getDbSeqRecord(db_record, id_field, seq_field, meta_fields=None,
                   delimiter=default_delimiter):
    """
    Parses a database record into a SeqRecord

    Arguments:
    db_record = a dictionary containing a database record
    id_field = the field containing identifiers
    seq_field = the field containing sequences
    meta_fields = a list of fields to add to sequence annotations
    delimiter = a tuple of delimiters for (fields, values, value lists)

    Returns:
    a SeqRecord
    """
    # Return None if ID or sequence fields are empty
    if not db_record[id_field] or not db_record[seq_field]:
        return None

    # Create description string
    desc_dict = OrderedDict([('ID', db_record[id_field])])
    if meta_fields is not None:
        desc_dict.update([(f, db_record[f]) for f in meta_fields if f in db_record])
    desc_str = flattenAnnotation(desc_dict, delimiter=delimiter)

    # Create SeqRecord
    seq_record = SeqRecord(Seq(db_record[seq_field], IUPAC.ambiguous_dna),
                           id=desc_str, name=desc_str, description='')

    return seq_record
```
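For reference, the description string assembled above is easiest to see with a small stand-in for presto's flattenAnnotation; the '|' and '=' separators below are an assumption about presto's default delimiter tuple, not something defined in this file:

```python
# Minimal sketch of the description string getDbSeqRecord assembles.
# The '|' field and '=' value separators stand in for presto's default
# delimiter tuple (an assumption; they are not defined in this file).
from collections import OrderedDict

record = {'SEQUENCE_ID': 'READ1', 'SEQUENCE_IMGT': 'ACGTACGT', 'V_CALL': 'IGHV1-2*02'}
desc = OrderedDict([('ID', record['SEQUENCE_ID']), ('V_CALL', record['V_CALL'])])

# flattenAnnotation-style output: the ID value first, then KEY=VALUE pairs
desc_str = '|'.join([desc.pop('ID')] + ['%s=%s' % (k, v) for k, v in desc.items()])
print(desc_str)  # READ1|V_CALL=IGHV1-2*02
```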
```python
def splitDbFile(db_file, field, num_split=None, out_args=default_out_args):
    """
    Divides a tab-delimited database file into segments by description tags

    Arguments:
    db_file = filename of the tab-delimited database file to split
    field = the field name by which to split db_file
    num_split = the numerical threshold by which to group sequences;
                if None treat field as textual
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    a list of output file names
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'split'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUM_SPLIT'] = num_split
    printLog(log)

    # Open reader over the database records
    reader = readDbFile(db_file, ig=False)

    # Determine total number of records
    rec_count = countDbFile(db_file)

    start_time = time()
    count = 0
    # Sort records into files based on textual field
    if num_split is None:
        # Create set of unique field tags
        tmp_iter = readDbFile(db_file, ig=False)
        tag_list = list(set([row[field] for row in tmp_iter]))

        # Forbidden filename characters and their replacements
        no_good = {'/': 'f', '\\': 'b', '?': 'q', '%': 'p', '*': 's', ':': 'c',
                   '|': 'pi', '"': 'dq', "'": 'sq', '<': 'lt', '>': 'gt', ' ': '_'}
        # Replace forbidden characters to build a filename label for each tag
        tag_dict = {}
        for tag in tag_list:
            label = tag
            for c, r in no_good.items():
                label = label.replace(c, r)
            tag_dict[tag] = label

        # Create output handles
        handles_dict = {tag: getOutputHandle(db_file,
                                             '%s-%s' % (field, label),
                                             out_type=out_args['out_type'],
                                             out_name=out_args['out_name'],
                                             out_dir=out_args['out_dir'])
                        for tag, label in tag_dict.items()}

        # Create Db writer instances
        writers_dict = {tag: getDbWriter(handles_dict[tag], db_file)
                        for tag in tag_dict}

        # Iterate over records
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            # Write row to appropriate file
            tag = row[field]
            writers_dict[tag].writerow(row)

    # Sort records into files based on numeric num_split
    else:
        num_split = float(num_split)

        # Create output handles
        handles_dict = {'under': getOutputHandle(db_file,
                                                 'under-%.1f' % num_split,
                                                 out_type=out_args['out_type'],
                                                 out_name=out_args['out_name'],
                                                 out_dir=out_args['out_dir']),
                        'atleast': getOutputHandle(db_file,
                                                   'atleast-%.1f' % num_split,
                                                   out_type=out_args['out_type'],
                                                   out_name=out_args['out_name'],
                                                   out_dir=out_args['out_dir'])}

        # Create Db writer instances
        writers_dict = {'under': getDbWriter(handles_dict['under'], db_file),
                        'atleast': getDbWriter(handles_dict['atleast'], db_file)}

        # Iterate over records
        for row in reader:
            printProgress(count, rec_count, 0.05, start_time)
            count += 1
            tag = row[field]
            tag = 'under' if float(tag) < num_split else 'atleast'
            writers_dict[tag].writerow(row)

    # Write log
    printProgress(count, rec_count, 0.05, start_time)
    log = OrderedDict()
    for i, k in enumerate(handles_dict):
        log['OUTPUT%i' % (i + 1)] = os.path.basename(handles_dict[k].name)
    log['RECORDS'] = rec_count
    log['PARTS'] = len(handles_dict)
    log['END'] = 'ParseDb'
    printLog(log)

    # Close output file handles
    for t in handles_dict:
        handles_dict[t].close()

    return [handles_dict[t].name for t in handles_dict]
```
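A quick standalone check of the filename-sanitizing substitution used above (the original tag still keys the writers; only the filename label changes):

```python
# Standalone check of the filename-sanitizing substitution in splitDbFile.
no_good = {'/': 'f', '\\': 'b', '?': 'q', '%': 'p', '*': 's', ':': 'c',
           '|': 'pi', '"': 'dq', "'": 'sq', '<': 'lt', '>': 'gt', ' ': '_'}
tag = 'IGHV1/OR15-1*01'
label = tag
for c, r in no_good.items():
    label = label.replace(c, r)
print(label)  # IGHV1fOR15-1s01
```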
```python
# TODO: SHOULD ALLOW FOR UNSORTED CLUSTER COLUMN
# TODO: SHOULD ALLOW FOR GROUPING FIELDS
def convertDbClip(db_file, id_field=default_id_field, seq_field=default_seq_field,
                  germ_field=default_germ_field, cluster_field=None,
                  meta_fields=None, out_args=default_out_args):
    """
    Builds clip-fasta files from database records, with germline sequences
    preceding the reads they belong to

    Arguments:
    db_file = the database file name
    id_field = the field containing identifiers
    seq_field = the field containing sample sequences
    germ_field = the field containing germline sequences
    cluster_field = the field containing clonal groupings;
                    if None write the germline for each record
    meta_fields = a list of fields to add to sequence annotations
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'clip'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    log['GERM_FIELD'] = germ_field
    log['CLUSTER_FIELD'] = cluster_field
    if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='clip')
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = germ_count = pass_count = fail_count = 0
    cluster_last = None
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update cluster ID
        cluster = rec.get(cluster_field, None)

        # Get germline SeqRecord when needed
        if cluster_field is None:
            germ = getDbSeqRecord(rec, id_field, germ_field, meta_fields,
                                  delimiter=out_args['delimiter'])
        elif cluster != cluster_last:
            germ = getDbSeqRecord(rec, cluster_field, germ_field,
                                  delimiter=out_args['delimiter'])
        else:
            germ = None

        # Prefix the germline id so the fasta writer's '>' yields a '>>' header;
        # guard against empty germline fields, for which getDbSeqRecord returns None
        if germ is not None:
            germ.id = '>' + germ.id

        # Get read SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields,
                             delimiter=out_args['delimiter'])

        # Write germline
        if germ is not None:
            germ_count += 1
            SeqIO.write(germ, pass_handle, 'fasta')

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, 'fasta')
        else:
            fail_count += 1

        # Set last cluster ID
        cluster_last = cluster

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['GERMLINES'] = germ_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
```
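The '>>' germline headers promised by the clip subcommand's help text fall out of the single '>' prefixed onto germ.id above, which Biopython's fasta writer prefixes again; a self-contained sketch:

```python
from io import StringIO

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Germline ids carry a leading '>' so SeqIO's own '>' produces the
# double '>>' clip header; reads keep a single '>'.
germ = SeqRecord(Seq('ACGT'), id='>CLONE_1', description='')
read = SeqRecord(Seq('ACGA'), id='READ_A', description='')
buf = StringIO()
SeqIO.write([germ, read], buf, 'fasta')
print(buf.getvalue())
# >>CLONE_1
# ACGT
# >READ_A
# ACGA
```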
```python
def convertDbFasta(db_file, id_field=default_id_field, seq_field=default_seq_field,
                   meta_fields=None, out_args=default_out_args):
    """
    Builds fasta files from database records

    Arguments:
    db_file = the database file name
    id_field = the field containing identifiers
    seq_field = the field containing sequences
    meta_fields = a list of fields to add to sequence annotations
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'fasta'
    log['FILE'] = os.path.basename(db_file)
    log['ID_FIELD'] = id_field
    log['SEQ_FIELD'] = seq_field
    if meta_fields is not None: log['META_FIELDS'] = ','.join(meta_fields)
    printLog(log)

    # Open file handles
    out_type = 'fasta'
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='sequences', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type=out_type)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Get SeqRecord
        seq = getDbSeqRecord(rec, id_field, seq_field, meta_fields, out_args['delimiter'])

        # Write sequences
        if seq is not None:
            pass_count += 1
            SeqIO.write(seq, pass_handle, out_type)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['PASS'] = pass_count
    log['FAIL'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name


def addDbFile(db_file, fields, values, out_args=default_out_args):
    """
    Adds field and value pairs to a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to add
    values = a list of values to assign to all rows of each field
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'add'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-add', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Define fields and values to append
    add_dict = {k: v for k, v in zip(fields, values) if k not in db_iter.fieldnames}

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write updated row
        rec.update(add_dict)
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
```
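Note that the add_dict comprehension above silently skips any field already present in the input, so a clashing field/value pair is ignored rather than overwritten:

```python
# How addDbFile pairs fields with values: existing columns are skipped,
# so a clashing field/value pair is ignored rather than overwritten.
existing_fieldnames = ['SEQUENCE_ID', 'V_CALL']
fields, values = ['SAMPLE', 'V_CALL'], ['S1', 'would-be-ignored']
add_dict = {k: v for k, v in zip(fields, values) if k not in existing_fieldnames}
print(add_dict)  # {'SAMPLE': 'S1'}
```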
```python
def indexDbFile(db_file, field=default_index_field, out_args=default_out_args):
    """
    Adds an index column to a database file

    Arguments:
    db_file = the database file name
    field = the name of the index field to add
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'index'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-index', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, add_fields=field)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Add count and write updated row
        rec.update({field: rec_count})
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name


def dropDbFile(db_file, fields, out_args=default_out_args):
    """
    Deletes entire fields from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to drop
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'drop'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-drop', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file, exclude_fields=fields)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # Write row
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name


def deleteDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Deletes records from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to check for deletion criteria
    values = a list of values defining deletion targets
    logic = one of 'any' or 'all' defining whether one or all fields must have a match.
    regex = if False do exact full string matches; if True allow partial regex matches.
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns): return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns): return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'delete'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-delete', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for deletion values in all fields
        delete = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if not delete:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['KEPT'] = pass_count
    log['DELETED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
```
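The interaction of --logic and --regex is easiest to see in isolation; this sketch replays the default combination (logic='any', regex=False) on a made-up record:

```python
# Replaying deleteDbFile's default matching (logic='any', regex=False)
# on a fake record: one matching field is enough to drop the row.
values = ['', 'NA']

def match(x, patterns):
    return x in patterns  # the regex=False branch above

rec = {'JUNCTION': 'NA', 'V_CALL': 'IGHV1-2*02'}
delete = any(match(rec.get(f, False), values) for f in ['JUNCTION', 'V_CALL'])
print(delete)  # True -- JUNCTION equals 'NA', so the record would be deleted
```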
```python
def renameDbFile(db_file, fields, names, out_args=default_out_args):
    """
    Renames fields in a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to rename
    names = a list of new names for fields
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'rename'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['NAMES'] = ','.join(names)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-rename', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')

    # Get header and rename fields
    header = (readDbFile(db_file, ig=False)).fieldnames
    for f, n in zip(fields, names):
        i = header.index(f)
        header[i] = n

    # Open writer and write new header
    # TODO: should modify getDbWriter to take a list of fields
    pass_writer = csv.DictWriter(pass_handle, fieldnames=header, dialect='excel-tab')
    pass_writer.writeheader()

    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1
        # TODO: repeated renaming is unnecessary; should add a non-dict reader/writer to DbCore
        # Rename fields
        for f, n in zip(fields, names):
            rec[n] = rec.pop(f)
        # Write
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
```
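The per-row rename above moves each value under its new key; csv.DictWriter then looks values up by the rewritten header, so the changed key order is harmless. A minimal illustration:

```python
# The per-row rename used by renameDbFile: values move under their new
# keys, and the writer finds them via the rewritten header afterwards.
from collections import OrderedDict

rec = OrderedDict([('SEQUENCE_ID', 'READ1'), ('CLONE', '42')])
for f, n in zip(['CLONE'], ['CLONE_ID']):
    rec[n] = rec.pop(f)
print(dict(rec))  # {'SEQUENCE_ID': 'READ1', 'CLONE_ID': '42'}
```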
```python
def selectDbFile(db_file, fields, values, logic='any', regex=False,
                 out_args=default_out_args):
    """
    Selects records from a database file

    Arguments:
    db_file = the database file name
    fields = a list of fields to check for selection criteria
    values = a list of values defining selection targets
    logic = one of 'any' or 'all' defining whether one or all fields must have a match.
    regex = if False do exact full string matches; if True allow partial regex matches.
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    # Define string match function
    if regex:
        def _match_func(x, patterns): return any([re.search(p, x) for p in patterns])
    else:
        def _match_func(x, patterns): return x in patterns

    # Define logic function
    if logic == 'any':
        _logic_func = any
    elif logic == 'all':
        _logic_func = all

    # Print console log
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'select'
    log['FILE'] = os.path.basename(db_file)
    log['FIELDS'] = ','.join(fields)
    log['VALUES'] = ','.join(values)
    log['REGEX'] = regex
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-select', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = fail_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Check for selection values in all fields
        select = _logic_func([_match_func(rec.get(f, False), values) for f in fields])

        # Write sequences
        if select:
            pass_count += 1
            pass_writer.writerow(rec)
        else:
            fail_count += 1

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['SELECTED'] = pass_count
    log['DISCARDED'] = fail_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name


def sortDbFile(db_file, field, numeric=False, descend=False,
               out_args=default_out_args):
    """
    Sorts records by values in an annotation field

    Arguments:
    db_file = the database filename
    field = the field name to sort by
    numeric = if True sort field numerically;
              if False sort field alphabetically
    descend = if True sort in descending order;
              if False sort in ascending order
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'sort'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['NUMERIC'] = numeric
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-sort', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)

    # Store all records in a dictionary
    start_time = time()
    printMessage("Indexing: Running", start_time=start_time)
    db_dict = {i: r for i, r in enumerate(db_iter)}
    result_count = len(db_dict)

    # Sort db_dict by field values
    tag_dict = {k: v[field] for k, v in db_dict.items()}
    if numeric: tag_dict = {k: float(v or 0) for k, v in tag_dict.items()}
    sorted_keys = sorted(tag_dict, key=tag_dict.get, reverse=descend)
    printMessage("Indexing: Done", start_time=start_time, end=True)

    # Iterate over records
    start_time = time()
    rec_count = 0
    for key in sorted_keys:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Write records
        pass_writer.writerow(db_dict[key])

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name
```
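The --num flag matters because a textual sort compares digit strings character by character; a quick comparison of the two key functions used above:

```python
# Why sortDbFile's numeric flag matters: a textual sort puts '10'
# before '9', while the float key (with '' coerced to 0) does not.
tag_dict = {0: '9', 1: '10', 2: ''}
print(sorted(tag_dict, key=tag_dict.get))                       # [2, 1, 0] -> '', '10', '9'
print(sorted(tag_dict, key=lambda k: float(tag_dict[k] or 0)))  # [2, 0, 1] -> 0.0, 9.0, 10.0
```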
```python
def updateDbFile(db_file, field, values, updates, out_args=default_out_args):
    """
    Updates values in a field of a database file

    Arguments:
    db_file = the database file name
    field = the field to update
    values = a list of values specifying which rows to update
    updates = a list of values to update each value with
    out_args = common output argument dictionary from parseCommonArgs

    Returns:
    the output file name
    """
    log = OrderedDict()
    log['START'] = 'ParseDb'
    log['COMMAND'] = 'update'
    log['FILE'] = os.path.basename(db_file)
    log['FIELD'] = field
    log['VALUES'] = ','.join(values)
    log['UPDATES'] = ','.join(updates)
    printLog(log)

    # Open file handles
    db_iter = readDbFile(db_file, ig=False)
    pass_handle = getOutputHandle(db_file, out_label='parse-update', out_dir=out_args['out_dir'],
                                  out_name=out_args['out_name'], out_type='tab')
    pass_writer = getDbWriter(pass_handle, db_file)
    # Count records
    result_count = countDbFile(db_file)

    # Iterate over records
    start_time = time()
    rec_count = pass_count = 0
    for rec in db_iter:
        # Print progress for previous iteration
        printProgress(rec_count, result_count, 0.05, start_time)
        rec_count += 1

        # Update the value if it matches one of the targets
        for x, y in zip(values, updates):
            if rec[field] == x:
                rec[field] = y
                pass_count += 1

        # Write records
        pass_writer.writerow(rec)

    # Print counts
    printProgress(rec_count, result_count, 0.05, start_time)
    log = OrderedDict()
    log['OUTPUT'] = os.path.basename(pass_handle.name)
    log['RECORDS'] = rec_count
    log['UPDATED'] = pass_count
    log['END'] = 'ParseDb'
    printLog(log)

    # Close file handles
    pass_handle.close()

    return pass_handle.name


def getArgParser():
    """
    Defines the ArgumentParser

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output field help message
    fields = dedent(
             '''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and clip.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 SEQUENCE_ID

             optional fields:
                 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
                 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
                 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION

             output fields:
                 None
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Database operation')
    # TODO: This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Define parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       failed=False, log=False)

    # Subparser to convert database entries to a sequence file
    parser_seq = subparsers.add_parser('fasta', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Creates a fasta file from database records')
    parser_seq.add_argument('--if', action='store', dest='id_field',
                            default=default_id_field,
                            help='The name of the field containing identifiers')
    parser_seq.add_argument('--sf', action='store', dest='seq_field',
                            default=default_seq_field,
                            help='The name of the field containing sequences')
    parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                            help='List of annotation fields to add to the sequence description')
    parser_seq.set_defaults(func=convertDbFasta)

    # Subparser to convert database entries to a clip-fasta file
    parser_clip = subparsers.add_parser('clip', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='''Creates a clip-fasta file from database
                                             records, wherein germline sequences precede
                                             each clone and are denoted by ">>" headers.''')
    parser_clip.add_argument('--if', action='store', dest='id_field',
                             default=default_id_field,
                             help='The name of the field containing identifiers')
    parser_clip.add_argument('--sf', action='store', dest='seq_field',
                             default=default_seq_field,
                             help='The name of the field containing reads')
    parser_clip.add_argument('--gf', action='store', dest='germ_field',
                             default=default_germ_field,
                             help='The name of the field containing germline sequences')
    parser_clip.add_argument('--cf', action='store', dest='cluster_field', default=None,
                             help='The name of the field containing sorted clone IDs')
    parser_clip.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                             help='List of annotation fields to add to the sequence description')
    parser_clip.set_defaults(func=convertDbClip)

    # Subparser to partition files by annotation values
    parser_split = subparsers.add_parser('split', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Splits database files by field values')
    parser_split.add_argument('-f', action='store', dest='field', type=str, required=True,
                              help='Annotation field by which to split database files.')
    parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None,
                              help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.''')
    parser_split.set_defaults(func=splitDbFile)

    # Subparser to add fields
    parser_add = subparsers.add_parser('add', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Adds field and value pairs')
    parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                            help='The name of the fields to add.')
    parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                            help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser('delete', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Deletes specific records')
    parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for deletion criteria.')
    parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'],
                               help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.''')
    parser_delete.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_delete.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Deletes entire fields')
    parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                             help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to index fields
    parser_index = subparsers.add_parser('index', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Adds a numeric index field')
    parser_index.add_argument('-f', action='store', dest='field',
                              default=default_index_field,
                              help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Renames fields')
    parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='List of fields to rename.')
    parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True,
                               help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records
    parser_select = subparsers.add_parser('select', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Selects specific records')
    parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for selection criteria.')
    parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.''')
    parser_select.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_select.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort records
    parser_sort = subparsers.add_parser('sort', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Sorts records by field values')
    parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True,
                             help='The annotation field by which to sort records.')
    parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False,
                             help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    parser_sort.add_argument('--descend', action='store_true', dest='descend',
                             help='''If specified, sort records in descending, rather
                                  than ascending, order by values in the target field.''')
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update records
    parser_update = subparsers.add_parser('update', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Updates field and value pairs')
    parser_update.add_argument('-f', action='store', dest='field', required=True,
                               help='The name of the field to update.')
    parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='The values that will be replaced.')
    parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True,
                               help='The new values to assign, one per value specified with -u.')
    parser_update.set_defaults(func=updateDbFile)

    return parser


if __name__ == '__main__':
    """
    Parses command line arguments and calls main function
    """
    # Parse arguments
    parser = getArgParser()
    args = parser.parse_args()
    args_dict = parseCommonArgs(args)
    # Convert case of fields
    if 'id_field' in args_dict:
        args_dict['id_field'] = args_dict['id_field'].upper()
    if 'seq_field' in args_dict:
        args_dict['seq_field'] = args_dict['seq_field'].upper()
    if 'germ_field' in args_dict:
        args_dict['germ_field'] = args_dict['germ_field'].upper()
    if 'field' in args_dict:
        args_dict['field'] = args_dict['field'].upper()
    if 'cluster_field' in args_dict and args_dict['cluster_field'] is not None:
        args_dict['cluster_field'] = args_dict['cluster_field'].upper()
    if 'meta_fields' in args_dict and args_dict['meta_fields'] is not None:
        args_dict['meta_fields'] = [f.upper() for f in args_dict['meta_fields']]
    if 'fields' in args_dict:
        args_dict['fields'] = [f.upper() for f in args_dict['fields']]

    # Check modify_args arguments
    if args.command == 'add' and len(args_dict['fields']) != len(args_dict['values']):
        parser.error('You must specify exactly one value (-u) per field (-f)')
    elif args.command == 'rename' and len(args_dict['fields']) != len(args_dict['names']):
        parser.error('You must specify exactly one new name (-k) per field (-f)')
    elif args.command == 'update' and len(args_dict['values']) != len(args_dict['updates']):
        parser.error('You must specify exactly one replacement (-t) per value (-u)')

    # Call parser function for each database file
    del args_dict['command']
    del args_dict['func']
    del args_dict['db_files']
    for f in args.__dict__['db_files']:
        args_dict['db_file'] = f
        args.func(**args_dict)
```
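As a hedged smoke test of the subcommand wiring, the parser can be exercised without touching any files; the '-d' input flag is assumed to come from changeo's getCommonArgParser (db_in=True) and is not defined in this file:

```python
# Hypothetical smoke test of the subcommand wiring; the '-d' input flag
# is assumed to be defined by getCommonArgParser (db_in=True), not here.
parser = getArgParser()
args = parser.parse_args(['rename', '-d', 'db.tab', '-f', 'CLONE', '-k', 'CLONE_ID'])
print(args.command, args.fields, args.names, args.func.__name__)
# rename ['CLONE'] ['CLONE_ID'] renameDbFile
```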
