Mercurial > repos > bcclaywell > microbiome_pplacer_suite
comparison bootstrap.py @ 0:d4690e65afcd draft
Uploaded
| author | bcclaywell |
|---|---|
| date | Thu, 26 Feb 2015 18:16:36 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d4690e65afcd |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 from __future__ import print_function | |
| 4 import csv | |
| 5 import sys | |
| 6 import os | |
| 7 import argparse | |
| 8 | |
def warning(*objs):
    """Write a warning line to standard error.

    The given objects are printed after a ``"WARNING: "`` prefix, separated
    by single spaces and followed by a newline, exactly as ``print`` would
    render them.
    """
    pieces = ("WARNING: ",) + objs
    print(*pieces, file=sys.stderr)
| 11 | |
def main(arguments):
    """Build barcode, label and metadata CSV files from a sample sheet.

    The input CSV must have ``sampleid``, ``reverse`` and ``barcode``
    columns.  If it also has a ``zone`` column, only rows whose zone equals
    ``--zone`` are used.  Every accepted row yields a specimen name of the
    form ``p{plate}z{zone}{primer}`` (or ``j{plate}{primer}`` with
    ``--junior``), where the primer is stripped, lower-cased and has its
    dashes removed.  Three files are written:

    * barcodes: ``stub,barcode``
    * labels:   ``specimen,label``
    * metadata: ``specimen,plate,zone,label,primer``

    Args:
        arguments: list of command-line arguments, without the program name.

    Returns:
        ``None`` on success, otherwise an error message string.  The value
        is meant to be handed to ``sys.exit``, which prints a string to
        stderr and exits with a non-zero status.
    """
    # NOTE: the first positional parameter of ArgumentParser is ``prog``, so
    # the argument list must only be given to parse_args() below.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # nargs='?' is what makes the stdin default reachable for a positional.
    parser.add_argument('infile', help="CSV input", nargs='?',
                        type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('--junior',
                        help="use junior run specimen naming convention",
                        action='store_true')
    parser.add_argument('--plate', help="plate number", type=int,
                        required=True)
    parser.add_argument('--zone', help="zone number", type=int,
                        required=True)
    parser.add_argument('--barcodes', help="name of barcodes file",
                        type=argparse.FileType('w'), default='barcodes.csv')
    parser.add_argument('--labels', help="name of labels file",
                        type=argparse.FileType('w'), default='labels.csv')
    parser.add_argument('--metadata', help="name of metadata template file",
                        type=argparse.FileType('w'), default='metadata.csv')

    args = parser.parse_args(arguments)

    label_key = 'sampleid'
    primer_key = 'reverse'
    barcode_key = 'barcode'
    zone_key = 'zone'
    required_keys = (label_key, primer_key, barcode_key)

    if args.junior:
        fstr = "j{plate_id}{primer_id}"
    else:
        fstr = "p{plate_id}z{zone_id}{primer_id}"

    try:
        # Read the file the user named, not unconditionally sys.stdin.
        reader = csv.DictReader(args.infile)

        barcodes = csv.writer(args.barcodes)
        labels = csv.writer(args.labels)
        metadata = csv.writer(args.metadata)

        barcodes.writerow(['stub', 'barcode'])
        labels.writerow(['specimen', 'label'])
        metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer'])

        # Validate the header once, up front, so a sheet without data rows
        # (or an empty file) is rejected as well.
        header = reader.fieldnames or []
        if not all(k in header for k in required_keys):
            return "Expected columns not found"
        filter_by_zone = zone_key in header

        seen_labels = {}    # label -> index of the row that first used it
        seen_primers = {}   # normalized primer -> index of first row

        for i, d in enumerate(reader):
            if filter_by_zone and d[zone_key] != str(args.zone):
                continue

            label = d[label_key]
            primer = d[primer_key]
            barcode = d[barcode_key]

            if not all((label, primer, barcode)):
                # only print a warning if at least one of the fields is
                # non-empty; fully blank rows are skipped silently.
                # i + 2: rows are reported 1-based and the header is row 1.
                if any((label, primer, barcode)):
                    warning("Missing required field on row {}, skipping".format(i+2))
                continue

            if label in seen_labels:
                return "Duplicate label '{}' found on rows {} and {}".format(
                    label, seen_labels[label]+2, i+2)

            # Uniqueness is judged on the normalized primer because that is
            # what ends up in the specimen name; "R-01" and "r01" would
            # otherwise silently produce the same specimen twice.
            primer_id = primer.strip().lower().replace('-', '')
            if primer_id in seen_primers:
                return "Duplicate primer '{}' found on rows {} and {}".format(
                    primer, seen_primers[primer_id]+2, i+2)

            seen_labels[label] = i
            seen_primers[primer_id] = i

            specimen = fstr.format(plate_id=args.plate, zone_id=args.zone,
                                   primer_id=primer_id)
            barcodes.writerow([specimen, barcode])
            labels.writerow([specimen, label])
            metadata.writerow([specimen, args.plate, args.zone, label, primer])
    finally:
        # Close (and thereby flush) everything argparse opened for us, but
        # leave the interpreter's standard streams alone.
        for handle in (args.infile, args.barcodes, args.labels, args.metadata):
            if handle is sys.stdin or handle is sys.stdout:
                continue
            handle.close()
| 82 | |
# Script entry point: run main() on the command-line arguments (program name
# dropped) and hand its result to sys.exit() as the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
