Mercurial > repos > bcclaywell > microbiome_pplacer_suite
diff bootstrap.py @ 0:d4690e65afcd draft
Uploaded
author | bcclaywell |
---|---|
date | Thu, 26 Feb 2015 18:16:36 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Build barcode, label and metadata-template CSV files from a plate layout.

The input CSV must provide the columns ``sampleid``, ``reverse`` (the primer
name) and ``barcode``.  When a ``zone`` column is present, only rows whose
zone equals ``--zone`` are used.  Each retained row gets a specimen name made
of the plate number, the zone number (left out with ``--junior``) and the
normalized primer name, and is written to three files:

  barcodes   stub,barcode
  labels     specimen,label
  metadata   specimen,plate,zone,label,primer
"""

from __future__ import print_function
import csv
import sys
import os
import argparse

# Column names looked up in the input CSV header.
_LABEL_KEY = 'sampleid'
_PRIMER_KEY = 'reverse'
_BARCODE_KEY = 'barcode'
_ZONE_KEY = 'zone'
_REQUIRED_KEYS = (_LABEL_KEY, _PRIMER_KEY, _BARCODE_KEY)


def warning(*objs):
    """Print a warning built from *objs* to standard error."""
    print("WARNING: ", *objs, file=sys.stderr)


def _build_parser():
    """Return the argument parser describing the command-line interface."""
    # Note: the first positional parameter of ArgumentParser is ``prog``, so
    # the argv list must not be passed here; it goes to parse_args() instead.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # nargs='?' makes the positional optional, which is what allows the
    # stdin default to take effect when no input file is named.
    parser.add_argument('infile', nargs='?', help="CSV input",
                        type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('--junior',
                        help="use junior run specimen naming convention",
                        action='store_true')
    parser.add_argument('--plate', help="plate number",
                        type=int, required=True)
    parser.add_argument('--zone', help="zone number",
                        type=int, required=True)
    parser.add_argument('--barcodes', help="name of barcodes file",
                        type=argparse.FileType('w'), default='barcodes.csv')
    parser.add_argument('--labels', help="name of labels file",
                        type=argparse.FileType('w'), default='labels.csv')
    parser.add_argument('--metadata', help="name of metadata template file",
                        type=argparse.FileType('w'), default='metadata.csv')
    return parser


def _normalize_primer(primer):
    """Return the primer name as used in specimen names (no '-', lowercase)."""
    return primer.strip().lower().replace('-', '')


def _close_unless_standard(handle):
    """Close *handle* unless it is one of the interpreter's standard streams."""
    if not any(handle is std for std in (sys.stdin, sys.stdout, sys.stderr)):
        handle.close()


def _write_tables(args):
    """Read ``args.infile`` and fill the three output files.

    Returns None on success, or an error message string as soon as a
    missing column or a duplicate label/primer is found.
    """
    if args.junior:
        name_format = "j{plate_id}{primer_id}"
    else:
        name_format = "p{plate_id}z{zone_id}{primer_id}"

    # Read from the file chosen on the command line (stdin only by default).
    reader = csv.DictReader(args.infile)

    barcodes = csv.writer(args.barcodes)
    labels = csv.writer(args.labels)
    metadata = csv.writer(args.metadata)

    barcodes.writerow(['stub', 'barcode'])
    labels.writerow(['specimen', 'label'])
    metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer'])

    # Map each label / normalized primer to the row number where it was seen.
    seen_labels = {}
    seen_primers = {}
    zone = args.zone

    # Row numbers are 1-based and count the header line, so data starts at 2.
    for row_number, row in enumerate(reader, start=2):
        # DictReader takes its keys from the header line, so a missing
        # column is reported on the first data row.
        if not all(key in row for key in _REQUIRED_KEYS):
            return "Expected columns not found"

        # Rows that belong to another zone are ignored.
        if _ZONE_KEY in row and row[_ZONE_KEY] != str(zone):
            continue

        label = row[_LABEL_KEY]
        primer = row[_PRIMER_KEY]
        barcode = row[_BARCODE_KEY]

        if not all((label, primer, barcode)):
            # only print a warning if at least one of the fields is non-empty
            if any((label, primer, barcode)):
                warning("Missing required field on row {}, skipping".format(
                    row_number))
            continue

        if label in seen_labels:
            return "Duplicate label '{}' found on rows {} and {}".format(
                label, seen_labels[label], row_number)

        # Compare primers in their normalized form: that is the form that
        # ends up in the specimen name, so 'R-01' and 'r01' would collide.
        primer_id = _normalize_primer(primer)
        if primer_id in seen_primers:
            return "Duplicate primer '{}' found on rows {} and {}".format(
                primer, seen_primers[primer_id], row_number)

        seen_labels[label] = row_number
        seen_primers[primer_id] = row_number

        specimen = name_format.format(
            plate_id=args.plate, zone_id=zone, primer_id=primer_id)
        barcodes.writerow([specimen, barcode])
        labels.writerow([specimen, label])
        metadata.writerow([specimen, args.plate, zone, label, primer])

    return None


def main(arguments):
    """Run the conversion for the given command-line *arguments*.

    *arguments* is the argument list without the program name (for example
    ``sys.argv[1:]``).  Returns None on success or an error message string,
    so the result can be handed straight to ``sys.exit``.
    """
    args = _build_parser().parse_args(arguments)
    try:
        return _write_tables(args)
    finally:
        # Close (and thereby flush) everything argparse opened for us.
        for handle in (args.infile, args.barcodes, args.labels, args.metadata):
            _close_unless_standard(handle)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))