annotate bootstrap.py @ 8:35f9b833e12b draft default tip

planemo upload commit 2774930eebe258ecd56c8f1c5ddabf5092282ab9
author bcclaywell
date Mon, 12 Oct 2015 16:09:02 -0400
parents d4690e65afcd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
1 #!/usr/bin/env python
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
2
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
3 from __future__ import print_function
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
4 import csv
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
5 import sys
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
6 import os
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
7 import argparse
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
8
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
def warning(*objs):
    """Print a warning built from *objs* to standard error.

    The objects are written space-separated after a ``WARNING: `` prefix,
    followed by a newline.
    """
    prefixed = ("WARNING: ",) + objs
    print(*prefixed, file=sys.stderr)
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
11
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
def main(arguments):
    """Build barcode, label and metadata CSV files from a plate-layout CSV.

    Rows are read from ``infile`` (standard input when it is omitted or
    given as ``-``).  Every row must provide the ``sampleid``, ``reverse``
    and ``barcode`` columns.  When a ``zone`` column is present, rows whose
    zone differs from ``--zone`` are skipped.  For each remaining complete
    row a specimen name is derived from the plate, zone and reverse primer,
    and one line is written to each of the three output files.

    Args:
        arguments: Command-line arguments, excluding the program name.

    Returns:
        ``None`` on success, or an error message string (suitable for
        passing to ``sys.exit``) when an expected column is missing or a
        label or primer appears more than once.  Output written before the
        error was detected is left in place.
    """
    # ``arguments`` must not be passed positionally here: the first
    # positional parameter of ArgumentParser is ``prog``, not the argv list.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # nargs='?' is what makes the stdin default reachable; without it the
    # positional is mandatory and the default is never used.
    parser.add_argument('infile', nargs='?', help="CSV input",
                        type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('--junior',
                        help="use junior run specimen naming convention",
                        action='store_true')
    parser.add_argument('--plate', help="plate number", type=int,
                        required=True)
    parser.add_argument('--zone', help="zone number", type=int,
                        required=True)
    parser.add_argument('--barcodes', help="name of barcodes file",
                        type=argparse.FileType('w'), default='barcodes.csv')
    parser.add_argument('--labels', help="name of labels file",
                        type=argparse.FileType('w'), default='labels.csv')
    parser.add_argument('--metadata', help="name of metadata template file",
                        type=argparse.FileType('w'), default='metadata.csv')

    args = parser.parse_args(arguments)

    label_key = 'sampleid'
    primer_key = 'reverse'
    barcode_key = 'barcode'
    zone_key = 'zone'

    if args.junior:
        fstr = "j{plate_id}{primer_id}"
    else:
        fstr = "p{plate_id}z{zone_id}{primer_id}"

    streams = (args.infile, args.barcodes, args.labels, args.metadata)
    try:
        # Read from the parsed input file, not unconditionally from stdin.
        reader = csv.DictReader(args.infile)

        barcodes = csv.writer(args.barcodes)
        labels = csv.writer(args.labels)
        metadata = csv.writer(args.metadata)

        barcodes.writerow(['stub', 'barcode'])
        labels.writerow(['specimen', 'label'])
        metadata.writerow(['specimen', 'plate', 'zone', 'label', 'primer'])

        # Map each label / primer to the index of the row it was first seen
        # on, so duplicates can be reported with both row numbers.
        seen_labels = {}
        seen_primers = {}

        # Row numbers in messages are 1-based and count the header line,
        # hence the "+ 2" applied to the zero-based data-row index.
        for i, d in enumerate(reader):
            if not all(k in d for k in (label_key, primer_key, barcode_key)):
                return "Expected columns not found"

            # The zone column is optional; when present it filters rows.
            if zone_key in d and d[zone_key] != str(args.zone):
                continue

            label = d[label_key]
            primer = d[primer_key]
            barcode = d[barcode_key]
            zone = args.zone

            if not all((label, primer, barcode)):
                # only print a warning if at least one of the fields is
                # non-empty; fully blank rows are skipped silently
                if any((label, primer, barcode)):
                    warning("Missing required field on row {}, skipping".format(i+2))
                continue

            if label in seen_labels:
                return "Duplicate label '{}' found on rows {} and {}".format(label, seen_labels[label]+2, i+2)

            if primer in seen_primers:
                return "Duplicate primer '{}' found on rows {} and {}".format(primer, seen_primers[primer]+2, i+2)

            seen_labels[label] = i
            seen_primers[primer] = i

            primer_id = primer.strip().lower().replace('-', '')
            specimen = fstr.format(plate_id=args.plate, zone_id=zone,
                                   primer_id=primer_id)
            barcodes.writerow([specimen, barcode])
            labels.writerow([specimen, label])
            metadata.writerow([specimen, args.plate, zone, label, primer])
    finally:
        # Flush and release the files argparse opened, but leave the
        # process-wide standard streams alone ("-" maps to them).
        for stream in streams:
            if stream not in (sys.stdin, sys.stdout):
                stream.close()
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
82
d4690e65afcd Uploaded
bcclaywell
parents:
diff changeset
if __name__ == '__main__':
    # main() returns None on success or an error message string; sys.exit
    # prints a string to stderr and exits with a non-zero status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)