annotate check_bcfile.py @ 2:6e43ea81f086 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Wed, 15 Jul 2020 21:13:59 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
2
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
3 import argparse
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
4 import sys
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
5
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
6 parser = argparse.ArgumentParser()
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
7 parser.add_argument('bcfile', help='barcode file')
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
8 args = parser.parse_args()
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
9
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
10 barcodes = []
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
11
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
12 with open(args.bcfile, "r") as fh:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
13 for line in fh:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
14 if len(line) == 0:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
15 continue
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
16 if line.startswith("#"):
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
17 continue
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
18 barcodes.append(line.split())
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
19
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
20 if len(barcodes) <= 1:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
21 sys.exit("barcode file is empty")
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
22
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
23 # check that all lines have the same number of columns
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
24 ncol = None
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
25 for bc in barcodes:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
26 if ncol is None:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
27 ncol = len(bc)
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
28 elif ncol != len(bc):
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
29 sys.exit("barcode file has inconsistent number of columns")
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
30
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
31 isname = False
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
32 for bc in barcodes:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
33 if len(bc[-1].strip("ATCGatcg")) > 0:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
34 isname = True
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
35 break
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
36
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
37 names = set()
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
38 for bc in barcodes:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
39 if isname:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
40 n = bc[-1]
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
41 else:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
42 n = '-'.join(bc)
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
43 if n in names:
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
44 sys.exit("duplicate sample %s in barcode file" % n)
6e43ea81f086 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
45 names.add(n)