annotate germline/split-from-imgt.py @ 1:d6b3bde8e63d draft

Uploaded
author davidvanzessen
date Tue, 09 Dec 2014 04:02:23 -0500
parents fe4001bdf8d4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fe4001bdf8d4 Uploaded
davidvanzessen
parents:
diff changeset
1
fe4001bdf8d4 Uploaded
davidvanzessen
parents:
diff changeset
2
fe4001bdf8d4 Uploaded
davidvanzessen
parents:
diff changeset
"""Split an IMGT/GENE-DB FASTA stream into one FASTA file per gene system."""

import os
import sys

# Parse lines in IMGT/GENE-DB such as:
# >M12949|TRGV1*01|Homo sapiens|ORF|...

# A record is kept only if its header mentions one of these region labels.
REGION_TAGS = ("V-REGION", "D-REGION", "J-REGION")


def _system_of(header):
    """Return the 4-character system prefix of a wanted header, else None.

    A header is wanted when it contains "Homo sapiens", contains one of
    REGION_TAGS, and its second '|'-separated field starts with 'IG' or
    'TR'.  The prefix is the first four characters of that field, e.g.
    'TRGV' for 'TRGV1*01'.
    """
    if "Homo sapiens" not in header:
        return None
    if not any(tag in header for tag in REGION_TAGS):
        return None
    fields = header.split('|')
    if len(fields) < 2:
        # No gene-name field: the record cannot be classified, so skip it
        # like any other unwanted record instead of raising IndexError.
        return None
    system = fields[1][:4]
    if system.startswith(('IG', 'TR')):
        return system
    return None


def split_imgt(lines, out_dir='.'):
    """Write each wanted record of *lines* to '<system>.fa' in *out_dir*.

    lines   -- iterable of text lines (newline included), e.g. sys.stdin.
    out_dir -- directory receiving the output files; defaults to the
               current directory.

    Every line containing '>' is treated as a header and starts a new
    record.  The header and all following lines up to the next header are
    appended to the file chosen by _system_of(); records whose header is
    not wanted are dropped.  A " ==> <name>" notice is written to stdout
    the first time each output file is created.

    Returns the list of created file names, in creation order.
    """
    open_files = {}
    created = []
    current_file = None
    try:
        for line in lines:
            if ">" in line:
                # New record: forget the previous destination first so an
                # unwanted record is not appended to the previous file.
                current_file = None
                system = _system_of(line)
                if system is not None:
                    current_file = open_files.get(system)
                    if current_file is None:
                        name = '%s.fa' % system
                        sys.stdout.write(" ==> %s\n" % name)
                        current_file = open(os.path.join(out_dir, name), 'w')
                        open_files[system] = current_file
                        created.append(name)

            if current_file is not None:
                current_file.write(line)
    finally:
        # Close every output explicitly so buffered data reaches disk even
        # if reading or writing fails part-way through.
        for handle in open_files.values():
            handle.close()
    return created


if __name__ == "__main__":
    split_imgt(sys.stdin)
30
fe4001bdf8d4 Uploaded
davidvanzessen
parents:
diff changeset
31
fe4001bdf8d4 Uploaded
davidvanzessen
parents:
diff changeset
32