0
|
1
|
|
2
|
|
3 import sys
|
|
4
|
|
5 # Parse lines in IMGT/GENE-DB such as:
|
|
6 # >M12949|TRGV1*01|Homo sapiens|ORF|...
|
|
7
|
|
# Region labels a header must mention for its record to be kept.
WANTED_REGIONS = ("V-REGION", "D-REGION", "J-REGION")

# Prefixes accepted for the 4-character system name taken from the header.
WANTED_PREFIXES = ("IG", "TR")


def _system_of(header):
    """Return the system name for a wanted header line, or None.

    A header is wanted when it mentions "Homo sapiens" and one of
    WANTED_REGIONS, and the first four characters of its second
    '|'-separated field start with one of WANTED_PREFIXES. For
    ">M12949|TRGV1*01|Homo sapiens|ORF|..." that name is "TRGV".
    """
    if "Homo sapiens" not in header:
        return None
    if not any(region in header for region in WANTED_REGIONS):
        return None

    fields = header.split('|')
    if len(fields) < 2:
        # Malformed header without a second field: skip the record
        # instead of failing with IndexError.
        return None

    system = fields[1][:4]
    if system.startswith(WANTED_PREFIXES):
        return system
    return None


def split_by_system(lines):
    """Split FASTA-like records into one '<system>.fa' file per system.

    lines: iterable of text lines (each normally ending with a newline).
    Any line containing ">" is treated as a header and selects the output
    file for itself and the lines that follow it; records whose header is
    not wanted (see _system_of) are dropped. Files are created in the
    current working directory, and " ==> <name>" is written to standard
    output the first time each file is opened.

    Returns the sorted list of system names for which a file was written.
    """
    open_files = {}
    current_file = None

    try:
        for l in lines:
            if ">" in l:
                system = _system_of(l)
                if system is None:
                    current_file = None
                elif system in open_files:
                    current_file = open_files[system]
                else:
                    name = '%s.fa' % system
                    # sys.stdout.write gives the same output on Python 2
                    # and Python 3, unlike the print statement.
                    sys.stdout.write(" ==> %s\n" % name)
                    current_file = open(name, 'w')
                    open_files[system] = current_file

            if current_file is not None:
                current_file.write(l)
    finally:
        # Close every output file, even if reading or writing failed,
        # so buffered data is flushed.
        for handle in open_files.values():
            handle.close()

    return sorted(open_files)


if __name__ == "__main__":
    split_by_system(sys.stdin)
|
|
30
|
|
31
|
|
32
|