comparison splitSHAPE.py @ 6:e31c659be8bc draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 6767a5ffb02052c844e9d862c79912f998f39d8e
author rnateam
date Mon, 20 Nov 2017 05:01:21 -0500
parents f4ad5dceb619
children
comparison
Legend: equal | deleted | inserted | replaced
Left column: revision 5:f4ad5dceb619 | Right column: revision 6:e31c659be8bc
1 import os 1 import os
2 import re 2 import re
3 import sys 3 import sys
4 4
5 shape_file = sys.argv[1] 5 shape_file = sys.argv[1]
6 win_size = int(sys.argv[2])
7 6
8 pattern = re.compile("^>.*$") 7 pattern = re.compile("^>.*$")
9 toWrite = "" 8 toWrite = ""
10 9
11 count_for_id = 1 10 count_for_id = 1
16 seq_string = [] 15 seq_string = []
17 orig_id = [] 16 orig_id = []
18 name_file = "FASTA/data.names" 17 name_file = "FASTA/data.names"
19 array_all_chunks = [] 18 array_all_chunks = []
20 with open(name_file, 'r') as f: 19 with open(name_file, 'r') as f:
21 content = f.read() 20 for line in f:
22 lines = content.split('\n')[:-1] 21 if len(line.strip()) == 0:
23 for line in lines: 22 continue
24 seq_id.append(int(line.split()[0])) 23 seq_id.append(int(line.split()[0]))
25 seq_string.append(line.split()[1]) 24 seq_string.append(line.split()[1])
26 orig_id_srt = line.split()[3] 25 orig_id_srt = line.split()[3]
27 orig_id_srt = orig_id_srt.rsplit('_',1)[0] 26 orig_id_srt = orig_id_srt.rsplit('_',1)[0]
28 orig_id.append(orig_id_srt) 27 orig_id.append(orig_id_srt)
34 with open(shape_file, 'r') as shape: 33 with open(shape_file, 'r') as shape:
35 content = shape.read() 34 content = shape.read()
36 lines = content.split('\n') 35 lines = content.split('\n')
37 for line in lines: 36 for line in lines:
38 if pattern.match(line): 37 if pattern.match(line):
39 line = line.replace('>','').strip() 38 line = line.replace('>','').split()[0]
40 react_arr=[] 39 react_arr=[]
41 react_dict[line] = react_arr 40 react_dict[line] = react_arr
42 continue 41 continue
43 else: 42 else:
44 react_arr.append(line) 43 react_arr.append(line)
45 44
46 toWrite = "" 45 toWrite = ""
47 chunks = [] 46 chunks = []
48 for i in range(len(orig_id)): 47 for i in range(len(orig_id)):
49 if not orig_id[i] in react_dict: 48 if not orig_id[i] in react_dict:
50 raise RuntimeError('Error key {} not found'.format(orig_id)) 49 raise RuntimeError('Error key {} {} not found'.format(i, orig_id[i]))
51 50
52 react_val = react_dict[orig_id[i]] 51 react_val = react_dict[orig_id[i]]
53 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n" 52 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n"
54 chunks = re.findall(r'\d+', seq_string[i]) 53 chunks = re.findall(r'\d+', seq_string[i])
55 54