Mercurial > repos > rnateam > graphclust_preprocessing
comparison splitSHAPE.py @ 6:e31c659be8bc draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 6767a5ffb02052c844e9d862c79912f998f39d8e
author | rnateam |
---|---|
date | Mon, 20 Nov 2017 05:01:21 -0500 |
parents | f4ad5dceb619 |
children |
Comparison legend: equal | deleted | inserted | replaced
5:f4ad5dceb619 | 6:e31c659be8bc |
---|---|
1 import os | 1 import os |
2 import re | 2 import re |
3 import sys | 3 import sys |
4 | 4 |
5 shape_file = sys.argv[1] | 5 shape_file = sys.argv[1] |
6 win_size = int(sys.argv[2]) | |
7 | 6 |
8 pattern = re.compile("^>.*$") | 7 pattern = re.compile("^>.*$") |
9 toWrite = "" | 8 toWrite = "" |
10 | 9 |
11 count_for_id = 1 | 10 count_for_id = 1 |
16 seq_string = [] | 15 seq_string = [] |
17 orig_id = [] | 16 orig_id = [] |
18 name_file = "FASTA/data.names" | 17 name_file = "FASTA/data.names" |
19 array_all_chunks = [] | 18 array_all_chunks = [] |
20 with open(name_file, 'r') as f: | 19 with open(name_file, 'r') as f: |
21 content = f.read() | 20 for line in f: |
22 lines = content.split('\n')[:-1] | 21 if len(line.strip()) == 0: |
23 for line in lines: | 22 continue |
24 seq_id.append(int(line.split()[0])) | 23 seq_id.append(int(line.split()[0])) |
25 seq_string.append(line.split()[1]) | 24 seq_string.append(line.split()[1]) |
26 orig_id_srt = line.split()[3] | 25 orig_id_srt = line.split()[3] |
27 orig_id_srt = orig_id_srt.rsplit('_',1)[0] | 26 orig_id_srt = orig_id_srt.rsplit('_',1)[0] |
28 orig_id.append(orig_id_srt) | 27 orig_id.append(orig_id_srt) |
34 with open(shape_file, 'r') as shape: | 33 with open(shape_file, 'r') as shape: |
35 content = shape.read() | 34 content = shape.read() |
36 lines = content.split('\n') | 35 lines = content.split('\n') |
37 for line in lines: | 36 for line in lines: |
38 if pattern.match(line): | 37 if pattern.match(line): |
39 line = line.replace('>','').strip() | 38 line = line.replace('>','').split()[0] |
40 react_arr=[] | 39 react_arr=[] |
41 react_dict[line] = react_arr | 40 react_dict[line] = react_arr |
42 continue | 41 continue |
43 else: | 42 else: |
44 react_arr.append(line) | 43 react_arr.append(line) |
45 | 44 |
46 toWrite = "" | 45 toWrite = "" |
47 chunks = [] | 46 chunks = [] |
48 for i in range(len(orig_id)): | 47 for i in range(len(orig_id)): |
49 if not orig_id[i] in react_dict: | 48 if not orig_id[i] in react_dict: |
50 raise RuntimeError('Error key {} not found'.format(orig_id)) | 49 raise RuntimeError('Error key {} {} not found'.format(i, orig_id[i])) |
51 | 50 |
52 react_val = react_dict[orig_id[i]] | 51 react_val = react_dict[orig_id[i]] |
53 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n" | 52 toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n" |
54 chunks = re.findall(r'\d+', seq_string[i]) | 53 chunks = re.findall(r'\d+', seq_string[i]) |
55 | 54 |