Mercurial > repos > rnateam > graphclust_preprocessing
diff splitSHAPE.py @ 6:e31c659be8bc draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 6767a5ffb02052c844e9d862c79912f998f39d8e
| author | rnateam |
|---|---|
| date | Mon, 20 Nov 2017 05:01:21 -0500 |
| parents | f4ad5dceb619 |
| children |
line wrap: on
line diff
--- a/splitSHAPE.py Wed May 24 09:56:11 2017 -0400
+++ b/splitSHAPE.py Mon Nov 20 05:01:21 2017 -0500
@@ -3,7 +3,6 @@
 import sys
 
 shape_file = sys.argv[1]
-win_size = int(sys.argv[2])
 
 pattern = re.compile("^>.*$")
 toWrite = ""
@@ -18,9 +17,9 @@
 name_file = "FASTA/data.names"
 array_all_chunks = []
 with open(name_file, 'r') as f:
-    content = f.read()
-    lines = content.split('\n')[:-1]
-    for line in lines:
+    for line in f:
+        if len(line.strip()) == 0:
+            continue
         seq_id.append(int(line.split()[0]))
         seq_string.append(line.split()[1])
         orig_id_srt = line.split()[3]
@@ -36,7 +35,7 @@
     lines = content.split('\n')
     for line in lines:
         if pattern.match(line):
-            line = line.replace('>','').strip()
+            line = line.replace('>','').split()[0]
             react_arr=[]
             react_dict[line] = react_arr
             continue
@@ -47,7 +46,7 @@
 chunks = []
 for i in range(len(orig_id)):
     if not orig_id[i] in react_dict:
-        raise RuntimeError('Error key {} not found'.format(orig_id))
+        raise RuntimeError('Error key {} {} not found'.format(i, orig_id[i]))
     react_val = react_dict[orig_id[i]]
     toWrite += '>' + str(seq_id[i]) + " " + seq_string[i] + "\n"
 
