Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 4:dbcea781900e draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit f971832d2b34a182314e5201ea6895dd207c5923
author | rnateam |
---|---|
date | Mon, 13 Mar 2017 17:54:32 -0400 |
parents | 6c88ad83de28 |
children |
rev | line source |
---|---|
2
6c88ad83de28
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents:
diff
changeset
|
"""Append CD-HIT redundant sequences to the cluster result files.

Usage: addCdhitseqs.py <cdhit_cluster_file>

The CD-HIT cluster file given on the command line is parsed into a mapping
from each representative sequence id to the ids of the other members of its
cluster.  Every file matching ``RESULTS/*.cluster.all`` (relative to the
current working directory) is then rewritten with tab-separated columns, and
for every representative id that occurs in a file one extra row per
clustered-away sequence is appended to that file.
"""
import glob
import re
import sys

# Glob pattern (relative to the working directory) of the files to extend.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"

# A member line marks the cluster representative with a trailing "*".
# Trailing whitespace (including a stray "\r") is tolerated.
REP_MARKER_RE = re.compile(r"\*\s*$")


def extract_seq_id(member_line):
    """Return the sequence id stored in one cluster member line.

    The id is the third whitespace-separated token of the line with the
    ``>`` prefix and the ``...`` suffix removed.

    Raises:
        IndexError: if the line has fewer than three tokens.
    """
    return member_line.split()[2].replace(">", "").replace("...", "")


def parse_cdhit_clusters(content):
    """Map each representative id to the ids of its redundant sequences.

    Args:
        content: full text of a CD-HIT cluster file.  Lines starting with
            ``>Cluster`` open a new cluster; every other non-blank line is a
            member of the most recently opened cluster.

    Returns:
        dict mapping representative id -> list of the ids of all other
        members of the same cluster, in file order.  Clusters that contain
        only their representative produce no entry.
    """
    clusters = []
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">Cluster"):
            clusters.append([])
        elif clusters:
            # Member lines that appear before any cluster header cannot be
            # attributed to a representative and are ignored.
            clusters[-1].append(line)

    rep_to_redundant = {}
    for members in clusters:
        # The representative is looked up inside its own cluster, so it may
        # sit at any position and a cluster without one cannot shift the
        # pairing of the clusters that follow it.
        rep_ids = [extract_seq_id(m) for m in members
                   if REP_MARKER_RE.search(m)]
        redundant = [extract_seq_id(m) for m in members
                     if not REP_MARKER_RE.search(m)]
        if rep_ids and redundant:
            rep_to_redundant[rep_ids[0]] = redundant
    return rep_to_redundant


def append_redundant_rows(cluster_file, rep_to_redundant):
    """Tab-normalise ``cluster_file`` and append rows for redundant sequences.

    Every existing line is rewritten with its whitespace-separated columns
    joined by single tabs.  Then, for each representative id that occurs
    anywhere in the file (substring match), one row per redundant sequence
    is appended.  Each appended row is a copy of the file's first line with
    column 3 set to ``---``, column 4 set to ``CD-Hit``, column 7 set to the
    redundant id and, when the first line has more than nine columns,
    column 9 set to the redundant id without its last ``_``-separated part.

    An empty file is left empty.

    Raises:
        IndexError: if a representative matches but the first line has
            fewer than eight columns.
    """
    with open(cluster_file, "r+") as cl_file:
        file_lines = cl_file.readlines()

        normalized = "".join(
            "\t".join(line.split()) + "\n" for line in file_lines
        )
        cl_file.seek(0)
        cl_file.write(normalized)
        cl_file.truncate()

        if not file_lines:
            # Nothing to use as a row template.
            return

        template = file_lines[0].split()
        file_content = "\n".join(file_lines)

        extra_rows = []
        for rep_id, redundant_ids in rep_to_redundant.items():
            if rep_id not in file_content:
                continue
            for seq_id in redundant_ids:
                row = list(template)
                row[3] = "---"
                row[4] = "CD-Hit"
                row[7] = str(seq_id)
                if len(row) > 9:
                    row[9] = seq_id.rsplit("_", 1)[0]
                extra_rows.append("\t".join(row) + "\n")

        # The file position is at the end of the normalised content, so
        # this appends.
        cl_file.write("".join(extra_rows))


def main(argv=None):
    """Run the script; ``argv[1]`` is the path of the CD-HIT cluster file."""
    argv = sys.argv if argv is None else argv
    cdhitcluster = argv[1]

    # The cluster file is only read, so it is not opened for writing.
    with open(cdhitcluster, "r") as handle:
        rep_to_redundant = parse_cdhit_clusters(handle.read())

    for single_file in sorted(glob.glob(cluster_seqs_stats_path)):
        append_redundant_rows(single_file, rep_to_redundant)


if __name__ == "__main__":
    main()