Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 4:dbcea781900e draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit f971832d2b34a182314e5201ea6895dd207c5923
| author | rnateam |
|---|---|
| date | Mon, 13 Mar 2017 17:54:32 -0400 |
| parents | 6c88ad83de28 |
| children |
| rev | line source |
|---|---|
|
2
6c88ad83de28
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents:
diff
changeset
|
"""Add the sequences that CD-HIT merged away back into the cluster tables.

Usage: addCdhitseqs.py <cdhit cluster file>

The cluster file is expected to consist of ``>Cluster N`` header lines, each
followed by one line per sequence whose third whitespace-separated field is
``>ID...``; the representative sequence of a cluster is the line ending in
``*``.  Every file matching ``RESULTS/*.cluster.all`` is rewritten in place:
its columns are re-joined with tabs and, for each representative whose id
occurs in the file, one extra row per merged sequence is appended.
"""
import glob
import re
import sys

# Result tables that get rewritten, relative to the working directory.
CLUSTER_STATS_GLOB = "RESULTS/*.cluster.all"

# A representative line carries a trailing '*'.
_REP_MARK = re.compile(r"\*\s*$")

# Appended rows overwrite columns 3, 4 and 7, so the template needs >= 8.
_MIN_TEMPLATE_COLUMNS = 8


def _clean_id(token):
    """Strip the '>' prefix and '...' suffix wrapped around a sequence id."""
    return token.replace(">", "").replace("...", "")


def _store_cluster(rep_to_members, rep_id, members):
    """Record one finished cluster if it has a representative and members."""
    if rep_id is not None and members:
        rep_to_members[rep_id] = members


def parse_cdhit_clusters(content):
    """Map each representative id to the ids of the sequences merged into it.

    The representative is recognised by its trailing '*' wherever it appears
    inside the cluster, so members listed before it are kept as well.
    Clusters that contain only the representative produce no entry.  Blank
    lines, lines before the first header, lines with fewer than three fields
    and clusters without a representative are ignored.

    :param content: full text of the CD-HIT cluster file.
    :returns: dict of representative id -> list of merged sequence ids.
    """
    rep_to_members = {}
    rep_id = None
    members = []
    in_cluster = False

    for line in content.splitlines():
        if line.startswith(">Cluster"):
            _store_cluster(rep_to_members, rep_id, members)
            rep_id = None
            members = []
            in_cluster = True
            continue

        fields = line.split()
        if not in_cluster or len(fields) < 3:
            continue

        seq_id = _clean_id(fields[2])
        if _REP_MARK.search(line):
            rep_id = seq_id
        else:
            members.append(seq_id)

    # The last cluster has no following header to trigger the store.
    _store_cluster(rep_to_members, rep_id, members)
    return rep_to_members


def render_cluster_table(file_lines, rep_to_members):
    """Return the new text of one cluster table.

    Every existing line is re-joined with single tabs.  The first line then
    serves as a template for the appended rows: column 3 becomes ``---``,
    column 4 ``CD-Hit``, column 7 the merged sequence id and, when the
    template has more than nine columns, column 9 the id with its last
    ``_suffix`` removed.

    :param file_lines: the table's lines as returned by ``readlines()``.
    :param rep_to_members: mapping produced by ``parse_cdhit_clusters``.
    :returns: the complete replacement text; ``""`` for an empty table.
    :raises ValueError: if rows must be appended but the first line has too
        few columns to act as a template.
    """
    if not file_lines:
        return ""

    pieces = ["\t".join(line.split()) + "\n" for line in file_lines]
    content = "".join(file_lines)

    # NOTE(review): this is a plain substring test, so an id that is a prefix
    # of another id (e.g. "SEQ1_1" vs "SEQ1_10") also matches.  Kept as is
    # because the table schema is not visible here - confirm before
    # tightening it to whole-column matching.
    matched = [
        members
        for rep_id, members in rep_to_members.items()
        if rep_id in content
    ]
    if not matched:
        return "".join(pieces)

    template = file_lines[0].split()
    if len(template) < _MIN_TEMPLATE_COLUMNS:
        raise ValueError(
            "first line has {0} columns, need at least {1}".format(
                len(template), _MIN_TEMPLATE_COLUMNS
            )
        )

    has_origin_column = len(template) > 9
    for members in matched:
        for member in members:
            row = list(template)
            row[3] = "---"
            row[4] = "CD-Hit"
            row[7] = str(member)
            if has_origin_column:
                row[9] = str(member.rsplit("_", 1)[0])
            pieces.append("\t".join(row) + "\n")

    return "".join(pieces)


def rewrite_cluster_file(path, rep_to_members):
    """Rewrite one cluster table in place with the merged sequences added."""
    with open(path, "r+") as handle:
        rendered = render_cluster_table(handle.readlines(), rep_to_members)
        handle.seek(0)
        handle.write(rendered)
        # Drop leftovers in case the rendered text is shorter than before.
        handle.truncate()


def main(argv=None):
    """Read the CD-HIT cluster file and update every matching result table.

    :param argv: argument list without the program name; defaults to
        ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: addCdhitseqs.py <cdhit cluster file>")

    # Read-only access is enough for the CD-HIT file.
    with open(args[0], "r") as handle:
        rep_to_members = parse_cdhit_clusters(handle.read())

    for path in sorted(glob.glob(CLUSTER_STATS_GLOB)):
        rewrite_cluster_file(path, rep_to_members)


if __name__ == "__main__":
    main()
