annotate addCdhitseqs.py @ 4:dbcea781900e draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit f971832d2b34a182314e5201ea6895dd207c5923
author rnateam
date Mon, 13 Mar 2017 17:54:32 -0400
parents 6c88ad83de28
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
6c88ad83de28 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents:
diff changeset
"""Add sequences removed by CD-HIT back into GraphClust cluster result files.

Usage: addCdhitseqs.py <cdhit .clstr file>

The CD-HIT cluster file is parsed into a mapping from each representative
sequence ID to the IDs of the redundant sequences CD-HIT collapsed into it.
Every file matching ``RESULTS/*.cluster.all`` is then rewritten in place with
tab-separated columns, and for every representative whose ID occurs in a file
one extra row per redundant sequence is appended (marked ``CD-Hit``).
"""
import re
import glob
import sys

# Glob pattern (relative to the working directory) of the files to extend.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"


def _sequence_id(member_line):
    """Return the sequence ID from one member line of a CD-HIT .clstr file.

    The ID is the third whitespace-separated field with every ">" and "..."
    removed, e.g. "0  100nt, >SEQ1_1... *" -> "SEQ1_1".
    """
    fields = member_line.split()
    if len(fields) < 3:
        raise ValueError(
            "malformed CD-HIT cluster line: {0!r}".format(member_line))
    return fields[2].replace(">", "").replace("...", "")


def parse_cdhit_clusters(content):
    """Map each representative sequence ID to its redundant sequence IDs.

    ``content`` is the text of a CD-HIT ``.clstr`` file. A cluster starts at
    a line containing ">Cluster"; the member line ending in "*" is the
    representative, every other non-blank member line is a redundant
    sequence. The representative may appear at any position inside its
    cluster. Clusters with no redundant members (or no representative) are
    left out of the result.
    """
    rep_to_members = {}
    rep_id = None
    members = []

    def store_cluster():
        if rep_id is not None and members:
            rep_to_members[rep_id] = members

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if ">Cluster" in line:
            # A new header closes the previous cluster.
            store_cluster()
            rep_id = None
            members = []
            continue
        if line.endswith("*"):
            rep_id = _sequence_id(line)
        else:
            members.append(_sequence_id(line))
    store_cluster()
    return rep_to_members


def build_cdhit_rows(first_line, file_content, rep_to_members):
    """Return the tab-separated rows to append to one ``.cluster.all`` file.

    ``first_line`` is used as the column template. For every representative
    ID found in ``file_content``, one row per redundant sequence is produced
    with column 3 set to "---", column 4 to "CD-Hit", column 7 to the
    redundant sequence ID and, when the template has more than 9 columns,
    column 9 to that ID with its last "_"-separated suffix removed (column
    numbers are 0-based). Returns "" when nothing matches.

    Raises ValueError if rows are needed but the template has fewer than
    8 columns.
    """
    template = first_line.split()
    rows = []
    for rep_id, members in rep_to_members.items():
        # NOTE(review): this is a substring match, so "SEQ1" also matches a
        # file that only contains "SEQ10". Kept as in the original because
        # CD-HIT may truncate IDs; confirm before tightening.
        if rep_id not in file_content:
            continue
        if len(template) < 8:
            raise ValueError(
                "expected at least 8 columns, got {0}: {1!r}".format(
                    len(template), first_line))
        for member in members:
            cols = list(template)
            cols[3] = "---"
            cols[4] = "CD-Hit"
            cols[7] = str(member)
            if len(template) > 9:
                cols[9] = str(member.rsplit("_", 1)[0])
            rows.append('\t'.join(cols) + "\n")
    return "".join(rows)


def add_cdhit_rows_to_file(path, rep_to_members):
    """Rewrite ``path`` tab-separated and append the CD-HIT rows, in place."""
    with open(path, "r+") as cl_file:
        file_lines = cl_file.readlines()
        normalized = "".join(
            '\t'.join(line.split()) + '\n' for line in file_lines)
        extra_rows = ""
        if file_lines:  # an empty file has no template line to copy
            extra_rows = build_cdhit_rows(
                file_lines[0], "".join(file_lines), rep_to_members)
        # Everything is computed before the first write, so an error above
        # leaves the file untouched.
        cl_file.seek(0)
        cl_file.write(normalized + extra_rows)
        cl_file.truncate()


def main(argv=None):
    """Script entry point; ``argv[1]`` is the path of the CD-HIT .clstr file."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: addCdhitseqs.py <cdhit .clstr file>")
    # Read-only: the cluster file itself is never modified.
    with open(argv[1], 'r') as handle:
        rep_to_members = parse_cdhit_clusters(handle.read())
    for single_file in sorted(glob.glob(cluster_seqs_stats_path)):
        add_cdhit_rows_to_file(single_file, rep_to_members)


if __name__ == "__main__":
    main()