annotate evaluation.py @ 2:6c88ad83de28 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
author rnateam
date Fri, 13 Jan 2017 16:57:54 -0500
parents e166d1382033
children a8fde40f00fc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
1 import glob
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
2 from os import system
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
3 import re
2
6c88ad83de28 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents: 1
diff changeset
4 from sklearn import metrics
0
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
5
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
def sh(script):
    """Run *script* through ``bash -c``.

    The script text is handed to bash as a single argv element instead of
    being spliced into a single-quoted shell string, so scripts that contain
    single quotes (or other shell metacharacters) reach bash unmodified.

    The exit status is discarded and ``None`` is returned, as before.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    subprocess.call(["bash", "-c", script])
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
8
1
e166d1382033 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
rnateam
parents: 0
diff changeset
# NOTE(review): this listing lost its indentation; the nesting below is
# reconstructed from the statements and their comments -- confirm against the
# repository copy before merging.

# Input locations, relative to the current working directory.
dataNames = "FASTA/data.names"

# Parallel lists with one entry per sequence:
#   listOfClasses[i]  -- the sequence ID prefix before the first "_"
#   listOfClusters[i] -- the cluster label (a string) given to that sequence
listOfClusters = []
listOfClasses = []
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)

# IDs of every sequence that appears in a cluster file.  It is only used for
# membership tests against the IDs in data.names, so a set is enough; the
# former per-line rescan of data.names to build this collection is gone.
blackList = set()
numberOfClusters = 0
for singleFile in sorted(cluster_seqs_stats_files):
    numberOfClusters += 1
    with open(singleFile, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing with IndexError.
                continue
            uniqueId = fields[7]
            clustNum = fields[1]
            listOfClasses.append(uniqueId.partition("_")[0])
            listOfClusters.append(clustNum)
            blackList.add(uniqueId)

# Sequences listed in data.names but absent from every cluster file each get
# a fresh label of their own, starting one past the number of cluster files.
# NOTE(review): labels are derived from the file count, not from the cluster
# numbers read above; if those numbers are not exactly 1..N the singleton
# labels could collide with real cluster labels -- TODO confirm.
numberOfClusters += 1
with open(dataNames, "r") as names:
    for line in names:
        fields = line.split()
        if not fields:
            continue
        fullUniqeId = fields[3]
        if fullUniqeId not in blackList:
            listOfClasses.append(fullUniqeId.partition("_")[0])
            listOfClusters.append(str(numberOfClusters))
            numberOfClusters += 1  # next unassigned sequence gets a new label
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
44
4503c49f31c4 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
# Write one "<class>\t<cluster>" row per collected sequence.  The rows are
# assembled with a single join instead of repeated string concatenation.
toWrite = "".join(
    class_label + "\t" + cluster_label + "\n"
    for class_label, cluster_label in zip(listOfClasses, listOfClusters)
)
with open("RESULTS/fullTab.tabular", "w") as full:
    full.write(toWrite)
2
6c88ad83de28 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents: 1
diff changeset
50
6c88ad83de28 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents: 1
diff changeset
51
6c88ad83de28 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
rnateam
parents: 1
diff changeset
# Class labels must start with "RF" for the scores to be computed; only the
# first collected label is inspected.
# NOTE(review): presumably "RF" marks Rfam family IDs -- confirm.
pattern = re.compile("^RF.*$")

if listOfClasses and pattern.match(str(listOfClasses[0])):
    # True classes vs. predicted cluster labels, scored with scikit-learn.
    completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
    homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
    adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters)
    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters)
    v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters)

    # One "name : value" line per metric, no trailing newline.
    report_rows = [
        ("completeness_score : ", completeness_score),
        ("homogeneity_score : ", homogeneity_score),
        ("adjusted_rand_score : ", adjusted_rand_score),
        ("adjusted_mutual_info_score : ", adjusted_mutual_info_score),
        ("v_measure_score : ", v_measure_score),
    ]
    toWrite = "\n".join(prefix + str(value) for prefix, value in report_rows)
else:
    # No usable class labels: emit the same report layout with NA values.
    toWrite = (
        "completeness_score : NA \n"
        "homogeneity_score : NA \n"
        "adjusted_rand_score : NA \n"
        "adjusted_mutual_info_score : NA \n"
        "v_measure_score : NA"
    )

with open("RESULTS/evaluation.txt", "w") as fOut:
    fOut.write(toWrite)