# HG changeset patch
# User rnateam
# Date 1484344674 18000
# Node ID 6c88ad83de2842c37f3be596278ffce7a9d3a08b
# Parent e166d1382033a26f979de6eca3f27675c368b232
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
diff -r e166d1382033 -r 6c88ad83de28 addCdhitseqs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addCdhitseqs.py Fri Jan 13 16:57:54 2017 -0500
@@ -0,0 +1,86 @@
+"""Append CD-HIT redundant sequences to the cluster result files.
+
+Usage: python addCdhitseqs.py <cdhit_cluster_file>
+
+Every ``RESULTS/*.cluster.all`` file whose text contains the first sequence id
+of a CD-HIT cluster gets one extra "CD-Hit ... ORIGID <id>" line for each of
+the remaining sequence ids of that cluster.
+"""
+import re
+import glob
+import sys
+
+CLUSTER_SEQS_STATS_PATH = "RESULTS/*.cluster.all"
+
+
+def clean_seq_id(token):
+    """Remove the '>' and '...' decorations CD-HIT puts around a sequence id."""
+    return token.replace(">", "").replace("...", "")
+
+
+def parse_cdhit_clusters(path):
+    """Map each cluster's first sequence id to the ids listed after it.
+
+    The first member line after a '>Cluster' header is treated as the
+    representative; clusters with a single member produce no entry.
+    """
+    rep_to_members = {}
+    in_cluster = False
+    rep_id = None
+    members = []
+    # Plain 'r': the file is only read, and 'r+' fails on read-only inputs.
+    with open(path, 'r') as handle:
+        for line in handle:
+            if ">Cluster" in line:
+                in_cluster = True
+                rep_id = None
+                continue
+            fields = line.split()
+            # Skip blank/malformed lines and anything before the first header.
+            if not in_cluster or len(fields) < 3:
+                continue
+            seq_id = clean_seq_id(fields[2])
+            if rep_id is None:
+                rep_id = seq_id
+                members = []
+            else:
+                members.append(seq_id)
+                rep_to_members[rep_id] = members
+    return rep_to_members
+
+
+def build_cdhit_lines(file_content, rep_to_members):
+    """Return the lines to append to one cluster file (may be empty).
+
+    Fields 1-3 and 6 of the file's first line are reused for every new line.
+    """
+    first_line = file_content.split('\n')[0].split()
+    new_lines = []
+    for rep_id, members in rep_to_members.items():
+        # NOTE(review): substring match, as before; an id that is a prefix of
+        # another id would also match -- confirm ids are unambiguous.
+        if rep_id in file_content:
+            for member in members:
+                new_lines.append(first_line[0] + " " + first_line[1] + " " + first_line[2] + "  -  CD-Hit " + first_line[5] + " ORIGID " + str(member) + "\n")
+    return "".join(new_lines)
+
+
+def main(argv):
+    """Parse the CD-HIT file given in argv[1] and extend every cluster file."""
+    if len(argv) < 2:
+        sys.exit("usage: addCdhitseqs.py <cdhit_cluster_file>")
+    rep_to_members = parse_cdhit_clusters(argv[1])
+    for single_file in sorted(glob.glob(CLUSTER_SEQS_STATS_PATH)):
+        # Read and append through separate handles: with 'a+' the read
+        # position starts at end-of-file on Python 3, so read() returned ''.
+        with open(single_file, "r") as cl_file:
+            file_content = cl_file.read()
+        # Built fresh per file so one cluster's lines never leak into the next.
+        to_write = build_cdhit_lines(file_content, rep_to_members)
+        if to_write:
+            with open(single_file, "a") as cl_file:
+                cl_file.write(to_write)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r e166d1382033 -r 6c88ad83de28 evaluation.py
--- a/evaluation.py Thu Dec 22 08:49:14 2016 -0500
+++ b/evaluation.py Fri Jan 13 16:57:54 2017 -0500
@@ -1,6 +1,7 @@
import glob
from os import system
import re
+from sklearn import metrics
def sh(script):
system("bash -c '%s'" % script)
@@ -46,3 +47,23 @@
toWrite += listOfClasses[i] + "\t" + listOfClusters[i] + '\n'
with open("RESULTS/fullTab.tabular", "w") as full:
full.write(toWrite)
+
+
+# Clustering-quality report, written to RESULTS/evaluation.txt.
+# Scores are computed only when the first class label matches ^RF.*$;
+# otherwise every metric is reported as NA.
+pattern = re.compile("^RF.*$")
+metric_names = ["completeness_score", "homogeneity_score", "adjusted_rand_score", "adjusted_mutual_info_score", "v_measure_score"]
+
+if len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0])):
+    report_lines = []
+    for metric_name in metric_names:
+        # Each name is looked up in sklearn.metrics and called as score(classes, clusters).
+        score = getattr(metrics, metric_name)(listOfClasses, listOfClusters)
+        report_lines.append(metric_name + " : " + str(score))
+    toWrite = "\n".join(report_lines)
+else:
+    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"
+
+with open("RESULTS/evaluation.txt", "w") as fOut:
+    fOut.write(toWrite)
diff -r e166d1382033 -r 6c88ad83de28 glob_report.xml
--- a/glob_report.xml Thu Dec 22 08:49:14 2016 -0500
+++ b/glob_report.xml Fri Jan 13 16:57:54 2017 -0500
@@ -2,13 +2,18 @@
graphclust-wrappers
perl-array-utils
+ scikit-learn
+ locarna
+ rnaz
+ infernal
+ viennarna
+ graphicsmagick
/dev/null &&
#set $inputFiles = ""
@@ -24,17 +29,30 @@
#set $inputFilesTrees += str($mods)+','
#end for
#set $inputFilesTrees = $inputFilesTrees[:-1]
-
- 'glob_res.pl' '$inputFiles' $merge_cluster_ol $merge_overlap $min_cluster_size $cm_min_bitscore $cm_max_eval $cm_bitscore_sig $partition_type '' $cut_type '$inputFilesTrees'
+ glob_res.pl
+ '$inputFiles'
+ $merge_cluster_ol
+ $merge_overlap
+ $min_cluster_size
+ $cm_min_bitscore
+ $cm_max_eval
+ $cm_bitscore_sig
+ $partition_type ''
+ $cut_type
+ '$inputFilesTrees'
+ $results_top_num
#if $iteration_num.iteration_num_selector:
$iteration_num.CI
-
$final_partition_soft
$final_partition_used_cmsearch
#end if
&&
python '$__tool_directory__/evaluation.py'
+ #if $cdhit:
+ &&
+ python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
+ #end if
]]>
@@ -43,6 +61,7 @@
+
@@ -58,18 +77,26 @@
+
+
+
+
+
+
+
+
@@ -111,6 +138,15 @@
+
+
+
+
+
+
+
+
+
diff -r e166d1382033 -r 6c88ad83de28 test-data/1.cluster.top5.alirna.png
Binary file test-data/1.cluster.top5.alirna.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/1.cluster.top5.aln.png
Binary file test-data/1.cluster.top5.aln.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/2.cluster.top5.alirna.png
Binary file test-data/2.cluster.top5.alirna.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/2.cluster.top5.aln.png
Binary file test-data/2.cluster.top5.aln.png has changed