# HG changeset patch
# User rnateam
# Date 1484344674 18000
# Node ID 6c88ad83de2842c37f3be596278ffce7a9d3a08b
# Parent e166d1382033a26f979de6eca3f27675c368b232
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0

diff -r e166d1382033 -r 6c88ad83de28 addCdhitseqs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addCdhitseqs.py Fri Jan 13 16:57:54 2017 -0500
@@ -0,0 +1,85 @@
+"""Append sequences removed by CD-HIT to the GraphClust cluster files.
+
+Usage: addCdhitseqs.py <cd-hit .clstr file>
+
+Every RESULTS/*.cluster.all file that mentions a CD-HIT representative
+gets one extra "CD-Hit" line per sequence CD-HIT grouped under that
+representative.
+"""
+import re
+import glob
+import sys
+
+
+def clean_id(field):
+    """Drop the '>' prefix and '...' suffix CD-HIT wraps around ids."""
+    return field.replace(">", "").replace("...", "")
+
+
+def parse_cdhit_clusters(path):
+    """Return {representative id: [ids of the other cluster members]}.
+
+    The representative is the member whose line ends with '*'; when no
+    member of a cluster carries that marker the first member is used.
+    Clusters holding a single sequence are left out.
+    """
+    clusters = []
+    # Read-only: the cluster file is never modified.
+    with open(path, 'r') as handle:
+        for line in handle:
+            if line.startswith(">Cluster"):
+                clusters.append([])
+                continue
+            fields = line.split()
+            # Ignore blank/short lines and anything before the first header.
+            if not clusters or len(fields) < 3:
+                continue
+            clusters[-1].append((clean_id(fields[2]), fields[-1] == "*"))
+
+    rep_to_members = {}
+    for members in clusters:
+        if len(members) < 2:
+            continue
+        marked = [pos for pos, (_, is_rep) in enumerate(members) if is_rep]
+        rep_pos = marked[0] if marked else 0
+        rep_to_members[members[rep_pos][0]] = [
+            seq_id for pos, (seq_id, _) in enumerate(members)
+            if pos != rep_pos
+        ]
+    return rep_to_members
+
+
+def append_redundant_seqs(stats_file, rep_to_members):
+    """Append one line per redundant sequence to a *.cluster.all file."""
+    with open(stats_file, 'r') as handle:
+        file_content = handle.read()
+    header = file_content.split('\n')[0].split()
+    if len(header) < 6:
+        # Empty or malformed file: no header fields to copy from.
+        return
+    prefix = (header[0] + " " + header[1] + " " + header[2] +
+              "  -  CD-Hit " + header[5] + " ORIGID ")
+    # Collected per file, so one file never receives another file's lines.
+    new_lines = []
+    for rep_id, members in sorted(rep_to_members.items()):
+        # Substring test kept from the original (ids may be truncated).
+        if rep_id and rep_id in file_content:
+            for seq_id in members:
+                new_lines.append(prefix + str(seq_id) + "\n")
+    if not new_lines:
+        return
+    with open(stats_file, 'a') as handle:
+        if not file_content.endswith("\n"):
+            handle.write("\n")
+        handle.writelines(new_lines)
+
+
+def main():
+    """Add the CD-HIT members named in sys.argv[1] to all cluster files."""
+    rep_to_members = parse_cdhit_clusters(sys.argv[1])
+    for stats_file in sorted(glob.glob("RESULTS/*.cluster.all")):
+        append_redundant_seqs(stats_file, rep_to_members)
+
+
+if __name__ == "__main__":
+    main()
diff -r e166d1382033 -r 6c88ad83de28 evaluation.py
--- a/evaluation.py Thu Dec 22 08:49:14 2016 -0500
+++ b/evaluation.py Fri Jan 13 16:57:54 2017 -0500
@@ -1,6 +1,7 @@
 import glob
 from os import system
 import re
+from sklearn import metrics
 
 def sh(script):
     system("bash -c '%s'" % script)
@@ -46,3 +47,23 @@
     toWrite += listOfClasses[i] + "\t" + listOfClusters[i] + '\n'
 with open("RESULTS/fullTab.tabular", "w") as full:
     full.write(toWrite)
+
+
+pattern = re.compile("^RF.*$")
+
+
+if len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0])):
+
+    completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
+    homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
+    adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters)
+    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters)
+    v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters)
+
+    toWrite = "completeness_score : " + str(completeness_score) + "\n" + "homogeneity_score : " + str(homogeneity_score) + "\n" + "adjusted_rand_score : " +str(adjusted_rand_score) + "\n" + "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score)+ "\n" + "v_measure_score : " + str(v_measure_score)
+
+else:
+    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"
+
+with open("RESULTS/evaluation.txt", "w") as fOut:
+    fOut.write(toWrite)
diff -r e166d1382033 -r 6c88ad83de28 glob_report.xml
--- a/glob_report.xml Thu Dec 22 08:49:14 2016 -0500
+++ b/glob_report.xml Fri Jan 13 16:57:54 2017 -0500
@@ -2,13 +2,18 @@
 graphclust-wrappers
 perl-array-utils
+ scikit-learn
+ locarna
+ rnaz
+ infernal
+ viennarna
+ graphicsmagick
 /dev/null &&
 #set $inputFiles = ""
@@ -24,17 +29,30 @@
 #set $inputFilesTrees += str($mods)+','
 #end for
 #set $inputFilesTrees = $inputFilesTrees[:-1]
-
- 'glob_res.pl' '$inputFiles' $merge_cluster_ol $merge_overlap $min_cluster_size $cm_min_bitscore $cm_max_eval $cm_bitscore_sig $partition_type '' $cut_type '$inputFilesTrees'
+ glob_res.pl
+ '$inputFiles'
+ $merge_cluster_ol
+ $merge_overlap
+ $min_cluster_size
+ $cm_min_bitscore
+ $cm_max_eval
+ $cm_bitscore_sig
+ $partition_type ''
+ $cut_type
+ '$inputFilesTrees'
+ $results_top_num
 #if $iteration_num.iteration_num_selector:
 $iteration_num.CI
- $final_partition_soft
 $final_partition_used_cmsearch
 #end if
 &&
 python '$__tool_directory__/evaluation.py'
+ #if $cdhit:
+ &&
+ python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
+ #end if
 ]]>
@@ -43,6 +61,7 @@
+
@@ -58,18 +77,26 @@
+
+
+
+
+
+
+
+
@@ -111,6 +138,15 @@
+
+
+
+
+
+
+
+
+
diff -r e166d1382033 -r 6c88ad83de28 test-data/1.cluster.top5.alirna.png
Binary file test-data/1.cluster.top5.alirna.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/1.cluster.top5.aln.png
Binary file test-data/1.cluster.top5.aln.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/2.cluster.top5.alirna.png
Binary file test-data/2.cluster.top5.alirna.png has changed
diff -r e166d1382033 -r 6c88ad83de28 test-data/2.cluster.top5.aln.png
Binary file test-data/2.cluster.top5.aln.png has changed