Mercurial > repos > rnateam > graphclust_postprocessing_no_align
comparison glob_report_no_align.xml @ 0:b8bd06d72563 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResultsNoAlign commit 2a6fd70c1bcec36ffdf0bba2ec82489b39cfc84e
author | rnateam |
---|---|
date | Sat, 27 Oct 2018 13:50:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b8bd06d72563 |
---|---|
1 <tool id="graphclust_glob_report_no_align" name="Graphclust glob_report collect clusters" version="0.5" > | |
2 <requirements> | |
3 <requirement type="package" version="0.6.0">graphclust-wrappers</requirement> | |
4 <requirement type="package" version="0.5">perl-array-utils</requirement> | |
5 <requirement type="package" version="0.18.1">scikit-learn</requirement> | |
6 <requirement type="package" version="1.8.10">locarna</requirement> | |
7 <requirement type="package" version="2.1">rnaz</requirement> | |
8 <requirement type="package" version="1.1.2">infernal</requirement> | |
9 <requirement type="package" version="2.2.10">viennarna</requirement> | |
10 <requirement type="package" version="1.3.30">graphicsmagick</requirement> | |
11 <requirement type="package" version="0.6.1">rscape</requirement> | |
12 <requirement type="package" version="6.0">unzip</requirement> | |
13 </requirements> | |
14 <command detect_errors="exit_code"> | |
15 <![CDATA[ | |
16 unzip '$FASTA' > /dev/null 2>&1 && | |
17 mkdir ./CMSEARCH && | |
18 mkdir ./MODEL && | |
19 #import re | |
20 #for $cms_res in $cmsearch_results: | |
21 #set $safename_cm = re.sub('[^\w\-_\.]', '_', $cms_res.element_identifier) | |
22 ln -f -s '$cms_res' ./CMSEARCH/$safename_cm && | |
23 #end for | |
24 #for $mods in $model_tree_files: | |
25 #set $safename_tr = re.sub('[^\w\-_\.]', '_', $mods.element_identifier) | |
26 ln -f -s '$mods' ./MODEL/$safename_tr && | |
27 #end for | |
28 ## Inputs are now linked under sanitized names in CMSEARCH and MODEL; glob_res.pl takes purely positional arguments, so their order below matters. | |
29 'glob_res.pl' | |
30 $merge_cluster_ol | |
31 $merge_overlap | |
32 $min_cluster_size | |
33 $cm_min_bitscore | |
34 $cm_max_eval | |
35 1 ## cm_bitscore_sig | |
36 $partition_type '' | |
37 $cut_type | |
38 0 ## zero means do not align | |
39 #if $iteration_num.iteration_num_selector: | |
40 $iteration_num.CI | |
41 $final_partition_soft | |
42 $final_partition_used_cmsearch | |
43 '$combined_cm' | |
44 ## NOTE(review): CI is read through the iteration_num conditional but the three datasets above are not prefixed - confirm they resolve when the selector is true. | |
45 #end if | |
46 | |
47 && | |
48 python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/ | |
49 ## The CD-HIT step below only runs when the optional cdhit dataset was supplied. | |
50 #if $cdhit: | |
51 && | |
52 python '$__tool_directory__/addCdhitseqs.py' '$cdhit' | |
53 #end if | |
54 ]]> | |
55 </command> | |
56 <inputs> | |
57 <param type="data" name="FASTA" format="zip" help="FASTA.zip from pre-processing step"/> | |
58 <param type="data" name="cmsearch_results" format="tabular" multiple="True" | |
59 help="Tabular cmsearch results of the candidate clusters from the cmsearch step"/> | |
60 <param type="data" name="model_tree_files" format="txt" multiple="True" label="model-tree-stk" | |
61 help="model.tree.stk files from pgma_graphclust candidate clustering step"/> | |
62 <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition" | |
63 help="Whether to do hard partitioning (no overlap) or soft mode(cluster elements may overlap) "/> | |
64 <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/> | |
65 <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/> | |
66 <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_E-val" help=""/> | |
67 <param type="data" name="cdhit" format="txt" optional="true" label="CD-HIT output" | |
68 help="Optional CD-HIT pre-clustering output to be combined into the final clustering output"/> | |
69 <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_overlap" | |
70 help="Overlapping ratio criteria to merge overlapping clusters or keep separate clusters (soft partitioning)"/> | |
71 <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_fraction_overlap" | |
72 help="Overlapping ratio criteria to merge overlapping sequence fractions from same input sequence"/> | |
73 <param name="min_cluster_size" type="integer" value="3" size="5" label="minimum cluster size" | |
74 help="Minimum number of elements that can form a cluster. Higher values discard small clusters and may produce larger merged clusters"/> | |
75 <!-- <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> --> | |
76 <!-- The iteration number and the three extra datasets below are only requested when "Multiple iterations" is enabled. --> | |
77 <conditional name="iteration_num"> | |
78 <param name="iteration_num_selector" type="boolean" checked="no" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> | |
79 <when value="true"> | |
80 <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> | |
81 <param type="data" name="final_partition_soft" format="txt" /> | |
82 <param type="data" name="final_partition_used_cmsearch" format="txt" /> | |
83 <param type="data" name="combined_cm" format="txt" /> | |
84 </when> | |
85 <when value="false" ></when> | |
86 </conditional> | |
87 | |
88 </inputs> | |
89 <outputs> | |
90 <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" /> | |
91 <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" /> | |
92 <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" /> | |
93 <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" /> | |
94 <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" /> | |
95 <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" /> | |
96 <collection name="clusters" type="list" label="CLUSTERS-cmsearch"> | |
97 <discover_datasets format="txt" pattern="(?P<name>^.*\.all$)" directory="RESULTS" /> | |
98 </collection> | |
99 <collection name="allFastaSorted" type="list" label="cluster-sequences-sorted"> | |
100 <discover_datasets format="fasta" pattern="(?P<name>^.*\.sorted\.fa$)" directory="RESULTS" /> | |
101 </collection> | |
102 <!-- The pattern below matches any file name, so every file in RESULTS/partitions becomes a list element. --> | |
103 <collection name="partitions" type="list" label="Partitions"> | |
104 <discover_datasets pattern="(?P<name>^.*$)" directory="RESULTS/partitions" /> | |
105 </collection> | |
106 <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" /> | |
107 </outputs> | |
108 <tests> <!-- Single test case: one-iteration run with every numeric and boolean parameter at its default value --> | |
109 <test> | |
110 <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/> <!-- NOTE(review): the FASTA input declares format="zip"; confirm this ftype is intentional --> | |
111 <param name="cmsearch_results" value="1.1.tree,1.2.tree"/> | |
112 <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/> | |
113 <param name="partition_type" value="0"/> | |
114 <param name="cut_type" value="0"/> | |
115 <conditional name="iteration_num"> | |
116 <param name="iteration_num_selector" value="false"/> | |
117 </conditional> | |
118 <param name="merge_cluster_ol" value="0.66"/> | |
119 <param name="merge_overlap" value="0.51"/> | |
120 <param name="min_cluster_size" value="3"/> | |
121 <param name="cm_min_bitscore" value="20"/> | |
122 <param name="cm_max_eval" value="0.001"/> | |
123 <!-- <param name="cm_bitscore_sig" value="0"/> --> | |
124 <output name="final_stats" file="RESULTS/cluster.final.stats" /> | |
125 <output name="combined_cm_out" file="combined_cm_out"/> | |
126 <output name="evaluation" file="evaluation1.txt"/> | |
127 <output_collection name="clusters" type="list"> | |
128 <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/> | |
129 <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/> | |
130 | |
131 </output_collection> | |
132 <output_collection name="partitions"> | |
133 <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains"> | |
134 <assert_contents> | |
135 <has_text text="1.1 1.1 " /> | |
136 <has_text text="1.2 1.2" /> | |
137 </assert_contents> | |
138 </element> | |
139 <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains"> | |
140 <assert_contents> | |
141 <has_text text="MODEL CLASS 0 0" /> | |
142 <!--has_text text="1.2" /> | |
143 <has_text text="1.1" /--> | |
144 </assert_contents> | |
145 </element> | |
146 <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" /> | |
147 <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" /> | |
148 <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" /> | |
149 <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/> | |
150 </output_collection> | |
151 <!-- The archive is only compared by size (within delta bytes), not by content. --> | |
152 <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/> | |
153 | |
154 </test> | |
155 </tests> | |
156 <help> | |
157 <![CDATA[ | |
158 | |
159 **What it does** | |
160 | |
161 Post-processing. Redundant clusters are merged and instances that belong to multiple clusters | |
162 are assigned unambiguously. For every pair of clusters, the relative overlap (i.e. the fraction of | |
163 instances that occur in both clusters) is computed and clusters are merged if the overlap exceeds the configured cluster-overlap ratio (0.66 by default). | |
164 Cluster members are finally ranked by their CM bitscore. | |
165 | |
166 ]]> | |
167 </help> | |
168 <citations> <!-- Single citation, given as a DOI --> | |
169 <citation type="doi">10.5281/zenodo.597695</citation> | |
170 </citations> | |
171 </tool> |