annotate ideas.xml @ 181:475fa65d5138 draft

Uploaded
author greg
date Thu, 01 Feb 2018 12:54:15 -0500
parents 15bd502e6a0c
children 28a995056cd0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d75cbb2db2c4 Uploaded
greg
parents: 0
diff changeset
1 <tool id="ideas" name="IDEAS" version="1.2.0">
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
2 <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
3 <requirements>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.26.0">bedtools</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="332">ucsc-bedsort</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
100
86445eab5f51 Uploaded
greg
parents: 99
diff changeset
8 <requirement type="package" version="1.20">ideas</requirement>
151
9d34f7e6d80c Uploaded
greg
parents: 149
diff changeset
9 <requirement type="package" version="1.10.4">r-data.table</requirement>
106
a0e38f759ad5 Uploaded
greg
parents: 104
diff changeset
10 <requirement type="package" version="1.4.4">r-optparse</requirement>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
11 </requirements>
30
1d854705de39 Uploaded
greg
parents: 29
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
173
843bfa2dff2c Uploaded
greg
parents: 172
diff changeset
13 #import os
176
e0d5669fee03 Uploaded
greg
parents: 173
diff changeset
14 #set perform_training = $perform_training_cond.perform_training
e0d5669fee03 Uploaded
greg
parents: 173
diff changeset
15
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
16 ## Copy the input's compressed tmp directory archive.
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
17 cp $input.metadata.tmp_archive . &&
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
18 ## Extract the tmp archive.
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
19 tar -xzf tmp.tar.gz &&
176
e0d5669fee03 Uploaded
greg
parents: 173
diff changeset
20
e0d5669fee03 Uploaded
greg
parents: 173
diff changeset
21 ## Define and create output directories.
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
22 #set output_pdf_dir = "output_pdf_dir"
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
23 #set output_txt_dir = "output_txt_dir"
155
f70a86b6c6e0 Uploaded
greg
parents: 153
diff changeset
24 #set output_training_dir = "output_training_dir"
172
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
25 #if str($output_heatmaps) == "yes":
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
26 mkdir '$output_pdf_dir' &&
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
27 #end if
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
28 #if str($perform_training) == "yes":
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
29 #set output_dir = $output_training_dir
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
30 mkdir '$output_training_dir' &&
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
31 #else:
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
32 #set output_dir = $output_txt_dir
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
33 mkdir '$output_txt_dir' &&
473a089c5fb6 Uploaded
greg
parents: 171
diff changeset
34 #end if
176
e0d5669fee03 Uploaded
greg
parents: 173
diff changeset
35
173
843bfa2dff2c Uploaded
greg
parents: 172
diff changeset
36 Rscript '$__tool_directory__/ideas.R'
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
37 --burnin_num $burnin_num
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
38 #if str($bychr) == "true":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
39 --bychr true
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
40 #end if
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
41 #if $input.metadata.chrom_bed is not None:
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
42 --chrom_bed_input $input.metadata.chrom_bed
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
43 #end if
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
44 #if $input.metadata.chrom_windows is not None:
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
45 --chromosome_windows $input.metadata.chrom_windows
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
46 #end if
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
47 #if str($hp) == "true":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
48 --hp true
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
49 #end if
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
50 #if str($initial_states) != "0":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
51 --initial_states $initial_states
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
52 #end if
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
53 --ideas_input_config $input.metadata.input_config
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
54 #if str($log2) != "0.0":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
55 --log2 $log2
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
56 #end if
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
57 #if str($maxerr) != "0.0":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
58 --maxerr $maxerr
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
59 #end if
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
60 #if str($max_cell_type_clusters) != "0":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
61 --max_cell_type_clusters $max_cell_type_clusters
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
62 #end if
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
63 #if str($max_position_classes) != "0":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
64 --max_position_classes $max_position_classes
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
65 #end if
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
66 #if str($max_states) != "0.0":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
67 --max_states $max_states
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
68 #end if
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
69 --mcmc_num $mcmc_num
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
70 #if str($minerr) != "0.0":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
71 --minerr $minerr
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
72 #end if
173
843bfa2dff2c Uploaded
greg
parents: 172
diff changeset
73 --output_dir $output_dir
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
74 #if str($prior_concentration) != "0.0":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
75 --prior_concentration $prior_concentration
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
76 #end if
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
77 --project_name '$project_name'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
78 #if str($save_ideas_log) == "yes":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
79 --save_ideas_log $save_ideas_log
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
80 --output_log '$output_log'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
81 #end if
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
82 #if str($standardize_datasets) == "true":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
83 --standardize_datasets true
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
84 #end if
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
85 --rseed $rseed
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
86 --thread \${GALAXY_SLOTS:-4}
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
87 #if str($perform_training) == "yes":
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
88 --training_iterations $perform_training_cond.training_iterations
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
89 --training_windows $perform_training_cond.training_windows
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
90 #end if
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
91 #if str($perform_training) == "yes":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
92 && mv ./*.para0 '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
93 && mv ./*.profile0 '$output_dir'
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
94 #else:
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
95 && mv ./*.para '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
96 && mv ./*.profile '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
97 #end if
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
98 && mv ./*.cluster '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
99 && mv ./*.state '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
100 #if str($output_heatmaps) == "yes":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
101 && Rscript '$__tool_directory__/create_heatmaps.R'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
102 --input_dir '$output_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
103 --output_dir '$output_pdf_dir'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
104 --script_dir '$__tool_directory__'
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
105 #if str($perform_training) == "yes":
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
106 --in_training_mode true
158
c996089f1747 Uploaded
greg
parents: 157
diff changeset
107 #end if
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
108 #end if
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
109 ]]></command>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
110 <inputs>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
111 <conditional name="perform_training_cond">
a80b76535243 Uploaded
greg
parents: 142
diff changeset
112 <param name="perform_training" type="select" label="Perform training?">
a80b76535243 Uploaded
greg
parents: 142
diff changeset
113 <option value="yes" selected="true">Yes</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
114 <option value="no">No</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
115 </param>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
116 <when value="yes">
165
bb5544d1c85e Uploaded
greg
parents: 164
diff changeset
117 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/>
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
118 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
119 </when>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
120 <when value="no"/>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
121 </conditional>
171
445f67ea18f6 Uploaded
greg
parents: 169
diff changeset
122 <param name="input" type="data" format="ideaspre" label="Select IDEAS input"/>
113
ba7238a53377 Uploaded
greg
parents: 112
diff changeset
123 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
124 <validator type="empty_field"/>
5753c1386737 Uploaded
greg
parents: 111
diff changeset
125 </param>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
126 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
127 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
128 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each window using">
36
859687afe7bc Uploaded
greg
parents: 35
diff changeset
129 <option value="6" selected="true">mean</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
130 <option value="8">max</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
131 </param>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
132 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
101
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
133 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
134 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
135 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
136 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
137 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
138 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/>
169
7b0c6c6cb82b Uploaded
greg
parents: 168
diff changeset
139 <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
140 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
141 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
101
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
142 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
143 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
144 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?">
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
145 <option value="yes" selected="true">Yes</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
146 <option value="no">No</option>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
147 </param>
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
148 <param name="save_ideas_log" type="select" display="radio" label="Save IDEAS log in an additional history item">
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
149 <option value="no" selected="true">No</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
150 <option value="yes">Yes</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
151 </param>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
152 </inputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
153 <outputs>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
154 <data name="output_log" format="txt" label="${tool.name} (output log) on ${on_string}">
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
155 <filter>save_ideas_log == 'yes'</filter>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
156 </data>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
157 <collection name="output_pdf_collection" type="list" label="${tool.name} (heatmaps) on ${on_string}">
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
158 <discover_datasets pattern="__name__" directory="output_pdf_dir" format="pdf"/>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
159 <filter>output_heatmaps == 'yes'</filter>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
160 </collection>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
161 <collection name="output_txt_collection" type="list">
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
162 <discover_datasets pattern="__name__" directory="output_txt_dir" format="txt"/>
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
163 <filter>perform_training_cond['perform_training'] == 'no'</filter>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
164 </collection>
168
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
165 <collection name="output_training_collection" type="list">
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
166 <discover_datasets pattern="__name__" directory="output_training_dir" format="txt"/>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
167 <filter>perform_training_cond['perform_training'] == 'yes'</filter>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
168 </collection>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
169 </outputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
170 <tests>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
171 <test>
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
172 <param name="perform_training" value="yes"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
173 <param name="training_iterations" value="3"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
174 <param name="input" value="ideas_test1/input.html" dbkey="hg19" ftype="ideaspre">
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
175 <composite_data value='ideas_test1/chromosomes.bed' dbkey="hg19"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
176 <composite_data value='ideas_test1/chromosome_windows.txt' dbkey="hg19"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
177 <composite_data value='ideas_test1/IDEAS_input_config.txt' dbkey="hg19"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
178 <composite_data value='ideas_test1/tmp.tar.gz' dbkey="hg19"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
179 </param>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
180 <param name="output_heatmaps" value="yes"/>
115
242ff1426fb4 Uploaded
greg
parents: 113
diff changeset
181 <param name="project_name" value="IDEAS_out"/>
173
843bfa2dff2c Uploaded
greg
parents: 172
diff changeset
182 <param name="save_ideas_log" value="yes"/>
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
183 <output_collection name="output_training_collection" type="list">
168
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
184 <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.cluster" ftype="txt"/>
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
185 <element name="IDEAS_out.chr2.cluster" file="IDEAS_out.cluster" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
186 <element name="IDEAS_out.chr3.cluster" file="IDEAS_out.cluster" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
187 <element name="IDEAS_out.chr4.cluster" file="IDEAS_out.cluster" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
188 <element name="IDEAS_out.chr5.cluster" file="IDEAS_out.cluster" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
189 <element name="IDEAS_out.chr1.state" file="IDEAS_out.chr1.state" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
190 <element name="IDEAS_out.chr2.state" file="IDEAS_out.chr2.state" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
191 <element name="IDEAS_out.chr3.state" file="IDEAS_out.chr3.state" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
192 <element name="IDEAS_out.chr4.state" file="IDEAS_out.chr4.state" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
193 <element name="IDEAS_out.chr5.state" file="IDEAS_out.chr5.state" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
194 <element name="IDEAS_out.para0" file="IDEAS_out.para0" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
195 <element name="IDEAS_out.profile0" file="IDEAS_out.profile0" ftype="txt"/>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
196 </output_collection>
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
197 <output_collection name="output_pdf_collection" type="list">
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
198 <element name="IDEAS_out.state.1.pdf" file="IDEAS_out.state.1.pdf" ftype="pdf"/>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
199 </output_collection>
168
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
200 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" />
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
201 </test>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
202 </tests>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
203 <help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
204 **What it does**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
205
95
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
206 IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
207 de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
208 model defined on all data, and it combines signals across both the genome and cell types to boost power. The
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
209 underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
210 **functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
211 and so IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
95
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
212 assuming or requiring a known global cell type relationship.
94
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
213
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
214 The input is a single dataset with the **IdeasPre** datatype, which is produced by the IDEAS Preprocessor tool.
94
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
215
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
216 .. image:: $PATH_TO_IMAGES/ideas.png
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
217
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
218 IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
219 **combining information simultaneously learned from other cell types** at the same positions in cell types with
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
220 similar local epigenetic landscapes. The sizes of the genomic intervals used to determine the similarity are also learned.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
221 All of the inferences are done through parallel infinite-state hidden Markov models (iHMM), which is a Bayesian
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
222 non-parametric technique to automatically determine the number of local cell type clusters and the number of
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
223 epigenetic states.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
224
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
225 In addition to its improved power, IDEAS has two unique advantages:
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
226
96
90144b60033c Uploaded
greg
parents: 95
diff changeset
227 1) applies **linear time inference** with respect to the number of cell types, which allows it to study hundreds or more cell types jointly
90144b60033c Uploaded
greg
parents: 95
diff changeset
228 2) uses mini-batch training to **improve reproducibility** of the predicted epigenetic states, which is important because genome segmentation is not convex and hence cannot guarantee a global optimal solution.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
229
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
230 -----
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
231
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
232 **Options**
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
233
168
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
234 * **Perform training** - select "Yes" to run the specified number of training iterations, running IDEAS with the parameter values and producing outputs. After training, these outputs are combined into a single dataset which is then used in conjunction with the inputs for the actual analysis. This process improves the accuracy of the final results.
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
235
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
236 * **Number of training iterations** - the number of times to execute IDEAS with the specified parameter values on the selected inputs to produce the training results. The minimum number of iterations is 3.
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
237 * **Number of randomly selected windows for training** - the number of chromosome windows within the input datasets from which to randomly select data for training.
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
238
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
239 * **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets. The latter case requires all selected datasets to have names that contain a "-" character.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
240
93
0c2cf49dfb58 Uploaded
greg
parents: 91
diff changeset
241 * **BAM or BigWig files** - select one or more Bam or Bigwig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
242 * **Cell type, Epigenetic factor and Input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each. The combination of "cell type name" and "epigenetic factor name" must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc and the factor name as "rep1", "rep2", etc.
130
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
243
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
244 * **Cell type name** - cell type name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
245 * **Epigenetic factor name** - epigenetic factor name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
246 * **BAM or BigWig file** - BAM or BigWig file
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
247
108
021d1f60b0d3 Uploaded
greg
parents: 107
diff changeset
248 * **Project name** - datasets produced by IDEAS will have this base name.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
249 * **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization. A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
250 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
180
15bd502e6a0c Uploaded
greg
parents: 178
diff changeset
251 * **Calculate the signal in each window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each window.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
252 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
253 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
254 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is at a similar scale or larger than the signal in your data, it will lose power. For example, if your input data is mean read count per window, using 0.1 may produce better results.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
255 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
256 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
257 * **Maximum number of position classes to be inferred** - Set this value only if:
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
258
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
259 * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
260 * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
261
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
262 * **Maximum number of cell type clusters allowed** - If you set the value to 1, then all cell types will be clustered in one group, which may be desirable if all cell types are homogeneous and you want IDEAS to use information in all cell types equally.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
263 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
264 * **Number of burnin steps** - specify the number of burnin steps; default is 20. Increasing the burnin and maximization steps will increase computing resources and only slightly increase accuracy, while decreasing them will reduce computing resources but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
265 * **Number of maximization steps** - specify the number of maximization steps; default is 20.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
266 * **Minimum standard deviation for the emission Gaussian distribution** - This number multiplied by the overall standard deviation of your data will be used as a lower bound for the standard deviation for each factor in each epigenetic state (the default is 0.5). This number is useful for removing very subtle clusters in the data. Setting this value near 0 will allow IDEAS to discover many subtle states, while setting it greater than 1 will result in IDEAS losing the ability to detect meaningful states.
130
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
267 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
168
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
268 * **Output heatmaps** - select "Yes" to produce an additional dataset collection consisting of PDF datasets, one for each dataset with a .para extension in the primary IDEAS output dataset collection.
5c5e2f7b34c8 Uploaded
greg
parents: 165
diff changeset
269 * **Save IDEAS log in an additional history item** - select "Yes" to produce an additional history item that contains the entire IDEAS processing log.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
270 </help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
271 <citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
272 <citation type="doi">10.1093/nar/gkw278</citation>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
273 </citations>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
274 </tool>