annotate ideas.xml @ 164:68d909206a15 draft

Uploaded
author greg
date Fri, 12 Jan 2018 14:56:24 -0500
parents 4609a1d14059
children bb5544d1c85e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d75cbb2db2c4 Uploaded
greg
parents: 0
diff changeset
1 <tool id="ideas" name="IDEAS" version="1.2.0">
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
2 <description>accounts for position dependent epigenetic events and detects local cell type relationships</description>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
3 <requirements>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.26.0">bedtools</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="332">ucsc-bedgraphtobigwig</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="332">ucsc-bedsort</requirement>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="332">ucsc-bigwigaverageoverbed</requirement>
100
86445eab5f51 Uploaded
greg
parents: 99
diff changeset
8 <requirement type="package" version="1.20">ideas</requirement>
151
9d34f7e6d80c Uploaded
greg
parents: 149
diff changeset
9 <requirement type="package" version="1.10.4">r-data.table</requirement>
106
a0e38f759ad5 Uploaded
greg
parents: 104
diff changeset
10 <requirement type="package" version="1.4.4">r-optparse</requirement>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
11 </requirements>
30
1d854705de39 Uploaded
greg
parents: 29
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
13 #set output_pdf_dir = "output_pdf_dir"
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
14 #set output_txt_dir = "output_txt_dir"
155
f70a86b6c6e0 Uploaded
greg
parents: 153
diff changeset
15 #set output_training_dir = "output_training_dir"
66
dc5d5a08c370 Uploaded
greg
parents: 65
diff changeset
16 #set tmp_dir = "tmp"
72
6dece3e06a10 Uploaded
greg
parents: 71
diff changeset
17 #set prep_input_config = "prep_input_config.txt"
119
1d99221369b1 Uploaded
greg
parents: 117
diff changeset
18 #set prep_output_config = "prep_output_config.txt"
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
19 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
a80b76535243 Uploaded
greg
parents: 142
diff changeset
20 #set perform_training = $perform_training_cond.perform_training
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
21 ##############################################
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
22 ## Create the config file and prepare the data
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
23 ##############################################
132
8ce93420010c Uploaded
greg
parents: 131
diff changeset
24 #if str($output_heatmaps) == "yes":
8ce93420010c Uploaded
greg
parents: 131
diff changeset
25 mkdir '$output_pdf_dir' &&
8ce93420010c Uploaded
greg
parents: 131
diff changeset
26 #end if
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
27 #if str($perform_training) == "yes":
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
28 mkdir '$output_training_dir' &&
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
29 #end if
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
30 mkdir '$output_txt_dir' &&
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
31 cp '$gen_prep_input_config' $prep_input_config &&
138
8a10282ce217 Uploaded
greg
parents: 134
diff changeset
32 sort $prep_input_config -o $prep_input_config &&
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
33 prepMat
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
34 $prep_input_config
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
35 #if str($specify_genomic_window) == "yes":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
36 -bed '$specify_genomic_window_cond.bed_input'
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
37 #else:
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
38 -gsz '$chromInfo'
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
39 -wsz $specify_genomic_window_cond.window_size
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
40 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
41 #if str($restrict_chromosomes) == "yes":
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
## Build the comma-separated chromosome list for the prepMat -chr option.
## Each entry of the chrom_repeat repeat exposes its text field as .chrom;
## #silent keeps list.append()'s None return value out of the command line,
## and #echo evaluates the join instead of emitting it as literal text.
42 #set chroms = []
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
43 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
44 #for $i in $chrom_repeat
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
45 #silent $chroms.append(str($i.chrom).strip())
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
46 #end for
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
47 -chr '#echo ",".join($chroms)#'
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
48 #end if
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
49 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
50 $bychr
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
51 -c $reads_per_bp
78
949de45a7269 Uploaded
greg
parents: 77
diff changeset
52 #if str($blacklist_input) not in ["None", ""]:
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
53 -exclude '$blacklist_input'
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
54 #end if
45
25f82826cfa1 Uploaded
greg
parents: 44
diff changeset
55 $norm
139
84c909b549a9 Uploaded
greg
parents: 138
diff changeset
56 &>prepmat_log.txt;
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
57 if [[ $? -ne 0 ]]; then
139
84c909b549a9 Uploaded
greg
parents: 138
diff changeset
58 cp prepmat_log.txt '$output_txt_dir';
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
59 exit 1;
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
60 fi
3
9874800487e6 Uploaded
greg
parents: 1
diff changeset
61 ##############################################
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
62 ## Coerce the prepMat config output to the
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
63 ## format expected by IDEAS.
26
53891e4e4c63 Uploaded
greg
parents: 25
diff changeset
64 ##############################################
65
e29e083ebef7 Uploaded
greg
parents: 64
diff changeset
65 && cut -d' ' $prep_input_config -f1,2 > file1.txt
29
06522a4aae6d Uploaded
greg
parents: 28
diff changeset
66 && ls tmp/*.bed.gz > file2.txt
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
67 && paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
125
5e545c9030a0 Uploaded
greg
parents: 123
diff changeset
68 #if str($specify_genomic_window) == "yes":
132
8ce93420010c Uploaded
greg
parents: 131
diff changeset
69 ##############################################
8ce93420010c Uploaded
greg
parents: 131
diff changeset
70 ## Using a genomic window bed file, so categorize
8ce93420010c Uploaded
greg
parents: 131
diff changeset
71 ## the window positions by chromosome to enable
8ce93420010c Uploaded
greg
parents: 131
diff changeset
72 ## the IDEAS -inv option.
8ce93420010c Uploaded
greg
parents: 131
diff changeset
73 ##############################################
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
74 #import collections
d064db60a06d Uploaded
greg
parents: 128
diff changeset
75 #set window_positions_by_chroms = collections.OrderedDict()
d064db60a06d Uploaded
greg
parents: 128
diff changeset
76 #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')):
d064db60a06d Uploaded
greg
parents: 128
diff changeset
77 #set $line = $line.strip()
d064db60a06d Uploaded
greg
parents: 128
diff changeset
78 #if not $line or $line.startswith('#'):
d064db60a06d Uploaded
greg
parents: 128
diff changeset
79 #continue
d064db60a06d Uploaded
greg
parents: 128
diff changeset
80 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
81 #set items = $line.split('\t')
d064db60a06d Uploaded
greg
parents: 128
diff changeset
82 #if $items[0] in $window_positions_by_chroms:
d064db60a06d Uploaded
greg
parents: 128
diff changeset
83 #set tup = $window_positions_by_chroms[$items[0]]
d064db60a06d Uploaded
greg
parents: 128
diff changeset
84 #set $tup[1] += 1
d064db60a06d Uploaded
greg
parents: 128
diff changeset
85 #set $window_positions_by_chroms[$items[0]] = $tup
d064db60a06d Uploaded
greg
parents: 128
diff changeset
86 #else:
d064db60a06d Uploaded
greg
parents: 128
diff changeset
87 #set $window_positions_by_chroms[$items[0]] = [$count, $count+1]
d064db60a06d Uploaded
greg
parents: 128
diff changeset
88 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
89 #end for
d064db60a06d Uploaded
greg
parents: 128
diff changeset
90 #for chrom, tup in $window_positions_by_chroms.items():
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
91 && Rscript '$__tool_directory__/ideas.R'
a80b76535243 Uploaded
greg
parents: 142
diff changeset
92 #if str($perform_training) == "yes":
a80b76535243 Uploaded
greg
parents: 142
diff changeset
93 --training_iterations $perform_training_cond.training_iterations
a80b76535243 Uploaded
greg
parents: 142
diff changeset
94 --training_windows $perform_training_cond.training_windows
a80b76535243 Uploaded
greg
parents: 142
diff changeset
95 #end if
a80b76535243 Uploaded
greg
parents: 142
diff changeset
96 --prep_output_config '$prep_output_config'
a80b76535243 Uploaded
greg
parents: 142
diff changeset
97 --windows_bed '$specify_genomic_window_cond.bed_input'
a80b76535243 Uploaded
greg
parents: 142
diff changeset
98 --hp $hp
a80b76535243 Uploaded
greg
parents: 142
diff changeset
99 --window_start $tup[0]
a80b76535243 Uploaded
greg
parents: 142
diff changeset
100 --window_end $tup[1]
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
101 #if str($log2) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
102 --log2 $log2
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
103 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
104 #if str($max_states) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
105 --max_states $max_states
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
106 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
107 #if str($initial_states) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
108 --initial_states $initial_states
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
109 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
110 #if str($max_position_classes) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
111 --max_position_classes $max_position_classes
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
112 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
113 #if str($max_cell_type_clusters) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
114 --max_cell_type_clusters $max_cell_type_clusters
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
115 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
116 #if str($prior_concentration) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
117 --prior_concentration $prior_concentration
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
118 #end if
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
119 --burnin_num $burnin_num
a80b76535243 Uploaded
greg
parents: 142
diff changeset
120 --mcmc_num $mcmc_num
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
121 #if str($minerr) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
122 --minerr $minerr
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
123 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
124 #if str($maxerr) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
125 --maxerr $maxerr
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
126 #end if
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
127 --rseed $rseed
a80b76535243 Uploaded
greg
parents: 142
diff changeset
128 --thread \${GALAXY_SLOTS:-4}
a80b76535243 Uploaded
greg
parents: 142
diff changeset
129 --project_name '$project_name.$chrom'
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
130 #if str($save_ideas_log) == "yes":
151
9d34f7e6d80c Uploaded
greg
parents: 149
diff changeset
131 --save_ideas_log $save_ideas_log
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
132 --output_log '$output_log'
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
133 #end if
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
134 #end for
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
135 #else:
132
8ce93420010c Uploaded
greg
parents: 131
diff changeset
136 ##############################################
8ce93420010c Uploaded
greg
parents: 131
diff changeset
137 ## Not using a genomic window bed file.
8ce93420010c Uploaded
greg
parents: 131
diff changeset
138 ##############################################
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
139 && Rscript '$__tool_directory__/ideas.R'
a80b76535243 Uploaded
greg
parents: 142
diff changeset
140 #if str($perform_training) == "yes":
a80b76535243 Uploaded
greg
parents: 142
diff changeset
141 --training_iterations $perform_training_cond.training_iterations
a80b76535243 Uploaded
greg
parents: 142
diff changeset
142 --training_windows $perform_training_cond.training_windows
a80b76535243 Uploaded
greg
parents: 142
diff changeset
143 #end if
a80b76535243 Uploaded
greg
parents: 142
diff changeset
144 --prep_output_config '$prep_output_config'
a80b76535243 Uploaded
greg
parents: 142
diff changeset
145 --windows_bed $tmp_dir/*.bed
a80b76535243 Uploaded
greg
parents: 142
diff changeset
146 --hp $hp
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
147 #if str($log2) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
148 --log2 $log2
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
149 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
150 #if str($max_states) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
151 --max_states $max_states
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
152 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
153 #if str($initial_states) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
154 --initial_states $initial_states
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
155 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
156 #if str($max_position_classes) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
157 --max_position_classes $max_position_classes
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
158 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
159 #if str($max_cell_type_clusters) != "0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
160 --max_cell_type_clusters $max_cell_type_clusters
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
161 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
162 #if str($prior_concentration) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
163 --prior_concentration $prior_concentration
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
164 #end if
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
165 --burnin_num $burnin_num
a80b76535243 Uploaded
greg
parents: 142
diff changeset
166 --mcmc_num $mcmc_num
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
167 #if str($minerr) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
168 --minerr $minerr
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
169 #end if
d064db60a06d Uploaded
greg
parents: 128
diff changeset
170 #if str($maxerr) != "0.0":
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
171 --maxerr $maxerr
129
d064db60a06d Uploaded
greg
parents: 128
diff changeset
172 #end if
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
173 --rseed $rseed
a80b76535243 Uploaded
greg
parents: 142
diff changeset
174 --thread \${GALAXY_SLOTS:-4}
a80b76535243 Uploaded
greg
parents: 142
diff changeset
175 --project_name '$project_name'
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
176 #if str($save_ideas_log) == "yes":
151
9d34f7e6d80c Uploaded
greg
parents: 149
diff changeset
177 --save_ideas_log $save_ideas_log
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
178 --output_log '$output_log'
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
179 #end if
125
5e545c9030a0 Uploaded
greg
parents: 123
diff changeset
180 #end if
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
181 #if str($perform_training) == "yes":
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
182 && mv ./*.para0 '$output_training_dir'
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
183 && mv ./*.profile0 '$output_training_dir'
158
c996089f1747 Uploaded
greg
parents: 157
diff changeset
184 #if str($output_heatmaps) == "yes":
c996089f1747 Uploaded
greg
parents: 157
diff changeset
185 && Rscript '$__tool_directory__/create_heatmaps.R'
164
68d909206a15 Uploaded
greg
parents: 161
diff changeset
186 --input_dir '$output_training_dir'
158
c996089f1747 Uploaded
greg
parents: 157
diff changeset
187 --output_dir '$output_pdf_dir'
c996089f1747 Uploaded
greg
parents: 157
diff changeset
188 --script_dir '$__tool_directory__'
161
4609a1d14059 Uploaded
greg
parents: 160
diff changeset
189 --in_training_mode true
158
c996089f1747 Uploaded
greg
parents: 157
diff changeset
190 #end if
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
191 #else:
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
192 && mv ./*.cluster '$output_txt_dir'
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
193 && mv ./*.para '$output_txt_dir'
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
194 && mv ./*.profile '$output_txt_dir'
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
195 && mv ./*.state '$output_txt_dir'
158
c996089f1747 Uploaded
greg
parents: 157
diff changeset
196 #if str($output_heatmaps) == "yes":
c996089f1747 Uploaded
greg
parents: 157
diff changeset
197 && Rscript '$__tool_directory__/create_heatmaps.R'
c996089f1747 Uploaded
greg
parents: 157
diff changeset
198 --input_dir '$output_txt_dir'
c996089f1747 Uploaded
greg
parents: 157
diff changeset
199 --output_dir '$output_pdf_dir'
c996089f1747 Uploaded
greg
parents: 157
diff changeset
200 --script_dir '$__tool_directory__'
c996089f1747 Uploaded
greg
parents: 157
diff changeset
201 #end if
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
202 #end if
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
203 ]]></command>
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
204 <configfiles>
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
205 <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
206 #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
207 #for $i in $cell_type_epigenetic_factor_cond.input:
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
208 #set file_name_with_ext = $i.name
130
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
209 #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0:
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
210 #set file_name_with_ext = $file_name_with_ext.split('/')[-1]
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
211 #end if
128
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
212 #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
213 #set file_name = $file_name_with_ext.split(".")[0]
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
214 #if str($input_name_positions) == "cell_first":
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
215 #set cell_type_name = $file_name.split("-")[0]
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
216 #set epigenetic_factor_name = $file_name.split("-")[1]
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
217 #else:
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
218 #set cell_type_name = $file_name.split("-")[1]
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
219 #set epigenetic_factor_name = $file_name.split("-")[0]
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
220 #end if
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
221 ${cell_type_name} ${epigenetic_factor_name} ${i}
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
222 #end for
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
223 #else:
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
224 #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
225 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
226 #end for
de93d8e8a096 Uploaded
greg
parents: 126
diff changeset
227 #end if]]></configfile>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
228 </configfiles>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
229 <inputs>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
230 <conditional name="perform_training_cond">
a80b76535243 Uploaded
greg
parents: 142
diff changeset
231 <param name="perform_training" type="select" label="Perform training?">
a80b76535243 Uploaded
greg
parents: 142
diff changeset
232 <option value="yes" selected="true">Yes</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
233 <option value="no">No</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
234 </param>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
235 <when value="yes">
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
236 <param name="training_iterations" type="integer" value="20" min="2" label="Number of training iterations"/>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
237 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
238 </when>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
239 <when value="no"/>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
240 </conditional>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
241 <conditional name="cell_type_epigenetic_factor_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
242 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
243 <option value="extract" selected="true">extracting them from the selected input file names</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
244 <option value="manual">manually setting them for each selected input</option>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
245 </param>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
246 <when value="extract">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
247 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
248 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
249 <validator type="unspecified_build"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
250 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
251 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
252 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
253 <option value="cell_last">Epigenetic factor name - Cell type name</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
254 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
255 </when>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
256 <when value="manual">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
257 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
258 <param name="cell_type_name" type="text" value="" label="Cell type name">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
259 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
260 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
261 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
262 <validator type="empty_field"/>
32
58f5b2af9473 Uploaded
greg
parents: 31
diff changeset
263 </param>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
264 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
265 <validator type="empty_field"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
266 <validator type="unspecified_build"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
267 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
268 </repeat>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
269 </when>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
270 </conditional>
113
ba7238a53377 Uploaded
greg
parents: 112
diff changeset
271 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
272 <validator type="empty_field"/>
5753c1386737 Uploaded
greg
parents: 111
diff changeset
273 </param>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
274 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
275 <conditional name="specify_genomic_window_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
276 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
277 <option value="no" selected="true">No</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
278 <option value="yes">Yes</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
279 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
280 <when value="no">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
281 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
282 <conditional name="restrict_chromosomes_cond">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
283 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
284 <option value="no" selected="true">No</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
285 <option value="yes">Yes</option>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
286 </param>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
287 <when value="no"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
288 <when value="yes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
289 <repeat name="chrom_repeat" title="Chromosomes" min="1">
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
290 <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/>
68
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
291 </repeat>
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
292 </when>
5fb237a06499 Uploaded
greg
parents: 67
diff changeset
293 </conditional>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
294 </when>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
295 <when value="yes">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
296 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
297 </when>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
298 </conditional>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
299 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
300 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
36
859687afe7bc Uploaded
greg
parents: 35
diff changeset
301 <option value="6" selected="true">mean</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
302 <option value="8">max</option>
859687afe7bc Uploaded
greg
parents: 35
diff changeset
303 </param>
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
304 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
305 <param name="norm" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
306 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
101
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
307 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
308 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
309 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
310 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
311 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
312 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/>
34
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
313 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
8d8f796a3bda Uploaded
greg
parents: 33
diff changeset
314 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
101
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
315 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/>
ad45c3ba16b2 Uploaded
greg
parents: 100
diff changeset
316 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
317 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?">
149
a80b76535243 Uploaded
greg
parents: 142
diff changeset
318 <option value="yes" selected="true">Yes</option>
a80b76535243 Uploaded
greg
parents: 142
diff changeset
319 <option value="no">No</option>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
320 </param>
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
321 <param name="save_ideas_log" type="select" display="radio" label="Save IDEAS log in an additional history item">
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
322 <option value="no" selected="true">No</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
323 <option value="yes">Yes</option>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
324 </param>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
325 </inputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
326 <outputs>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
327 <data name="output_log" format="txt" label="${tool.name} (output log) on ${on_string}">
89
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
328 <filter>save_ideas_log == 'yes'</filter>
d6ab97fb8aca Uploaded
greg
parents: 88
diff changeset
329 </data>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
330 <collection name="output_pdf_collection" type="list" label="${tool.name} (heatmaps) on ${on_string}">
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
331 <discover_datasets pattern="__name__" directory="output_pdf_dir" format="pdf"/>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
332 <filter>output_heatmaps == 'yes'</filter>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
333 </collection>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
334 <collection name="output_txt_collection" type="list">
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
335 <discover_datasets pattern="__name__" directory="output_txt_dir" format="txt"/>
153
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
336 <filter>perform_training_cond['perform_training'] == 'no'</filter>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
337 </collection>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
338 <collection name="output_ttraining_collection" type="list">
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
339 <discover_datasets pattern="__name__" directory="output_training_dir" format="txt"/>
1dc3ef0a6312 Uploaded
greg
parents: 151
diff changeset
340 <filter>perform_training_cond['perform_training'] == 'yes'</filter>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
341 </collection>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
342 </outputs>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
343 <tests>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
344 <test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
345 <param name="cell_type_epigenetic_factor" value="extract"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
346 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
347 <param name="input_name_positions" value="cell_first"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
348 <param name="specify_genomic_window" value="yes"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
349 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
115
242ff1426fb4 Uploaded
greg
parents: 113
diff changeset
350 <param name="project_name" value="IDEAS_out"/>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
351 <output_collection name="output_txt_collection" type="list">
5753c1386737 Uploaded
greg
parents: 111
diff changeset
352 <element name="IDEAS_out.cluster" file="IDEAS_out.cluster" ftype="txt"/>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
353 <element name="IDEAS_out.para" file="IDEAS_out.para" ftype="txt"/>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
354 <element name="IDEAS_out.profile" file="IDEAS_out.profile" ftype="txt"/>
5753c1386737 Uploaded
greg
parents: 111
diff changeset
355 <element name="IDEAS_out.state" file="IDEAS_out.state" ftype="txt"/>
102
dad89ec8846f Uploaded
greg
parents: 101
diff changeset
356 </output_collection>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
357 <output_collection name="output_pdf_collection" type="list">
123
e2995f2f127f Uploaded
greg
parents: 120
diff changeset
358 <element name="IDEAS_out.pdf" file="IDEAS_out.pdf" compare="contains"/>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
359 </output_collection>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
360 </test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
361 <test>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
362 <param name="cell_type_epigenetic_factor" value="manual"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
363 <repeat name="input_repeat">
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
364 <param name="cell_type_name" value="e001" />
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
365 <param name="epigenetic_factor_name" value="h3k4me3"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
366 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
367 </repeat>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
368 <param name="specify_genomic_window" value="yes"/>
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
369 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
115
242ff1426fb4 Uploaded
greg
parents: 113
diff changeset
370 <param name="project_name" value="IDEAS_out"/>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
371 <output_collection name="output_txt_collection" type="list">
5753c1386737 Uploaded
greg
parents: 111
diff changeset
372 <element name="IDEAS_out.cluster" file="IDEAS_out.cluster" ftype="txt"/>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
373 <element name="IDEAS_out.para" file="IDEAS_out.para" ftype="txt"/>
112
5753c1386737 Uploaded
greg
parents: 111
diff changeset
374 <element name="IDEAS_out.profile" file="IDEAS_out.profile" ftype="txt"/>
5753c1386737 Uploaded
greg
parents: 111
diff changeset
375 <element name="IDEAS_out.state" file="IDEAS_out.state" ftype="txt"/>
5753c1386737 Uploaded
greg
parents: 111
diff changeset
376 </output_collection>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
377 <output_collection name="output_pdf_collection" type="list">
123
e2995f2f127f Uploaded
greg
parents: 120
diff changeset
378 <element name="IDEAS_out.pdf" file="IDEAS_out.pdf" compare="contains"/>
120
cfa683d96cae Uploaded
greg
parents: 119
diff changeset
379 </output_collection>
90
029e18c3c17b Uploaded
greg
parents: 89
diff changeset
380 </test>
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
381 </tests>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
382 <help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
383 **What it does**
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
384
95
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
385 IDEAS (an **I**\ ntegrative and **D**\ iscriminative **E**\ pigenome **A**\ nnotation **S**\ ystem) identifies
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
386 de novo regulatory functions from epigenetic data in multiple cell types jointly. It is a full probabilistic
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
387 model defined on all data, and it combines signals across both the genome and cell types to boost power. The
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
388 underlying assumption of IDEAS is that, because all cell types share the same underlying DNA sequences,
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
389 **functions of each DNA segment should be correlated**. Also, cell type specific regulation is locus-dependent,
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
390 and thus IDEAS uses local epigenetic landscape to **identify de novo and local cell type clusters** without
ff4d84a01fa7 Uploaded
greg
parents: 94
diff changeset
391 assuming or requiring a known global cell type relationship.
94
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
392
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
393 IDEAS takes as input a list of epigenetic data sets (histones, chromatin accessibility, CpG methylation, TFs, etc.)
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
394 or any other whole-genome data sets (e.g., scores). Currently the supported data formats include BigWig and BAM.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
395 All data sets will first be mapped by IDEAS to a common genomic coordinate in a selected assembly (200bp windows
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
396 by default, or user-provided). The user can specify regions to be considered or removed from the analysis. The
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
397 input data may come from one cell type/condition/individual/time point (although it does not fully utilize the
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
398 advantage of IDEAS), or from multiple cell types/conditions/individuals/time points. The same set of epigenetic
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
399 features may not be present in all cell types, for which IDEAS will do imputation of the missing tracks if
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
400 specified.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
401
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
402 .. image:: $PATH_TO_IMAGES/ideas.png
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
403
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
404 IDEAS predicts regulatory functions, denoted by epigenetic states, at each position in each cell type by
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
405 **combining information simultaneously learned from other cell types** at the same positions in cell types with
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
406 similar local epigenetic landscapes. Sizes of genomic intervals for determining the similarity are also learned.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
407 All of the inferences are done through parallel infinite-state hidden Markov models (iHMM), which is a Bayesian
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
408 non-parametric technique to automatically determine the number of local cell type clusters and the number of
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
409 epigenetic states.
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
410
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
411 In addition to its improved power, IDEAS has two unique advantages:
7d9af0d824ad Uploaded
greg
parents: 93
diff changeset
412
96
90144b60033c Uploaded
greg
parents: 95
diff changeset
413 1) applies **linear time inference** with respect to the number of cell types, which allows it to study hundreds or more cell types jointly
90144b60033c Uploaded
greg
parents: 95
diff changeset
414 2) uses mini-batch training to **improve reproducibility** of the predicted epigenetic states, which is important because genome segmentation is not convex and hence cannot guarantee a global optimal solution.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
415
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
416 -----
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
417
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
418 **Options**
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
419
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
420 * **Set cell type and epigenetic factor names by** - cell type and epigenetic factor names can be set manually or by extracting them from the names of the selected input datasets. The latter case requires all selected datasets to have names that contain a "-" character.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
421
93
0c2cf49dfb58 Uploaded
greg
parents: 91
diff changeset
422 * **BAM or BigWig files** - select one or more BAM or BigWig files from your history, making sure that the name of every selected input includes a "-" character (e.g., e001-h3k4me3.bigwig).
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
423 * **Cell type, Epigenetic factor and Input** - manually select any number of inputs, setting the cell type and epigenetic factor name for each. The combination of "cell type name" and "epigenetic factor name" must be unique for each input. For example, if you have replicate data you may want to specify the cell name as "rep1", "rep2", etc and the factor name as "rep1", "rep2", etc.
130
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
424
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
425 * **Cell type name** - cell type name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
426 * **Epigenetic factor name** - epigenetic factor name
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
427 * **BAM or BigWig file** - BAM or BigWig file
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
428
108
021d1f60b0d3 Uploaded
greg
parents: 107
diff changeset
429 * **Project name** - datasets produced by IDEAS will have this base name.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
430 * **Seed for IDEAS model initialization** - enter an integer to be used as the seed for the IDEAS model initialization. A zero value causes IDEAS to automatically generate a random seed, and this seed will be different for each job run.
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
431 * **Select Bed file that defines genomic windows on which to process the data** - if "No" is selected, IDEAS will run whole genome segmentation. If "Yes" is selected, IDEAS will segment genomes in the unit of the windows defined by the bed file. This file can be in BED3, BED4 or BED5 format, but only the first three columns (chr posst posed) will be used.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
432
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
433 * **Window size in base pairs** - Window size in base pairs (if "No" is selected)
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
434 * **Restrict processing to specified chromosomes** - If "Yes" is selected, processing will be restricted to specified chromosomes
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
435
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
436 * **Chromosomes** - processing will be restricted to specified chromosomes (if "Yes" is selected)
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
437
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
438 * **Bed file specifying the genomic windows** - bed file specifying the genomic windows (if "Yes" is selected)
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
439
44
8e449b51b581 Uploaded
greg
parents: 43
diff changeset
440 * **Output chromosomes in separate files** - select "Yes" to produce separate files for each chromosome, allowing you to run IDEAS on different chromosomes separately.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
441 * **Calculate the signal in each genomic window using** - use the bigWigAverageOverBed utility from the UCSC genome browser to calculate the signal (i.e., the number of reads per bp) in each genomic window.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
442 * **Select file(s) containing regions to exclude** - select one or more bed files that contain regions you'd like excluded from your datasets.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
443 * **Standardize all datasets** - select "Yes" to standardize all datasets (e.g., reads / total_reads * 20 million) so that the signals from different cell types become comparable - your datasets can be read counts, logp-values or fold change.
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
444
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
445 * **Discourage state transition across chromosomes** - select "Yes" to produce similar states in adjacent windows, making the annotation smoother, but at risk of reducing precision.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
446 * **Use log2(x+number) transformation** - perform Log2-transformation of the input data by log2(x+number) (recommended for read count data to reduce skewness). You can enter a number that is representative of the noise level in your data (e.g., a number less than 1). If this number is at a similar scale or larger than the signal in your data, it will lose power. For example, if your input data is mean read count per window, using 0.1 may produce better results.
33
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
447 * **Maximum number of states to be inferred** - restrict the maximum number of states to be generated by IDEAS; the final number of inferred states may be smaller than the number you specified
695053a23fe4 Uploaded
greg
parents: 32
diff changeset
448 * **Initial number of states** - while IDEAS may infer 30 states or more by starting from just 20 states, it may not do so if it is trapped in a local mode. We recommend setting the initial number of states slightly larger than the number of states you expect.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
449 * **Maximum number of position classes to be inferred** - Set this value only if:
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
450
88
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
451 * you do not want position classes (e.g., for testing purposes), in this case set the value to 1
462ce06410c6 Uploaded
greg
parents: 87
diff changeset
452 * IDEAS runs slowly because there are too many position classes; generally, fewer than 100 position classes will run fine
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
453
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
454 * **Maximum number of cell type clusters allowed** - If you set the value to 1, then all cell types will be clustered in one group, which may be desirable if all cell types are homogeneous and you want IDEAS to use information in all cell types equally.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
455 * **Prior concentration** - specify the prior concentration parameter; default is A=sqrt(number of cell types). A smaller concentration parameter (e.g., 1 or less) will emphasize more on position specificity and a larger concentration parameter (e.g., 10 * number of cell types) will emphasize more on global homogeneity.
98
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
456 * **Number of burnin steps** - specify the number of burnin steps; default is 20. Increasing the burnin and maximization steps will increase computing and only slightly increase accuracy, while decreasing them will reduce computing resources but may also reduce accuracy. We recommend running IDEAS with at least 20 burnins and 20 maximizations. IDEAS will not stop even if it reaches a maximum mode.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
457 * **Number of maximization steps** - specify the number of maximization steps; default is 20.
ed15c6ca3c51 Uploaded
greg
parents: 96
diff changeset
458 * **Minimum standard deviation for the emission Gaussian distribution** - This number multiplied by the overall standard deviation of your data will be used as a lower bound for the standard deviation for each factor in each epigenetic state (the default is 0.5). This number is useful for removing very subtle clusters in the data. Setting this value near 0 will allow IDEAS to discover many subtle states, while setting it greater than 1 will result in IDEAS losing the ability to detect meaningful states.
130
d088f25661d9 Uploaded
greg
parents: 129
diff changeset
459 * **Maximum standard deviation for the emission Gaussian distribution** - if you want to find fine-grained states you may use this option (if not used, IDEAS uses infinity), but it is rarely used unless you need more states to be inferred.
0
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
460
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
461 </help>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
462 <citations>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
463 <citation type="doi">10.1093/nar/gkw278</citation>
3b27bfc37b83 Uploaded
greg
parents:
diff changeset
464 </citations>
131
5150fcdcd0fa Uploaded
greg
parents: 130
diff changeset
465 </tool>