comparison ideas.xml @ 169:7b0c6c6cb82b draft

Uploaded
author greg
date Thu, 25 Jan 2018 09:30:44 -0500
parents 5c5e2f7b34c8
children 445f67ea18f6
comparison
equal deleted inserted replaced
168:5c5e2f7b34c8 169:7b0c6c6cb82b
11 </requirements> 11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 #set output_pdf_dir = "output_pdf_dir" 13 #set output_pdf_dir = "output_pdf_dir"
14 #set output_txt_dir = "output_txt_dir" 14 #set output_txt_dir = "output_txt_dir"
15 #set output_training_dir = "output_training_dir" 15 #set output_training_dir = "output_training_dir"
16 #set tmp_dir = "tmp"
17 #set prep_input_config = "prep_input_config.txt"
18 #set prep_output_config = "prep_output_config.txt"
19 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
20 #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt" 16 #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt"
21 #set perform_training = $perform_training_cond.perform_training 17 #set perform_training = $perform_training_cond.perform_training
22 ##############################################
23 ## Create the config file and prepare the data
24 ##############################################
25 #if str($output_heatmaps) == "yes":
26 mkdir '$output_pdf_dir' &&
27 #end if
28 #if str($perform_training) == "yes":
29 #set output_dir = $output_training_dir
30 mkdir '$output_training_dir' &&
31 #else:
32 #set output_dir = $output_txt_dir
33 mkdir '$output_txt_dir' &&
34 #end if
35 cp '$gen_prep_input_config' $prep_input_config &&
36 sort $prep_input_config -o $prep_input_config &&
37 prepMat
38 $prep_input_config
39 #if str($specify_genomic_window) == "yes":
40 -bed '$specify_genomic_window_cond.bed_input'
41 #else:
42 -gsz '$chromInfo'
43 -wsz $specify_genomic_window_cond.window_size
44 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
45 #if str($restrict_chromosomes) == "yes":
46 #set chroms = []
47 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
48 #for $i in $chrom_repeat.chrom
49 $chroms.append($i)
50 #end for
51 -chr ",".join(chroms)
52 #end if
53 #end if
54 $bychr
55 -c $reads_per_bp
56 #if str($blacklist_input) not in ["None", ""]:
57 -exclude '$blacklist_input'
58 #end if
59 $norm
60 &>prepmat_log.txt;
61 if [[ $? -ne 0 ]]; then
62 cp prepmat_log.txt '$output_dir';
63 exit 1;
64 fi
65 ##############################################
66 ## Coerce the prepMat config output to the
67 ## format expected by IDEAS.
68 ##############################################
69 && cut -d' ' $prep_input_config -f1,2 > file1.txt
70 && ls tmp/*.bed.gz > file2.txt
71 && paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
72 #if str($specify_genomic_window) == "yes":
73 ##############################################
74 ## Using a genomic window bed file, so categorize
75 ## the window positions by chromosome to enable
76 ## the IDEAS -inv option.
77 ##############################################
78 && cp '$gen_windows_positions_by_chroms_config' $windows_positions_by_chroms_config
79 #end if
80 && Rscript '$__tool_directory__/ideas.R' 18 && Rscript '$__tool_directory__/ideas.R'
81 #if str($specify_genomic_window) == "yes": 19 --burnin_num $burnin_num
82 --windows_bed '$specify_genomic_window_cond.bed_input' 20 #if str($bychr) == "true":
83 --windows_config $windows_positions_by_chroms_config 21 --bychr true
84 #end if 22 #end if
85 #if str($perform_training) == "yes": 23 --chrom_bed_input $input.metadata.chrom_bed_input
86 --training_iterations $perform_training_cond.training_iterations 24 --chromosome_windows $input.metadata.chromosome_windows
87 --training_windows $perform_training_cond.training_windows 25 #if str($hp) == "true":
88 #end if 26 --hp true
89 --prep_output_config '$prep_output_config' 27 #end if
90 --hp $hp 28 #if str($initial_states) != "0":
29 --initial_states $initial_states
30 #end if
31 --input $input
32 --input_files_path $input.extra_files_path
33 --ideas_input_config $input.metadata.ideas_input_config
91 #if str($log2) != "0.0": 34 #if str($log2) != "0.0":
92 --log2 $log2 35 --log2 $log2
93 #end if 36 #end if
37 #if str($maxerr) != "0.0":
38 --maxerr $maxerr
39 #end if
40 #if str($max_cell_type_clusters) != "0":
41 --max_cell_type_clusters $max_cell_type_clusters
42 #end if
43 #if str($max_position_classes) != "0":
44 --max_position_classes $max_position_classes
45 #end if
94 #if str($max_states) != "0.0": 46 #if str($max_states) != "0.0":
95 --max_states $max_states 47 --max_states $max_states
96 #end if 48 #end if
97 #if str($initial_states) != "0":
98 --initial_states $initial_states
99 #end if
100 #if str($max_position_classes) != "0":
101 --max_position_classes $max_position_classes
102 #end if
103 #if str($max_cell_type_clusters) != "0":
104 --max_cell_type_clusters $max_cell_type_clusters
105 #end if
106 #if str($prior_concentration) != "0.0":
107 --prior_concentration $prior_concentration
108 #end if
109 --burnin_num $burnin_num
110 --mcmc_num $mcmc_num 49 --mcmc_num $mcmc_num
111 #if str($minerr) != "0.0": 50 #if str($minerr) != "0.0":
112 --minerr $minerr 51 --minerr $minerr
113 #end if 52 #end if
114 #if str($maxerr) != "0.0": 53 #if str($prior_concentration) != "0.0":
115 --maxerr $maxerr 54 --prior_concentration $prior_concentration
116 #end if 55 #end if
117 --rseed $rseed
118 --thread \${GALAXY_SLOTS:-4}
119 --project_name '$project_name' 56 --project_name '$project_name'
120 #if str($save_ideas_log) == "yes": 57 #if str($save_ideas_log) == "yes":
121 --save_ideas_log $save_ideas_log 58 --save_ideas_log $save_ideas_log
122 --output_log '$output_log' 59 --output_log '$output_log'
60 #end if
61 #if str($standardize_datasets) == "true":
62 --standardize_datasets true
63 #end if
64 --rseed $rseed
65 --thread \${GALAXY_SLOTS:-4}
66 #if str($perform_training) == "yes":
67 --training_iterations $perform_training_cond.training_iterations
68 --training_windows $perform_training_cond.training_windows
123 #end if 69 #end if
124 #if str($perform_training) == "yes": 70 #if str($perform_training) == "yes":
125 && mv ./*.para0 '$output_dir' 71 && mv ./*.para0 '$output_dir'
126 && mv ./*.profile0 '$output_dir' 72 && mv ./*.profile0 '$output_dir'
127 #else: 73 #else:
138 #if str($perform_training) == "yes": 84 #if str($perform_training) == "yes":
139 --in_training_mode true 85 --in_training_mode true
140 #end if 86 #end if
141 #end if 87 #end if
142 ]]></command> 88 ]]></command>
143 <configfiles>
144 <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
145 #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
146 #for $i in $cell_type_epigenetic_factor_cond.input:
147 #set file_name_with_ext = $i.name
148 #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0:
149 #set file_name_with_ext = $file_name_with_ext.split('/')[-1]
150 #end if
151 #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
152 #set file_name = $file_name_with_ext.split(".")[0]
153 #if str($input_name_positions) == "cell_first":
154 #set cell_type_name = $file_name.split("-")[0]
155 #set epigenetic_factor_name = $file_name.split("-")[1]
156 #else:
157 #set cell_type_name = $file_name.split("-")[1]
158 #set epigenetic_factor_name = $file_name.split("-")[0]
159 #end if
160 ${cell_type_name} ${epigenetic_factor_name} ${i}
161 #end for
162 #else:
163 #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
164 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input}
165 #end for
166 #end if]]></configfile>
167 <configfile name="gen_windows_positions_by_chroms_config"><![CDATA[#if str($specify_genomic_window_cond.specify_genomic_window) == "yes":
168 #import collections
169 #set window_positions_by_chroms_odict = $collections.OrderedDict()
170 #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')):
171 #set $line = $line.strip()
172 #if not $line or $line.startswith('#'):
173 #continue
174 #end if
175 #set items = $line.split('\t')
176 #if $items[0] in $window_positions_by_chroms_odict:
177 #set tup = $window_positions_by_chroms_odict[$items[0]]
178 #set $tup[1] += 1
179 #set $window_positions_by_chroms_odict[$items[0]] = $tup
180 #else:
181 #set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1]
182 #end if
183 #end for
184 #for $chrom, $tup in $window_positions_by_chroms_odict.items():
185 ${chrom} ${tup[0]} ${tup[1]}
186 #end for
187 #end if]]></configfile>
188 </configfiles>
189 <inputs> 89 <inputs>
190 <conditional name="perform_training_cond"> 90 <conditional name="perform_training_cond">
191 <param name="perform_training" type="select" label="Perform training?"> 91 <param name="perform_training" type="select" label="Perform training?">
192 <option value="yes" selected="true">Yes</option> 92 <option value="yes" selected="true">Yes</option>
193 <option value="no">No</option> 93 <option value="no">No</option>
196 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/> 96 <param name="training_iterations" type="integer" value="20" min="3" label="Number of training iterations"/>
197 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/> 97 <param name="training_windows" type="integer" value="10000" min="2" label="Number of randomly selected windows for training"/>
198 </when> 98 </when>
199 <when value="no"/> 99 <when value="no"/>
200 </conditional> 100 </conditional>
201 <conditional name="cell_type_epigenetic_factor_cond"> 101 <param name="input" type="data" format="ideaspre" label="Select IDEAS input">
202 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
203 <option value="extract" selected="true">extracting them from the selected input file names</option>
204 <option value="manual">manually setting them for each selected input</option>
205 </param>
206 <when value="extract">
207 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
208 <validator type="empty_field"/>
209 <validator type="unspecified_build"/>
210 </param>
211 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
212 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
213 <option value="cell_last">Epigenetic factor name - Cell type name</option>
214 </param>
215 </when>
216 <when value="manual">
217 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
218 <param name="cell_type_name" type="text" value="" label="Cell type name">
219 <validator type="empty_field"/>
220 </param>
221 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
222 <validator type="empty_field"/>
223 </param>
224 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
225 <validator type="empty_field"/>
226 <validator type="unspecified_build"/>
227 </param>
228 </repeat>
229 </when>
230 </conditional>
231 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name"> 102 <param name="project_name" type="text" value="myProject" label="Project name" help="Outputs will have this base name">
232 <validator type="empty_field"/> 103 <validator type="empty_field"/>
233 </param> 104 </param>
234 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/> 105 <param name="rseed" type="integer" value="1234" min="0" max="1000000" label="Seed for IDEAS model initialization" help="Zero value generates a random seed, and this seed will be different for each job run."/>
235 <conditional name="specify_genomic_window_cond">
236 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
237 <option value="no" selected="true">No</option>
238 <option value="yes">Yes</option>
239 </param>
240 <when value="no">
241 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
242 <conditional name="restrict_chromosomes_cond">
243 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
244 <option value="no" selected="true">No</option>
245 <option value="yes">Yes</option>
246 </param>
247 <when value="no"/>
248 <when value="yes">
249 <repeat name="chrom_repeat" title="Chromosomes" min="1">
250 <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/>
251 </repeat>
252 </when>
253 </conditional>
254 </when>
255 <when value="yes">
256 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
257 </when>
258 </conditional>
259 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/> 106 <param name="bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
260 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using"> 107 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
261 <option value="6" selected="true">mean</option> 108 <option value="6" selected="true">mean</option>
262 <option value="8">max</option> 109 <option value="8">max</option>
263 </param> 110 </param>
264 <param name="blacklist_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
265 <param name="norm" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
266 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/> 111 <param name="hp" type="boolean" truevalue="true" falsevalue="" checked="False" label="Discourage state transition across chromosomes"/>
267 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/> 112 <param name="log2" type="float" value="0" min="0" label="Use log2(x+number) transformation" help="Zero means no log2 transformation"/>
268 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/> 113 <param name="max_states" type="float" value="0" min="0" label="Maximum number of states to be inferred" help="Zero sets the maximum to a large number"/>
269 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/> 114 <param name="initial_states" type="integer" value="20" min="0" label="Initial number of states" help="Positive integer"/>
270 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/> 115 <param name="max_position_classes" type="integer" value="0" min="0" label="Maximum number of position classes to be inferred" help="Zero sets the maximum to a large number"/>
271 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/> 116 <param name="max_cell_type_clusters" type="integer" value="0" min="0" label="Maximum number of cell type clusters allowed" help="Zero sets the maximum to a large number"/>
272 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/> 117 <param name="prior_concentration" type="float" value="1" min="0" label="Prior concentration" help="Zero value results in the default: sqrt(number of cell types)"/>
118 <param name="standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
273 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/> 119 <param name="burnin_num" type="integer" value="20" min="1" label="Number of burnin steps"/>
274 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/> 120 <param name="mcmc_num" type="integer" value="20" min="1" label="Number of maximization steps"/>
275 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/> 121 <param name="minerr" type="float" value="0.5" min="0" label="Minimum standard deviation for the emission Gaussian distribution" help="Zero value results in the default: 0.5"/>
276 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/> 122 <param name="maxerr" type="float" value="1000000" min="0" label="Maximum standard deviation for the emission Gaussian distribution" help="Zero sets the maximum to a large number"/>
277 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?"> 123 <param name="output_heatmaps" type="select" display="radio" label="Output heatmaps?">
301 </collection> 147 </collection>
302 </outputs> 148 </outputs>
303 <tests> 149 <tests>
304 <test> 150 <test>
305 <param name="perform_training" value="no"/> 151 <param name="perform_training" value="no"/>
306 <param name="cell_type_epigenetic_factor" value="extract"/> 152 <param name="input" value="input.ideaspre" ftype="ideaspre" dbkey="hg19"/>
307 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
308 <param name="input_name_positions" value="cell_first"/>
309 <param name="specify_genomic_window" value="yes"/>
310 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
311 <param name="project_name" value="IDEAS_out"/> 153 <param name="project_name" value="IDEAS_out"/>
312 <param name="initial_states" value="2"/> 154 <param name="initial_states" value="2"/>
313 <param name="maxerr" value="1000"/> 155 <param name="maxerr" value="1000"/>
314 <param name="output_heatmaps" value="no"/> 156 <param name="output_heatmaps" value="no"/>
315 <output_collection name="output_txt_collection" type="list"> 157 <output_collection name="output_txt_collection" type="list">
317 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/> 159 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/>
318 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/> 160 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/>
319 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/> 161 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/>
320 </output_collection> 162 </output_collection>
321 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" /> 163 <output name="output_log" file="output_log.txt" ftype="txt" compare="contains" />
322 </test>
323 <test>
324 <param name="perform_training" value="no"/>
325 <param name="cell_type_epigenetic_factor" value="manual"/>
326 <repeat name="input_repeat">
327 <param name="cell_type_name" value="e001" />
328 <param name="epigenetic_factor_name" value="h3k4me3"/>
329 <param name="input" value="e001-h3k4me3.bigwig" ftype="bigwig" dbkey="hg19"/>
330 </repeat>
331 <param name="specify_genomic_window" value="yes"/>
332 <param name="bed_input" value="genomic_windows.bed" ftype="bed" dbkey="hg19"/>
333 <param name="project_name" value="IDEAS_out"/>
334 <param name="initial_states" value="2"/>
335 <param name="maxerr" value="1000"/>
336 <param name="output_heatmaps" value="no"/>
337 <output_collection name="output_txt_collection" type="list">
338 <element name="IDEAS_out.chr1.cluster" file="IDEAS_out.cluster" ftype="txt"/>
339 <element name="IDEAS_out.chr1.para" file="IDEAS_out.para" ftype="txt"/>
340 <element name="IDEAS_out.chr1.profile" file="IDEAS_out.profile" ftype="txt"/>
341 <element name="IDEAS_out.chr1.state" file="IDEAS_out.state" ftype="txt"/>
342 </output_collection>
343 </test> 164 </test>
344 </tests> 165 </tests>
345 <help> 166 <help>
346 **What it does** 167 **What it does**
347 168