comparison macs21_wrapper.xml @ 0:fdad0c8c0957 draft

Uploaded initial version to test toolshed.
author pjbriggs
date Wed, 21 Jan 2015 11:07:37 -0500
parents
children d0986d2be693
comparison
equal deleted inserted replaced
-1:000000000000 0:fdad0c8c0957
1 <tool id="fls_modencode_peakcalling_macs2.1" name="MACS2.1.0" version="2">
2 <requirements> <!--pinned package versions declared as dependencies of this tool-->
3 <requirement version="2.7" type="package">python</requirement>
4 <requirement version="1.8.1" type="package">numpy</requirement>
5 <requirement version="2.1.0.20140616" type="package">macs2</requirement>
6 </requirements>
7 <description>Model-based Analysis of ChIP-Seq [golem]</description>
8 <command interpreter="python">macs21_wrapper.py $options_file $outputs_file</command>
9 <inputs>
10 <!--experiment name sits outside the major_command conditional, so it is shown for every action-->
11 <param type="text" name="experiment_name" size="50" value="MACS2.1.0 in Galaxy"
12 label="Experiment Name"/>
13 <!--select one of the macs2 major commands (only callpeak and bdgcmp are currently enabled)-->
14 <conditional name="major_command">
15 <param type="select" name="major_command_selector" label="Select action to be performed">
16 <option value="callpeak">Peak Calling</option>
17 <!--disabled, no matching 'when' block: <option value="filterdup">filterdup</option>
18 <option value="randsample">randsample</option>-->
19 <option value="bdgcmp">Compare .bdg Files</option>
20 <!--disabled, no matching 'when' block: <option value="bdgdiff">bdgdiff</option>
21 <option value="bdgpeakcall">bdgpeakcall</option>
22 <option value="bdgbroadcall">bdgbroadcall</option>-->
23 </param>
24 <!--callpeak option of macs2-->
25 <when value="callpeak">
26 <!--narrow (default) or broad regions; the cutoff is only requested in broad mode-->
27 <conditional name="broad_options">
28 <param type="select" name="broad_regions" label="Type of region to call"
29 help="Broad regions are formed by linking nearby enriched regions">
30 <option selected="true" value="">Narrow regions</option>
31 <option value="broad">Broad regions</option>
32 </param>
33 <when value="broad">
34 <param type="float" name="broad_cutoff"
35 label="Cutoff for broad regions"
36 help="default: 0.1 (--broad-cutoff)" value="0.1"/>
37 </when>
38 </conditional>
39 <!--accepted read formats; more may be added at a later time-->
40 <param type="data" name="input_chipseq_file1" format="bed,sam,bam"
41 label="ChIP-seq read file" />
42 <param type="data" name="input_control_file1" optional="True" format="bed,sam,bam"
43 label="ChIP-seq control read file" />
44 <conditional name="genome_size">
45 <param type="select" name="gsize" label="Effective genome size"
46 help="Either pre-defined (for common organisms), or user-defined (--gsize)">
47 <option selected="true" value="hs">Human (2.7e9)</option>
48 <option value="mm">Mouse (1.87e9)</option>
49 <option value="ce">C. elegans (9e7)</option>
50 <option value="dm">Fruitfly (1.2e8)</option>
51 <option value="">User-defined</option>
52 </param>
53 <when value="">
54 <!-- Shown only for the empty-valued 'User-defined' choice above -->
55 <param type="float" name="user_defined_gsize" value=""
56 label="Enter effective genome size (number of bases)"
57 help="e.g. '1.0e+9' or '1000000000'" />
58 </when>
59 </conditional>
60 <param type="integer" name="bw" value="300" label="Band width" help="(--bw)"/>
61 <param name="xls_to_interval" label="Include XLS file from MACS"
62 type="boolean" checked="True" truevalue="True" falsevalue="False"
63 help="MACS2 XLS file will be output to the history in 'interval' format (suitable for subsequent analysis in Galaxy). Note that start positions are 1-based."/>
64
65 <conditional name="bdg_options">
66 <param name="bdg"
67 label="Save fragment pileup, control lambda, -log10pvalue/qvalue in bedGraph"
68 type="boolean" checked="False" truevalue="-B" falsevalue="" />
69 <when value="-B">
70 <param name="spmr"
71 type="boolean" checked="False" truevalue="--SPMR" falsevalue=""
72 label="Save signal per million reads for fragment pileup profiles"
73 help="(--SPMR)" />
74 </when>
75 <when value="">
76 <!-- No extra parameters when bedGraph output is switched off -->
77 </when>
78 </conditional>
79
80 <conditional name="pq_options">
81 <param type="select" name="pq_options_selector"
82 help="default uses q-value" label="Select p-value or q-value">
83 <option value="qvalue">q-value</option>
84 <option value="pvalue">p-value</option>
85 </param>
86 <when value="pvalue">
87 <param type="float" name="pvalue"
88 label="p-value cutoff for binding region detection"
89 help="default: 1e-2 (--pvalue)" value="1e-2"/>
90 </when>
91 <when value="qvalue">
92 <param type="float" name="qvalue"
93 label="q-value cutoff for binding region detection"
94 help="default: 0.01 (--qvalue)" value="0.01"/>
95 </when>
96 </conditional>
97 <conditional name="advanced_options">
98 <param name="advanced_options_selector" type="select"
99 label="Display advanced options">
100 <option value="off">Hide</option>
101 <option value="on">Display</option>
102 </param>
103 <when value="on"> <!--mfold defaults match the 5/50 that options_file uses when these options are hidden, so displaying them does not alter the run-->
104 <param name="mfoldlo" type="integer"
105 label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (lower-limit)"
106 value="5" help="(--mfold)"/>
107 <param name="mfoldhi" type="integer"
108 label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (upper-limit)"
109 value="50" help="(--mfold)"/>
110 <param name="nolambda"
111 label="Use fixed background lambda as local lambda for every binding region"
112 type="boolean" truevalue="--nolambda" falsevalue="" checked="False"
113 help="(--nolambda)"/>
114 <param name="call_summits"
115 label="Detect subpeaks within binding region"
116 type="boolean" truevalue="--call-summits" falsevalue="" checked="False"
117 help="(--call-summits)"/>
118 <conditional name="keep_duplicates">
119 <param name="keep_dup" type="select"
120 label="Use of duplicate reads">
121 <option value="auto">Automatically calculate maximum number of duplicates to keep (auto)</option>
122 <option value="all">Use all duplicates (all)</option>
123 <option value="" selected="true">Manually specify maximum number of duplicates</option>
124 </param>
125 <when value="">
126 <param name="maximum_tags" type="integer" value="1"
127 label="Maximum number of duplicated tags to keep at each location"/>
128 </when>
129 </conditional>
130 </when>
131 <when value="off">
132 <!--display nothing: options_file then falls back to mfold 5/50, no nolambda/call-summits, keep_dup 1-->
133 </when>
134 </conditional>
135 <conditional name="nomodel_type">
136 <param type="select" name="nomodel_type_selector" label="Build Model">
137 <option value="nomodel">Do not build the shifting model (--nomodel enabled)</option>
138 <option selected="true" value="create_model">Build the shifting model (--nomodel disabled)</option>
139 </param>
140 <when value="nomodel"> <!--extension size is only requested when the model is not built-->
141 <param type="integer" name="extsize" value="200" label="Arbitrary extension size in bp" help="Used as fragment size to extend each read towards 3' end (--extsize)"/>
142 </when>
143 </conditional>
144 </when>
145
146 <!--bdgcmp option of macs2. NOTE(review): the two inputs below accept bed/sam/bam and are labelled as read files although the action is 'Compare .bdg Files'; confirm the intended formats against macs21_wrapper.py-->
147 <when value="bdgcmp">
148 <param name="input_chipseq_file1" type="data" format="bed,sam,bam"
149 label="ChIP-seq read file" />
150 <param name="input_control_file1" type="data" format="bed,sam,bam" optional="True"
151 label="ChIP-seq control read file" />
152 <param name="pseudocount" type="float" label="Set pseudocount" value="0.00001"
153 help="default: 0.00001 (-p)"/>
154 <conditional name="bdgcmp_options">
155 <param name="bdgcmp_options_selector" type="select"
156 label="Select action to be performed">
157 <option value="ppois">ppois</option>
158 <option value="qpois">qpois</option>
159 <option value="subtract">subtract</option>
160 <option value="logFE">logFE</option>
161 <option value="FE">FE</option>
162 <option value="logLR">logLR</option>
163 </param>
164 </conditional>
165 </when>
166 </conditional>
167 </inputs>
168
169 <outputs>
170 <!--callpeak outputs: every dataset below is filtered on major_command_selector == 'callpeak'; some carry a second filter on broad_regions, xls_to_interval or bdg-->
171 <data name="output_extra_files" format="html"
172 label="${tool.name}: callpeak on ${on_string} (html report)">
173 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
174 </data>
175 <data name="output_summits_bed_file" format="bed"
176 label="${tool.name}: callpeak on ${on_string} (summits: bed)">
177 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
178 </data>
179 <data name="output_peaks_file" format="xls"
180 label="${tool.name}: callpeak on ${on_string} (peaks: xls)">
181 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
182 <filter>major_command['xls_to_interval'] is False</filter>
183 </data>
184 <data name="output_narrowpeaks_file" format="interval"
185 label="${tool.name}: callpeak on ${on_string} (peaks: narrowPeak)">
186 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
187 <filter>major_command['broad_options']['broad_regions'] == ''</filter>
188 </data>
189 <data name="output_broadpeaks_file" format="interval"
190 label="${tool.name}: callpeak on ${on_string} (peaks: broadPeak)">
191 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
192 <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
193 </data>
194 <data name="output_gappedpeaks_file" format="interval"
195 label="${tool.name}: callpeak on ${on_string} (peaks: gappedPeak)">
196 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
197 <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
198 </data>
199 <data name="output_xls_to_interval_peaks_file" format="interval"
200 label="${tool.name}: callpeak on ${on_string} (peaks: interval)">
201 <filter>major_command['xls_to_interval'] is True</filter>
202 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
203 </data>
204 <data name="output_treat_pileup_file" format="bedgraph"
205 label="${tool.name}: callpeak on ${on_string} (treat pileup: bedGraph)">
206 <filter>major_command['bdg_options']['bdg'] is True</filter>
207 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
208 </data>
209 <data name="output_lambda_bedgraph_file" format="bedgraph"
210 label="${tool.name}: callpeak on ${on_string} (control lambda: bedGraph)">
211 <filter>major_command['bdg_options']['bdg'] is True</filter>
212 <filter>major_command['major_command_selector'] == 'callpeak'</filter>
213 </data>
214 <!--bdgcmp output: declared as bedgraph, the same datatype used for the callpeak pileup and lambda tracks-->
215 <data name="output_bdgcmp_file" format="bedgraph"
216 label="${tool.name}: bdgcmp on ${on_string} (bdg)">
217 <filter>major_command['major_command_selector'] == 'bdgcmp'</filter>
218 </data>
219 </outputs>
220 <configfiles>
221 <configfile name="outputs_file">&lt;%
222 import simplejson
223 %&gt;
224 ##== callpeak: collect output dataset paths in a dict keyed by output name; it is written out as JSON at the end ==
225 #set $__outputs = { 'command':str( $major_command.major_command_selector ) }
226 #if str( $major_command.major_command_selector ) == 'callpeak':
227 #set $__outputs['output_summits_bed_file'] = str( $output_summits_bed_file )
228 #set $__outputs['output_extra_file'] = str( $output_extra_files )
229 #set $__outputs['output_extra_file_path'] = str( $output_extra_files.files_path )
230 #set $__outputs['output_peaks_file'] = str( $output_peaks_file )
231 #set $__outputs['output_narrowpeaks_file'] = str( $output_narrowpeaks_file )
232 #set $__outputs['output_broadpeaks_file'] = str( $output_broadpeaks_file )
233 #set $__outputs['output_gappedpeaks_file'] = str( $output_gappedpeaks_file )
234 #set $__outputs['output_xls_to_interval_peaks_file'] = str( $output_xls_to_interval_peaks_file )
235 #set $__outputs['output_treat_pileup_file'] = str( $output_treat_pileup_file )
236 #set $__outputs['output_lambda_bedgraph_file'] = str( $output_lambda_bedgraph_file )
237 #end if
238 ##== bdgcmp: a single output dataset ==
239 #if str( $major_command.major_command_selector ) == 'bdgcmp':
240 #set $__outputs['output_bdgcmp_file'] = str( $output_bdgcmp_file )
241 #end if
242
243 ${ simplejson.dumps( __outputs ) }
244 </configfile>
245 <configfile name="options_file">&lt;%
246 import simplejson
247 %&gt;
248 ##=======================================================================================
249 #set $__options = { 'experiment_name':str( $experiment_name ) }
250 ##treatment/tag input files and format
251 #set $__options['input_chipseq'] = [ str( $major_command.input_chipseq_file1 ) ]
252 #set $__options['format'] = $major_command.input_chipseq_file1.extension.upper()
253
254 ##control/input files
255 #set $__options['input_control'] = []
256 #if str( $major_command.input_control_file1 ) != 'None':
257 #set $_hole = __options['input_control'].append( str( $major_command.input_control_file1 ) )
258 #end if
259
260 #if str( $major_command.major_command_selector ) == 'callpeak':
261 #set $__options['command'] = str( "callpeak" )
262 #set $__options['bw'] = str( $major_command.bw )
263 #set $__options['xls_to_interval'] = str( $major_command.xls_to_interval )
264
265 ##bdg options (test the string form against the param's truevalue '-B' instead of comparing with Python True)
266 #if str( $major_command.bdg_options.bdg ) == '-B':
267 #set $__options['bdg'] = str( "-B" )
268 #set $__options['spmr'] = str( $major_command.bdg_options.spmr )
269 #else:
270 #set $__options['bdg'] = str( "" )
271 #set $__options['spmr'] = str( "" )
272 #end if
273
274 ##broad_options
275 #if str( $major_command.broad_options.broad_regions ) == 'broad':
276 #set $__options['broad'] = str( $major_command.broad_options.broad_regions )
277 #set $__options['broad_cutoff'] = str( $major_command.broad_options.broad_cutoff )
278 #else:
279 #set $__options['broad'] = str( "" )
280 #set $__options['broad_cutoff'] = str( "" )
281 #end if
282
283 ##genome sizes (the user-defined size is a float param, e.g. '1.0e+9', so convert via str/float before int)
284 #if str( $major_command.genome_size.gsize ) == '':
285 #set $__options['gsize'] = int( float( str( $major_command.genome_size.user_defined_gsize ) ) )
286 #else:
287 #set $__options['gsize'] = str( $major_command.genome_size.gsize )
288 #end if
289
290 ##advanced options
291 #if str( $major_command.advanced_options.advanced_options_selector ) == 'on':
292 #set $__options['mfoldlo'] = int( $major_command.advanced_options.mfoldlo )
293 #set $__options['mfoldhi'] = int( $major_command.advanced_options.mfoldhi )
294 #set $__options['nolambda'] = str( $major_command.advanced_options.nolambda )
295 #set $__options['call_summits'] = str( $major_command.advanced_options.call_summits )
296 #if str( $major_command.advanced_options.keep_duplicates.keep_dup ) == '':
297 #set $__options['keep_dup'] = int( $major_command.advanced_options.keep_duplicates.maximum_tags )
298 #else:
299 #set $__options['keep_dup'] = str( $major_command.advanced_options.keep_duplicates.keep_dup )
300 #end if
301 #else:
302 #set $__options['mfoldlo'] = int( "5" )
303 #set $__options['mfoldhi'] = int( "50" )
304 #set $__options['nolambda'] = str( "" )
305 #set $__options['call_summits'] = str( "" )
306 #set $__options['keep_dup'] = int( "1" )
307 #end if
308
309 ##enable xls file options
310 ##if str( $major_command.xls_to_interval ) == 'create':
311 ##set $__options['xls_to_interval'] = { 'peaks_file': str( $output_xls_to_interval_peaks_file ), 'negative_peaks_file': str( $output_xls_to_interval_negative_peaks_file ) }
312 ##end if
313
314 ##pq value select options
315 #if str( $major_command.pq_options.pq_options_selector ) == 'qvalue':
316 #set $__options['qvalue'] = str( $major_command.pq_options.qvalue )
317 #else:
318 #set $__options['pvalue'] = str( $major_command.pq_options.pvalue )
319 #end if
320
321 ##model options
322 #if str( $major_command.nomodel_type.nomodel_type_selector ) == 'nomodel':
323 #set $__options['nomodel'] = str( $major_command.nomodel_type.extsize )
324 #end if
325 #end if
326 ##=======================================================================================
327 #if str( $major_command.major_command_selector ) == 'bdgcmp':
328 #set $__options['command'] = str( "bdgcmp" )
329 #set $__options['pseudocount'] = float( str( $major_command.pseudocount ) )
330 #set $__options['m'] = str( $major_command.bdgcmp_options.bdgcmp_options_selector )
331 #end if
332 ##=======================================================================================
333
334 ${ simplejson.dumps( __options ) }
335 </configfile>
336 </configfiles>
337 <tests>
338 <!--none yet for macs2: this tool defines no functional tests-->
339 </tests>
340 <help>
341
342 .. class:: warningmark
343
344 **This is a modified version of the standard Galaxy toolshed "MACS2" tool,
345 which has been customised for users at the University of Manchester to work
346 with MACS 2.1.0.**
347
348 It is based on the 16:14f378e35191 revision of the tool at
349
350 * http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2
351
352 ------
353
354 **What it does**
355
356 With the improvement of sequencing techniques, chromatin immunoprecipitation
357 followed by high throughput sequencing (ChIP-Seq) is getting popular to study
358 genome-wide protein-DNA interactions. To address the lack of powerful ChIP-Seq
359 analysis methods, we present a novel algorithm, named Model-based Analysis of
360 ChIP-Seq (MACS), for identifying transcription factor binding sites. MACS captures
361 the influence of genome complexity to evaluate the significance of enriched
362 ChIP regions, and MACS improves the spatial resolution of binding sites through
363 combining the information of both sequencing tag position and orientation. MACS
364 can be easily used for ChIP-Seq data alone, or with a control sample to
365 increase specificity.
366
367 View the original MACS2 documentation:
368 https://github.com/taoliu/MACS/blob/master/README.rst
369
370 ------
371
372 **Usage**
373
374 **Peak Calling**: Main MACS2 Function to Call peaks from alignment results.
375
376 **Compare .bdg files**: Deduct noise by comparing two signal tracks in bedGraph.
377
378
379 ------
380
381 **Citation**
382
383 For the underlying tool, please cite Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137.
384
385 Integration of MACS2 with Galaxy performed by Ziru Zhou ( ziruzhou@gmail.com ). Please send your comments/questions to modENCODE DCC at help@modencode.org.
386 </help>
387 </tool>