comparison rnachipintegrator_wrapper.xml @ 0:0abe6bac47a6 draft

planemo upload for repository https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/rnachipintegrator commit 97d556dae96db5457590a3a257392b6e4093a912-dirty
author pjbriggs
date Wed, 24 Feb 2016 09:25:18 -0500
parents
children d37526cd12aa
comparison
equal deleted inserted replaced
-1:000000000000 0:0abe6bac47a6
1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0">
3 <description>Integrated analysis of 'gene' and 'peak' data</description>
4 <macros>
5 <import>rnachipintegrator_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <expand macro="version_command" />
9 <command interpreter="bash"><![CDATA[
10 rnachipintegrator_wrapper.sh
11 #if $peaks_in.metadata.chromCol
12 --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
13 #end if
14 #if str( $cutoff ) != ""
15 --cutoff=$cutoff
16 #else
17 --cutoff=0
18 #end if
19 #if str( $number ) != ""
20 --number=$number
21 #end if
22 --promoter_region=$promoter_start,$promoter_end
23 --edge=$edge
24 $diff_expressed_only
25 --xlsx_file "$xlsx_out"
26 --output_files "$peaks_per_feature_out" "$features_per_peak_out"
27 #if $output.compact_format
28 --compact
29 #else
30 #if $output.summary
31 --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
32 #end if
33 ${output.pad_output}
34 #end if
35 "$features_in" "$peaks_in"
36 ]]></command>
37 <inputs>
38 <param format="tabular" name="features_in" type="data"
39 label="Genes/genomic features" />
40 <param format="tabular" name="peaks_in" type="data"
41 label="Peaks/regions" />
42 <expand macro="analysis_options" />
43 <param name="diff_expressed_only" type="boolean"
44 truevalue="--only-DE" falsevalue="" checked="false"
45 label="Only consider genes which are flagged as differentially
46 expressed"
47 help="NB input feature data must include differential expression
48 flags (--only-DE)" />
49 <expand macro="output_options" />
50 </inputs>
51 <outputs>
52 <!-- Always produce XLSX output -->
53 <data format="xlsx" name="xlsx_out"
54 label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" />
55 <data format="tabular" name="peaks_per_feature_out"
56 label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" />
57 <data format="tabular" name="features_per_peak_out"
58 label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" />
59 <data format="tabular" name="peaks_per_feature_summary"
60 label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" >
61 <filter>output['compact_format'] is False</filter>
62 <filter>output['summary'] is True</filter>
63 </data>
64 <data format="tabular" name="features_per_peak_summary"
65 label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" >
66 <filter>output['compact_format'] is False</filter>
67 <filter>output['summary'] is True</filter>
68 </data>
69 </outputs>
70 <tests>
71 <!--
72 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt
73 -->
74 <test>
75 <param name="features_in" value="features.txt" ftype="tabular" />
76 <param name="peaks_in" value="summits.txt" ftype="tabular" />
77 <param name="cutoff" value="130000" />
78 <param name="promoter_start" value="-10000" />
79 <param name="promoter_end" value="2500" />
80 <output name="xlsx_out" file="summits.xlsx" compare="sim_size" />
81 <output name="peaks_per_feature_out" ftype="tabular"
82 file="summits_per_feature.out" />
83 <output name="features_per_peak_out" ftype="tabular"
84 file="features_per_summit.out" />
85 </test>
86 <!--
87 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt
88 -->
89 <test>
90 <param name="features_in" value="features.txt" ftype="tabular" />
91 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
92 <param name="cutoff" value="130000" />
93 <param name="promoter_start" value="-10000" />
94 <param name="promoter_end" value="2500" />
95 <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" />
96 <output name="peaks_per_feature_out" ftype="tabular"
97 file="peaks_per_feature1.out" />
98 <output name="features_per_peak_out" ftype="tabular"
99 file="features_per_peak1.out" />
100 </test>
101 <!--
102 RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt
103 -->
104 <test>
105 <param name="features_in" value="features.txt" ftype="tabular" />
106 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
107 <param name="cutoff" value="130000" />
108 <param name="compact_format" value="false" />
109 <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" />
110 <output name="peaks_per_feature_out" ftype="tabular"
111 file="peaks_per_feature2.out" />
112 <output name="features_per_peak_out" ftype="tabular"
113 file="features_per_peak2.out" />
114 </test>
115 <!--
116 RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt
117 -->
118 <test>
119 <param name="features_in" value="features.txt" ftype="tabular" />
120 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
121 <param name="cutoff" value="130000" />
122 <param name="diff_expressed_only" value="true" />
123 <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" />
124 <output name="peaks_per_feature_out" ftype="tabular"
125 file="peaks_per_feature3.out" />
126 <output name="features_per_peak_out" ftype="tabular"
127 file="features_per_peak3.out" />
128 </test>
129 <!--
130 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt
131 -->
132 <test>
133 <param name="features_in" value="features.txt" ftype="tabular" />
134 <param name="peaks_in" value="peaks.txt" ftype="tabular" />
135 <param name="cutoff" value="130000" />
136 <param name="compact_format" value="false" />
137 <param name="summary" value="true" />
138 <param name="pad_output" value="true" />
139 <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" />
140 <output name="peaks_per_feature_out" ftype="tabular"
141 file="peaks_per_feature4.out" />
142 <output name="features_per_peak_out" ftype="tabular"
143 file="features_per_peak4.out" />
144 <output name="peaks_per_feature_summary" ftype="tabular"
145 file="peaks_per_feature4.summary" />
146 <output name="features_per_peak_summary" ftype="tabular"
147 file="features_per_peak4.summary" />
148 </test>
149 </tests>
150 <help>
151
152 .. class:: infomark
153
154 **What it does**
155
156 Performs integrated analyses of genes (or other genomic feature data)
157 against a set of peaks (e.g. ChIP data), identifying the nearest peaks to
158 each feature and vice versa.
159
160 The program was originally written specifically for ChIP-Seq and RNA-Seq
161 data but works equally well for ChIP-chip and microarray expression data,
162 and can also be used to integrate any set of genomic features (e.g.
163 canonical genes, CpG islands) with expression data.
164
165 RnaChipIntegrator can be obtained from
166 https://pypi.python.org/pypi/RnaChipIntegrator/
167
168 -------------
169
170 .. class:: infomark
171
172 **Input**
173
174 The gene data must be in a tabular file with the following columns
175 of data for each gene or genomic feature (one gene per line):
176
177 ====== ========== ======================================================================
178 Column Name Description
179 ====== ========== ======================================================================
180 1 ID Name used to identify the gene in the output
181 2 chr Chromosome name
182 3 start Start position of the gene
183 4 end End position of the gene
184 5 strand Must be either '+' or '-'
185 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0)
186 ====== ========== ======================================================================
187
188 The peak data must be in a tabular file with at least 3 columns of data
189 for each peak (one peak per line):
190
191 ====== ========== =================================
192 Column Name Description
193 ====== ========== =================================
194 1 chr Chromosome name
195 2 start Start position of the peak
196 3 end End position of the peak
197 ====== ========== =================================
198
199 If peak data is in ``bed`` format then the tool will automatically
200 assign the correct columns, otherwise the first three columns of data
201 will be used.
202
203 -------------
204
205 .. class:: infomark
206
207 **Outputs**
208
209 The key outputs from the tool are two lists comprising the nearest
210 peaks for each gene, and the nearest gene for each peak (one dataset
211 for each list).
212
213 There are two formats for reporting: "compact" and "full":
214
215 * **Compact output** reports all the hits for each peak or gene on
216 a single line of output;
217 * **Full output** reports each peak/gene pair on a separate line
218 (i.e. a multi-line output format).
219
220 In "full" output mode, additional options are available:
221
222 * The output files can be "padded" with extra (empty) lines to ensure
223 that there are always the same number of lines for each peak or
224 gene, if fewer than the requested number of hits are found.
225 * "Summary" datasets can also be requested, which include just the
226 nearest peak reported for each gene (and vice versa).
227
228 In either mode these data will also be output in a single MS Excel file,
229 which contains one sheet per result set.
230
231 .. class:: warning
232
233 Using "compact" output with the number of hits limited to more than 4
234 peak/gene pairs (or with no limit at all) can result in a large number
235 of columns in the output files, which in some versions of Galaxy will
236 not be properly displayed. However the data files themselves should be
237 okay.
238
239 -------------
240
241 .. class:: infomark
242
243 **More information**
244
245 It is recommended that you refer to the ``RnaChipIntegrator``
246 documentation for information on the contents of each output file:
247
248 * http://rnachipintegrator.readthedocs.org/en/latest/
249
250 -------------
251
252 .. class:: infomark
253
254 **Credits**
255
256 This Galaxy tool has been developed within the Bioinformatics Core Facility at the
257 University of Manchester. It runs the RnaChipIntegrator package which has also been
258 developed by this group, and is documented at
259 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
260
261 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
262 </help>
263 <expand macro="citations" />
264 </tool>