Mercurial > repos > pjbriggs > rnachipintegrator
comparison rnachipintegrator_wrapper.xml @ 0:0abe6bac47a6 draft
planemo upload for repository https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/rnachipintegrator commit 97d556dae96db5457590a3a257392b6e4093a912-dirty
author | pjbriggs |
---|---|
date | Wed, 24 Feb 2016 09:25:18 -0500 |
parents | |
children | d37526cd12aa |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0abe6bac47a6 |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | |
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0"> | |
3 <description>Integrated analysis of 'gene' and 'peak' data</description> | |
4 <macros> | |
5 <import>rnachipintegrator_macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements" /> | |
8 <expand macro="version_command" /> | |
9 <command interpreter="bash"><![CDATA[ | |
10 rnachipintegrator_wrapper.sh | |
11 #if $peaks_in.metadata.chromCol | |
12 --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol} | |
13 #end if | |
14 #if str( $cutoff ) != "" | |
15 --cutoff=$cutoff | |
16 #else | |
17 --cutoff=0 | |
18 #end if | |
19 #if str( $number ) != "" | |
20 --number=$number | |
21 #end if | |
22 --promoter_region=$promoter_start,$promoter_end | |
23 --edge=$edge | |
24 $diff_expressed_only | |
25 --xlsx_file "$xlsx_out" | |
26 --output_files "$peaks_per_feature_out" "$features_per_peak_out" | |
27 #if $output.compact_format | |
28 --compact | |
29 #else | |
30 #if $output.summary | |
31 --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary" | |
32 #end if | |
33 ${output.pad_output} | |
34 #end if | |
35 "$features_in" "$peaks_in" | |
36 ]]></command> | |
37 <inputs> | |
38 <param format="tabular" name="features_in" type="data" | |
39 label="Genes/genomic features" /> | |
40 <param format="tabular" name="peaks_in" type="data" | |
41 label="Peaks/regions" /> | |
42 <expand macro="analysis_options" /> | |
43 <param name="diff_expressed_only" type="boolean" | |
44 truevalue="--only-DE" falsevalue="" checked="false" | |
45 label="Only consider genes which are flagged as differentially | |
46 expressed" | |
47 help="NB input feature data must include differential expression | |
48 flags (--only-DE)" /> | |
49 <expand macro="output_options" /> | |
50 </inputs> | |
51 <outputs> | |
52 <!-- Always produce XLSX output --> | |
53 <data format="xlsx" name="xlsx_out" | |
54 label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" /> | |
55 <data format="tabular" name="peaks_per_feature_out" | |
56 label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" /> | |
57 <data format="tabular" name="features_per_peak_out" | |
58 label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" /> | |
59 <data format="tabular" name="peaks_per_feature_summary" | |
60 label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" > | |
61 <filter>output['compact_format'] is False</filter> | |
62 <filter>output['summary'] is True</filter> | |
63 </data> | |
64 <data format="tabular" name="features_per_peak_summary" | |
65 label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" > | |
66 <filter>output['compact_format'] is False</filter> | |
67 <filter>output['summary'] is True</filter> | |
68 </data> | |
69 </outputs> | |
70 <tests> | |
71 <!-- | |
72 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt | |
73 --> | |
74 <test> | |
75 <param name="features_in" value="features.txt" ftype="tabular" /> | |
76 <param name="peaks_in" value="summits.txt" ftype="tabular" /> | |
77 <param name="cutoff" value="130000" /> | |
78 <param name="promoter_start" value="-10000" /> | |
79 <param name="promoter_end" value="2500" /> | |
80 <output name="xlsx_out" file="summits.xlsx" compare="sim_size" /> | |
81 <output name="peaks_per_feature_out" ftype="tabular" | |
82 file="summits_per_feature.out" /> | |
83 <output name="features_per_peak_out" ftype="tabular" | |
84 file="features_per_summit.out" /> | |
85 </test> | |
86 <!-- | |
87 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt | |
88 --> | |
89 <test> | |
90 <param name="features_in" value="features.txt" ftype="tabular" /> | |
91 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
92 <param name="cutoff" value="130000" /> | |
93 <param name="promoter_start" value="-10000" /> | |
94 <param name="promoter_end" value="2500" /> | |
95 <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" /> | |
96 <output name="peaks_per_feature_out" ftype="tabular" | |
97 file="peaks_per_feature1.out" /> | |
98 <output name="features_per_peak_out" ftype="tabular" | |
99 file="features_per_peak1.out" /> | |
100 </test> | |
101 <!-- | |
102 RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt | |
103 --> | |
104 <test> | |
105 <param name="features_in" value="features.txt" ftype="tabular" /> | |
106 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
107 <param name="cutoff" value="130000" /> | |
108 <param name="compact_format" value="false" /> | |
109 <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" /> | |
110 <output name="peaks_per_feature_out" ftype="tabular" | |
111 file="peaks_per_feature2.out" /> | |
112 <output name="features_per_peak_out" ftype="tabular" | |
113 file="features_per_peak2.out" /> | |
114 </test> | |
115 <!-- | |
116 RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt | |
117 --> | |
118 <test> | |
119 <param name="features_in" value="features.txt" ftype="tabular" /> | |
120 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
121 <param name="cutoff" value="130000" /> | |
122 <param name="diff_expressed_only" value="true" /> | |
123 <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" /> | |
124 <output name="peaks_per_feature_out" ftype="tabular" | |
125 file="peaks_per_feature3.out" /> | |
126 <output name="features_per_peak_out" ftype="tabular" | |
127 file="features_per_peak3.out" /> | |
128 </test> | |
129 <!-- | |
130 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt | |
131 --> | |
132 <test> | |
133 <param name="features_in" value="features.txt" ftype="tabular" /> | |
134 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
135 <param name="cutoff" value="130000" /> | |
136 <param name="compact_format" value="false" /> | |
137 <param name="summary" value="true" /> | |
138 <param name="pad_output" value="true" /> | |
139 <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" /> | |
140 <output name="peaks_per_feature_out" ftype="tabular" | |
141 file="peaks_per_feature4.out" /> | |
142 <output name="features_per_peak_out" ftype="tabular" | |
143 file="features_per_peak4.out" /> | |
144 <output name="peaks_per_feature_summary" ftype="tabular" | |
145 file="peaks_per_feature4.summary" /> | |
146 <output name="features_per_peak_summary" ftype="tabular" | |
147 file="features_per_peak4.summary" /> | |
148 </test> | |
149 </tests> | |
150 <help> | |
151 | |
152 .. class:: infomark | |
153 | |
154 **What it does** | |
155 | |
156 Performs integrated analyses of genes (or other genomic feature data) | |
157 against a set of peaks (e.g. ChIP data), identifying the nearest peaks to | |
158 each feature and vice versa. | |
159 | |
160 The program was originally written specifically for ChIP-Seq and RNA-Seq | |
161 data but works equally well for ChIP-chip and microarray expression data, | |
162 and can also be used to integrate any set of genomic features (e.g. | |
163 canonical genes, CpG islands) with expression data. | |
164 | |
165 RnaChipIntegrator can be obtained from | |
166 https://pypi.python.org/pypi/RnaChipIntegrator/ | |
167 | |
168 ------------- | |
169 | |
170 .. class:: infomark | |
171 | |
172 **Input** | |
173 | |
174 The gene data must be in a tabular file with the following columns | |
175 of data for each gene or genomic feature (one gene per line): | |
176 | |
177 ====== ========== ====================================================================== | |
178 Column Name Description | |
179 ====== ========== ====================================================================== | |
180 1 ID Name used to identify the gene in the output | |
181 2 chr Chromosome name | |
182 3 start Start position of the gene | |
183 4 end End position of the gene | |
184 5 strand Must be either '+' or '-' | |
185 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0) | |
186 ====== ========== ====================================================================== | |
187 | |
188 The peak data must be in a tabular file with at least 3 columns of data | |
189 for each peak (one peak per line): | |
190 | |
191 ====== ========== ================================= | |
192 Column Name Description | |
193 ====== ========== ================================= | |
194 1 chr Chromosome name | |
195 2 start Start position of the peak | |
196 3 end End position of the peak | |
197 ====== ========== ================================= | |
198 | |
199 If peak data is in ``bed`` format then the tool will automatically | |
200 assign the correct columns, otherwise the first three columns of data | |
201 will be used. | |
202 | |
203 ------------- | |
204 | |
205 .. class:: infomark | |
206 | |
207 **Outputs** | |
208 | |
209 The key outputs from the tool are two lists comprising the nearest | |
210 peaks for each gene, and the nearest gene for each peak (one dataset | |
211 for each list). | |
212 | |
213 There are two formats for reporting: "compact" and "full": | |
214 | |
215 * **Compact output** reports all the hits for each peak or gene on | |
216 a single line of output; | |
217 * **Full output** reports each peak/gene pair on a separate line | |
218 (i.e. a multi-line output format). | |
219 | |
220 In "full" output mode, additional options are available: | |
221 | |
222 * The output files can be "padded" with extra (empty) lines to ensure | |
223 that there are always the same number of lines for each peak or | |
224 gene, if fewer than the requested number of hits are found. | |
225 * "Summary" datasets can also be requested, which include just the | |
226 nearest peak reported for each gene (and vice versa). | |
227 | |
228 In either mode these data will also be output in a single MS Excel file, | |
229 which contains one sheet per result set. | |
230 | |
231 .. class:: warning | |
232 | |
233 Using "compact" output with the number of hits limited to more than 4 | |
234 peak/gene pairs (or with no limit at all) can result in a large number | |
235 of columns in the output files, which in some versions of Galaxy will | |
236 not be properly displayed. However the data files themselves should be | |
237 okay. | |
238 | |
239 ------------- | |
240 | |
241 .. class:: infomark | |
242 | |
243 **More information** | |
244 | |
245 It is recommended that you refer to the ``RnaChipIntegrator`` | |
246 documentation for information on the contents of each output file: | |
247 | |
248 * http://rnachipintegrator.readthedocs.org/en/latest/ | |
249 | |
250 ------------- | |
251 | |
252 .. class:: infomark | |
253 | |
254 **Credits** | |
255 | |
256 This Galaxy tool has been developed within the Bioinformatics Core Facility at the | |
257 University of Manchester. It runs the RnaChipIntegrator package which has also been | |
258 developed by this group, and is documented at | |
259 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ | |
260 | |
261 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool. | |
262 </help> | |
263 <expand macro="citations" /> | |
264 </tool> |