comparison ruvseq.xml @ 0:958ed8091d7b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ruvseq commit 545e858c8f600fa6e12a0a38546e155f22019dcb-dirty
author mvdbeek
date Mon, 03 Sep 2018 01:18:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:958ed8091d7b
1 <tool id="ruvseq" name="Remove Unwanted Variation" version="2.11.40.2+gx1">
2 <description>from RNA-seq data</description>
3 <requirements> <!-- Package dependencies for this tool, each pinned to an exact version -->
4 <requirement version="1.12.0" type="package">bioconductor-ruvseq</requirement>
5 <requirement version="1.18.1" type="package">bioconductor-deseq2</requirement>
6 <requirement version="1.6.0" type="package">bioconductor-tximport</requirement>
7 <requirement version="1.30.0" type="package">bioconductor-genomicfeatures</requirement>
8 <requirement version="0.6.5" type="package">r-ggrepel</requirement>
9 </requirements>
10 <stdio> <!-- Each pattern is checked against both stdout and stderr; a match is reported at level "fatal" -->
11 <regex source="both"
12 level="fatal"
13 match="Execution halted"
14 description="Execution halted." />
15 <regex source="both"
16 level="fatal"
17 match="Error in"
18 description="An undefined error occurred, please check your input carefully and contact your administrator." />
19 <regex source="both"
20 level="fatal"
21 match="Fatal error"
22 description="An undefined error occurred, please check your input carefully and contact your administrator." />
23 </stdio>
24 <version_command><![CDATA[
25 echo $(R --version | grep version | grep -v GNU)", RUVSeq version" $(R --vanilla --slave -e "library(RUVSeq); cat(sessionInfo()\$otherPkgs\$RUVSeq\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
26 ]]></version_command> <!-- Echoes the "version" line(s) of R (GNU lines filtered out) followed by the RUVSeq package version reported by sessionInfo() -->
27 <command><![CDATA[
28 #if $tximport.tximport_selector == 'tximport':
29 #if $tximport.mapping_format.mapping_format_selector == 'gtf':
30 ln -s '$tximport.mapping_format.gtf_file' mapping.gtf &&
31 #else:
32 ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
33 #end if
34 #end if
35
36 Rscript '${__tool_directory__}/ruvseq.R'
37 #if $pdf:
38 -p '$plots'
39 #end if
40
41 --sample_json '$sampleTable'
42
43 $header
44
45 --min_k $min_k
46 --max_k $max_k
47
48 #if $tximport.tximport_selector == 'tximport':
49 --txtype $tximport.txtype
50 #if $tximport.mapping_format.mapping_format_selector == 'gtf':
51 --tx2gene mapping.gtf
52 #else:
53 --tx2gene mapping.txt
54 #end if
55 #end if
56 ]]></command> <!-- In tximport mode the mapping file is first symlinked to mapping.gtf or mapping.txt; ruvseq.R then receives the sample JSON, the header flag, the k range, the optional PDF path and (tximport mode only) the quantifier type and mapping file. NOTE(review): the min_mean_count input is not passed to the script here; confirm whether that is intended. -->
57 <configfiles> <!-- sampleTable: a JSON list with one {path, label, condition} object per dataset returned for each group tag selected in each rep_factorLevel entry; label is the element identifier and condition is the group tag -->
58 <configfile name="sampleTable">
59 #import json
60 #set sample_table = []
61 #for $level in $rep_factorLevel:
62 #for $group in $level.groups.value:
63 #for $file in $countsFile.get_datasets_for_group($group):
64 #silent $sample_table.append({"path": str($file), "label": str($file.element_identifier), "condition": str($group)})
65 #end for
66 #end for
67 #end for
68 #echo json.dumps($sample_table)
69 </configfile>
70 </configfiles>
71 <inputs>
72 <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/> <!-- All count datasets are selected once here; the "groups" parameter of each factor level below picks datasets by group tag -->
73 <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
74 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
75 help="Only letters, numbers and underscores will be retained in this field">
76 <sanitizer>
77 <valid initial="string.letters,string.digits"><add value="_" /></valid>
78 </sanitizer>
79 </param> <!-- NOTE(review): factorLevel is not referenced by the command or sampleTable templates; the group tag is written as the condition instead. Confirm this is intended. -->
80 <param name="groups" type="group_tag" data_ref="countsFile" multiple="true" label="Select groups that correspond to this factor level"/>
81 </repeat>
82 <param name="min_k" type="integer" value="1" min="1" label="Try to find at least this many factors of unwanted variation" />
83 <param name="max_k" type="integer" value="1" min="1" label="Try to find at most this many factors of unwanted variation" /> <!-- NOTE(review): nothing in this wrapper enforces max_k >= min_k; confirm ruvseq.R copes with an inverted range -->
84 <param name="min_mean_count" type="integer" value="5" min="0" label="Ignore genes with fewer than this many counts on average" /> <!-- NOTE(review): this value is never passed in the command template; confirm whether ruvseq.R should receive it -->
85 <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true" label="Files have header?" help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes" /> <!-- Inserted verbatim into the command line: -H when checked, nothing otherwise -->
86
87 <conditional name="tximport"> <!-- Chooses between plain count tables and transcript-level quantifications; the latter additionally require the quantifier name and a transcript-to-gene mapping file -->
88 <param name="tximport_selector" type="select" label="Choice of Input data">
89 <option value="count" selected="True">Count data (e.g. from HTSeq-count, featureCounts or StringTie)</option>
90 <option value="tximport">TPM values (e.g. from kallisto, sailfish or salmon)</option>
91 </param>
92 <when value="tximport">
93 <param name="txtype" type="select" label="Program used to generate TPMs">
94 <option value="kallisto">kallisto</option>
95 <option value="sailfish">Sailfish</option>
96 <option value="salmon">Salmon</option>
97 </param>
98 <conditional name="mapping_format"> <!-- The command template symlinks the chosen file to mapping.gtf (GTF/GFF3) or mapping.txt (tabular) -->
99 <param name="mapping_format_selector" type="select" label="Gene mapping format">
100 <option value="gtf" selected="True">GTF</option>
101 <option value="tabular">Transcript-ID and Gene-ID mapping file</option>
102 </param>
103 <when value="gtf">
104 <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 file with Transcript - Gene mapping"/>
105 </when>
106 <when value="tabular">
107 <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/>
108 </when>
109 </conditional>
110 </when>
111 <when value="count" /> <!-- Count input needs no additional parameters -->
112 </conditional>
113 <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
114 label="Visualising the analysis results"
115 help="Output an additional PDF file with diagnostic plots" /> <!-- Controls both the -p option in the command template and the filter on the "plots" output -->
116 </inputs>
117 <outputs> <!-- Covariate tables are discovered from files ending in .tabular in the working directory; the PDF output exists only when the "pdf" input is enabled -->
118 <collection name="unwanted_variation" type="list" label="RUVSeq covariate files on ${on_string}">
119 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
120 </collection>
121 <data format="pdf" name="plots" label="RUVSeq diagnostic plots on ${on_string}">
122 <filter>pdf == True</filter>
123 </data>
124 </outputs>
125 <tests>
126 <!-- Ensure count files with a header row work ("header" is left at its default, which is checked). NOTE(review): countsFile is declared outside rep_factorLevel in the inputs section and "groups" is not set here; confirm this test still assigns datasets to factor levels as intended. -->
127 <test>
128 <repeat name="rep_factorLevel">
129 <param name="factorLevel" value="Treated"/>
130 <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
131 </repeat>
132 <repeat name="rep_factorLevel">
133 <param name="factorLevel" value="Untreated"/>
134 <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
135 </repeat>
136 <param name="pdf" value="true"/>
137 <output name="plots" file="ruvseq_diag.pdf" ftype="pdf" compare="sim_size"/>
138 <output_collection name="unwanted_variation" type="list">
139 <element name="batch_effects_control_method_k1">
140 <assert_contents>
141 <has_text_matching expression="identifier\tcondition\tW_1"/>
142 <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/>
143 </assert_contents>
144 </element>
145 <element name="batch_effects_replicate_method_k1">
146 <assert_contents>
147 <has_text_matching expression="identifier\tcondition\tW_1"/>
148 <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/>
149 </assert_contents>
150 </element>
151 <element name="batch_effects_residual_method_k1">
152 <assert_contents>
153 <has_text_matching expression="identifier\tcondition\tW_1"/>
154 <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/>
155 </assert_contents>
156 </element>
157 </output_collection>
158 </test>
159 <!-- Ensure count files without a header row work ("header" set to false); the expected covariate values are the same as in the test that uses files with a header. NOTE(review): countsFile is declared outside rep_factorLevel in the inputs section and "groups" is not set here; confirm this is intended. -->
160 <test>
161 <repeat name="rep_factorLevel">
162 <param name="factorLevel" value="Treated"/>
163 <param name="countsFile" value="GSM461179_treat_single.counts.noheader,GSM461180_treat_paired.counts.noheader,GSM461181_treat_paired.counts.noheader"/>
164 </repeat>
165 <repeat name="rep_factorLevel">
166 <param name="factorLevel" value="Untreated"/>
167 <param name="countsFile" value="GSM461176_untreat_single.counts.noheader,GSM461177_untreat_paired.counts.noheader,GSM461178_untreat_paired.counts.noheader,GSM461182_untreat_single.counts.noheader"/>
168 </repeat>
169 <param name="pdf" value="true"/>
170 <param name="header" value="false"/>
171 <output name="plots" file="ruvseq_diag.pdf" ftype="pdf" compare="sim_size"/>
172 <output_collection name="unwanted_variation" type="list">
173 <element name="batch_effects_control_method_k1">
174 <assert_contents>
175 <has_text_matching expression="identifier\tcondition\tW_1"/>
176 <has_text_matching expression="GSM461179.*\tTreated\t-0.49.*"/>
177 </assert_contents>
178 </element>
179 <element name="batch_effects_replicate_method_k1">
180 <assert_contents>
181 <has_text_matching expression="identifier\tcondition\tW_1"/>
182 <has_text_matching expression="GSM461179.*\tTreated\t-0.25.*"/>
183 </assert_contents>
184 </element>
185 <element name="batch_effects_residual_method_k1">
186 <assert_contents>
187 <has_text_matching expression="identifier\tcondition\tW_1"/>
188 <has_text_matching expression="GSM461179.*\tTreated\t-0.60.*"/>
189 </assert_contents>
190 </element>
191 </output_collection>
192 </test>
193 <!-- Ensure Sailfish quantification files work in tximport mode with a tabular transcript-to-gene mapping (tx2gene.tab). NOTE(review): countsFile is declared outside rep_factorLevel in the inputs section and "groups" is not set here; confirm this is intended. -->
194 <test>
195 <repeat name="rep_factorLevel">
196 <param name="factorLevel" value="Treated"/>
197 <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
198 </repeat>
199 <repeat name="rep_factorLevel">
200 <param name="factorLevel" value="Untreated"/>
201 <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
202 </repeat>
203 <param name="pdf" value="true"/>
204 <param name="tximport_selector" value="tximport"/>
205 <param name="txtype" value="sailfish"/>
206 <param name="mapping_format_selector" value="tabular"/>
207 <param name="tabular_file" value="tx2gene.tab"/>
208 <output name="plots" file="ruvseq_diag_sailfish.pdf" ftype="pdf" compare="sim_size"/>
209 <output_collection name="unwanted_variation" type="list">
210 <element name="batch_effects_control_method_k1">
211 <assert_contents>
212 <has_text_matching expression="identifier\tcondition\tW_1"/>
213 <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.28.*"/>
214 </assert_contents>
215 </element>
216 <element name="batch_effects_replicate_method_k1">
217 <assert_contents>
218 <has_text_matching expression="identifier\tcondition\tW_1"/>
219 <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.44.*"/>
220 </assert_contents>
221 </element>
222 <element name="batch_effects_residual_method_k1">
223 <assert_contents>
224 <has_text_matching expression="identifier\tcondition\tW_1"/>
225 <has_text_matching expression="sailfish_quant.sf1.tab\tTreated\t-0.22.*"/>
226 </assert_contents>
227 </element>
228 </output_collection>
229 </test>
230 </tests>
231 <help><![CDATA[
232 .. class:: infomark
233
234 **What it does**
235
236 RUVSeq normalizes RNA-seq data using factor analysis of control genes or samples. RUVSeq has been designed for detecting unwanted variation using replicate sample information. The current RUVSeq Galaxy tool only implements estimating unwanted variation for primary factors.
237 RUVSeq implements 3 different methods for the estimation of unwanted variation:
238
239 RUVg estimates the factors of unwanted variation using control genes
240
241 RUVs estimates the factors of unwanted variation using replicate samples
242
243 RUVr estimates the factors of unwanted variation using residuals.
244
245 This tool runs all RUV methods and outputs diagnostic plots and tables with covariates that
246 may be used for differential expression analysis.
247
248 -----
249
250 **Inputs**
251
252 **Count Files**
253
254 RUVSeq_ takes count tables generated from **featureCounts**, **HTSeq-count** or **StringTie** as input. Count tables must be generated for each sample individually. One header row is assumed, but files with no header (e.g. from HTSeq) can be input with the *Files have header?* option set to No.
255
256 RUVSeq_ can also take transcript-level counts from quantification tools such as **kallisto**, **Salmon** and **Sailfish**, and this Galaxy wrapper incorporates the Bioconductor tximport_ package to process the transcript counts for DESeq2.
257
258 **Salmon or Sailfish Files**
259
260 Salmon or Sailfish ``quant.sf`` files can be imported by setting type to *Salmon* or *Sailfish* respectively above. Note: for previous versions of Salmon or Sailfish, in which the quant.sf files start with comment lines, you will need to remove the comment lines before inputting here. An example of the format is shown below.
261
262 Example:
263
264 ============ ========== =============== =========== ===========
265 Name         Length     EffectiveLength TPM         NumReads
266 ------------ ---------- --------------- ----------- -----------
267 NR_001526    164        20.4518         0           0
268 NR_001526_1  164        20.4518         0           0
269 NR_001526_2  164        20.4518         0           0
270 NM_130786    1764       1956.04         2.47415     109.165
271 NR_015380    2129       2139.53         1.77331     85.5821
272 NM_001198818 9360       7796.58         2.38616e-07 4.19648e-05
273 NM_001198819 9527       7964.62         0           0
274 NM_001198820 9410       7855.78         0           0
275 NM_014576    9267       7714.88         0.0481114   8.37255
276 ============ ========== =============== =========== ===========
277
278 **kallisto Files**
279
280 kallisto ``abundance.tsv`` files can be imported by setting type to *kallisto* above. An example of the format is shown below.
281
282 Example:
283
284 ============ ========== =============== =========== ===========
285 target_id    length     eff_length      est_counts  tpm
286 ------------ ---------- --------------- ----------- -----------
287 NR_001526    164        20.4518         0           0
288 NR_001526_1  164        20.4518         0           0
289 NR_001526_2  164        20.4518         0           0
290 NM_130786    1764       1956.04         109.165     2.47415
291 NR_015380    2129       2139.53         85.5821     1.77331
292 NM_001198818 9360       7796.58         4.19648e-05 2.38616e-07
293 NM_001198819 9527       7964.62         0           0
294 NM_001198820 9410       7855.78         0           0
295 NM_014576    9267       7714.88         8.37255     0.0481114
296 ============ ========== =============== =========== ===========
297
298 -----
299
300 **Output**
301
302 RUVSeq_ generates a tabular file for each method and each k of variation as well as a summary PDF.
303
304 .. _RUVSeq: http://master.bioconductor.org/packages/release/bioc/html/RUVSeq.html
305 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
306 ]]></help>
307 <citations> <!-- Publication, identified by DOI, that users of this tool are asked to cite -->
308 <citation type="doi">10.1038/nbt.2931</citation>
309 </citations>
310 </tool>