comparison flagRemove.xml @ 0:ca0ac330f1a4 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2948ce35fa7fffe5a64711cb30be971031e79019-dirty
author tomnl
date Fri, 24 May 2019 09:13:42 -0400
parents
children a03407367135
comparison
equal deleted inserted replaced
-1:000000000000 0:ca0ac330f1a4
1 <tool id="mspurity_flagremove" name="msPurity.flagRemove" version="0.2.0">
2 <description>Tool to flag and remove XCMS grouped peaks from the xcmsSet object based on various thresholds
3 (e.g. RSD of intensity and retention time).
4 </description>
5 <!-- Shared macros are imported from macros.xml; the "requirements" and "citations" macros expanded in this tool are presumably defined there (that file is not visible here). -->
6 <macros>
7 <import>macros.xml</import>
8 </macros>
9 <!-- Expands the shared "requirements" macro in place. -->
10 <expand macro="requirements" />
11 <!-- Matches every exit code from 1 upwards. NOTE(review): with no explicit level Galaxy should treat a match as a fatal error; confirm against the tool XML schema docs. -->
12 <stdio>
13 <exit_code range="1:" />
14 </stdio>
15 <command interpreter="Rscript"><![CDATA[
16 flagRemove.R
17 --xset_path='$xset_path'
18 --out_dir=.
19 ## Sample thresholds: only passed when the user chose to update them. Values are single-quoted so free text and paths reach the script as one argument.
20 #if $sample_flag.sample_flag=='update'
21 --rsd_i_sample='$sample_flag.rsd_i_sample' --rsd_rt_sample='$sample_flag.rsd_rt_sample'
22 --minfrac_sample='$sample_flag.minfrac_sample'
23 --ithres_sample='$sample_flag.ithres_sample'
24 #end if
25 ## Blank thresholds, including the retention-time RSD and the sample/blank fold change (s2b) that the form collects.
26 #if $blank_flag.blank_flag=='update'
27 --rsd_i_blank='$blank_flag.rsd_i_blank' --rsd_rt_blank='$blank_flag.rsd_rt_blank'
28 --minfrac_blank='$blank_flag.minfrac_blank'
29 --ithres_blank='$blank_flag.ithres_blank' --s2b='$blank_flag.s2b'
30 --blank_class='$blank_flag.blank_class'
31 #end if
32 ## Peak removal: --remove_spectra plus the xcms re-grouping parameters.
33 #if $peak_removal.peak_removal=='remove'
34 --remove_spectra
35 --minfrac_xcms='$peak_removal.minfrac_xcms'
36 --mzwid='$peak_removal.mzwid'
37 --bw='$peak_removal.bw'
38 #end if
39 ## Advanced and testing parameters.
40 #if $advanced.advanced=='update'
41 --egauss_thr='$advanced.egauss_thr'
42 --polarity='$advanced.polarity'
43 --grp_rm_ids='$advanced.grp_rm_ids'
44 --xset_name='$advanced.xset_name'
45 ## temp_save is a boolean param: rendering it directly yields its truevalue (--temp_save) or an empty string, whereas .value would yield the raw True/False.
46 $advanced.temp_save
47 #end if
48 #if $choose_samp.choose_samp=='yes'
49 --samplelist='$choose_samp.samplelist'
50 #end if
51 ]]></command>
52 <inputs>
53 <param name="xset_path" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata.xcms.fillpeaks,rdata"
54 help="The path to the xcmsSet object saved as an RData file" />
55 <conditional name="sample_flag">
56 <param type="select" name="sample_flag" label="Change biological sample flag parameters?">
57 <option value="update">Update biological sample flag parameters</option>
58 <option selected="true" value="">Use default biological sample flag parameters</option>
59 </param>
60 <when value="" />
61 <!-- Default branch above has no inputs; the thresholds below appear only for "update". -->
62 <when value="update">
63 <param type="text" name="rsd_i_sample" value="NA" label="rsd_i_sample"
64 help="Relative Standard Deviation threshold for the sample classes (--rsd_i_sample)" />
65 <param type="float" name="minfrac_sample" value="0.5" min="0.0" max="1" label="minfrac_sample"
66 help="minimum fraction of files for features needed for the sample classes (--minfrac_sample)" />
67 <param type="text" name="rsd_rt_sample" value="NA" label="rsd_rt_sample"
68 help="Relative standard Deviation threshold for the retention time of the sample
69 classes (--rsd_rt_sample)" />
70 <param type="text" name="ithres_sample" value="NA" label="ithres_sample"
71 help="Intensity threshold for the sample (--ithres_sample)" />
72 </when>
73 </conditional>
74 <conditional name="blank_flag">
75 <param name="blank_flag" type="select" label="Change blank flag parameters?">
76 <option value="update" >Update blank flag parameters</option>
77 <option value="" selected="true">Use default blank flag parameters</option>
78 </param>
79 <when value=""/>
80 <!-- Default branch above has no inputs; the blank thresholds below appear only for "update". -->
81 <when value="update">
82 <param name="blank_class" type="text" label="blank_class" value="blank"
83 help="A string representing the class that will be used for the blank (--blank_class)"/>
84 <param name="rsd_i_blank" type="text" label="rsd_i_blank" value="NA"
85 help="RSD threshold for the blank (--rsd_i_blank)"/>
86 <param name="minfrac_blank" type="float" label="minfrac_blank" value="0.5" min="0.0" max="1"
87 help="minimum fraction of files for features needed for the blank (--minfrac_blank)"/>
88 <param name="rsd_rt_blank" type="text" label="rsd_rt_blank" value="NA"
89 help="RSD threshold for the retention time of the blank (--rsd_rt_blank)"/>
90 <param name="ithres_blank" type="text" label="ithres_blank" value="NA"
91 help="Intensity threshold for the blank (--ithres_blank)"/>
92 <param name="s2b" type="float" label="s2b" value="10"
93 help="fold change (sample/blank) needed for sample peak to be allowed. e.g.
94 if s2b is set to 10 and the recorded sample 'intensity' value was 1000 and blank was 10,
95 then 1000/10 = 100, so the sample has a fold change higher than the threshold and the peak
96 is not considered a blank (--s2b)"/>
97 </when>
98 </conditional>
99 <conditional name="peak_removal">
100 <param name="peak_removal" type="select" label="Remove peaks from xcmsSet object?">
101 <option value="remove" >Remove peaks and re-group</option>
102 <option value="" selected="true">Only flag peaks (do not remove and re-group)</option>
103 </param>
104 <when value=""/>
105 <!-- Flag-only branch above has no inputs; the xcms grouping inputs below appear only for "remove". -->
106 <when value="remove">
107 <param name="minfrac_xcms" type="float" label="minfrac_xcms" value="0.7" min="0.0" max="1"
108 help="minfrac for xcms grouping (--minfrac_xcms)"/>
109 <param name="mzwid" type="float" label="mzwid" value="0.001"
110 help="mzwid for xcms grouping (--mzwid)"/>
111 <param name="bw" type="float" label="bw" value="5"
112 help="bw for xcms grouping (--bw)"/>
113 </when>
114 </conditional>
115 <conditional name="advanced">
116 <param name="advanced" type="select" label="Advanced parameters">
117 <option value="update" >Update advanced and testing parameters</option>
118 <option value="" selected="true">Use default advanced parameters</option>
119 </param>
120 <when value=""/>
121 <!-- Default branch above has no inputs; the advanced and testing inputs below appear only for "update". -->
122 <when value="update">
123 <param name="egauss_thr" type="text" label="egauss_thr" value="NA"
124 help="Threshold for filtering out non gaussian shaped peaks. Note this only works
125 if the 'verbose columns' and 'fit gauss' was used with xcms (--egauss_thr)"/>
126 <param name="temp_save" type="boolean" label="temp_save" checked="false" truevalue="--temp_save" falsevalue=""
127 help="Assign True if files for each step saved (for testing purposes) (--temp_save)"/>
128 <param name="polarity" type="select" label="polarity"
129 help="polarity (just used for naming purpose when files are saved) (--polarity)">
130 <option value="positive">Positive</option>
131 <option value="negative" >Negative</option>
132 <option value="NA" selected="true">NA</option>
133 </param>
134 <param name="grp_rm_ids" type="text" label="grp_rm_ids" value="NA"
135 help="comma separated list of grouped_xcms peak ids to remove (corresponds to the row from xcms::group output)
136 e.g. '1,20,30,56'
137 (--grp_rm_ids)"/>
138 <param name="xset_name" type="text" label="xset_name" value="xset"
139 help="Name of the xcmsSet object within the RData file (--xset_name)"/>
140 </when>
141 </conditional>
142 <conditional name="choose_samp">
143 <param name="choose_samp" type="select" label="Samplelist">
144 <option value="yes" >Use samplelist</option>
145 <option value="" selected="true">Don't use samplelist</option>
146 </param>
147 <when value=""/>
148 <!-- Default branch above has no inputs; the samplelist dataset is requested only for "yes". -->
149 <when value="yes">
150 <param name="samplelist" type="data" label="samplelist" format="tsv,tabular"
151 help="A samplelist can be provided to find an appropriate blank class (requires a column 'blank' where 'yes' indicates the class should be used as the blank) (--samplelist)"/>
152
153 </when>
154 </conditional>
155 </inputs>
156 <outputs> <!-- Each output is collected from the job working directory via from_work_dir; labels share the same tool/input prefix. -->
157 <data name="peaklist_filtered" format="tsv" label="${tool.name} on ${on_string}: peaklist_filtered (tsv)"
158 from_work_dir="peaklist_filtered.tsv" />
159 <data name="removed_peaks" format="tsv" label="${tool.name} on ${on_string}: removed_peaks (tsv)"
160 from_work_dir="removed_peaks.tsv" />
161 <data name="xset_filtered" format="rdata" label="${tool.name} on ${on_string}: xset_filtered (RData)"
162 from_work_dir="xset_filtered.RData"/>
163 </outputs>
164 <tests>
165 <test>
166 <!-- Each conditional selector is set exactly once, inside its own conditional element below. -->
167 <param name="xset_path" value="flagRemove_input.RData"/>
168
169 <!-- Use class "KO" as the blank, then remove peaks and re-group. -->
170 <conditional name="blank_flag">
171 <param name="blank_flag" value="update"/>
172 <param name="blank_class" value="KO" />
173 </conditional>
174 <conditional name="peak_removal">
175 <param name="peak_removal" value="remove"/>
176 </conditional>
177 <output name="peaklist_filtered" file="flagRemove_output.tsv"/>
178 </test>
179 </tests>
180 <help><![CDATA[
181
182 =======================================
183 Flag & remove peaks from xcmsSet object
184 =======================================
185 -----------
186 Description
187 -----------
188
189 Tool to flag XCMS grouped peaks based on various criteria (e.g. RSD, intensity). The flagged grouped peaks can then be removed
190 completely from the xcmsSet object (xset). This means removing the individual peaks associated with each file, which are located
191 in the **xset@peaks** slot of the xcmsSet object.
192
193 Additionally a list of ids of the xcms grouped peaks can be supplied; all peaks associated with these ids can
194 be removed.
195
196 **Note**: grouped peak refers to a peak that has been grouped together by xcms::group function
197
198 -----------------
199 Updated peaklist
200 -----------------
201 The calculated columns for the updated peaklist dataframe include:
202
203 * RSD of intensity for grouped peaks across each class
204 * RSD of retention time for grouped peaks across each class
205 * Coverage across all classes
206 * mzmin_full & mzmax_full: the full mzrange of each grouped peak
207 * rtmin_full & rtmax_full: the full rtrange of each grouped peak
208 * flag for criteria for the blank class (if 1 it means the blank is valid in at least 1 condition) and this grouped peak will be removed
209 * flag for criteria for the sample classes (if 1 it means that this grouped peak is valid for this class)
210 * all_sample_valid: flag for all samples (if 1 it means that at least 1 sample class is valid, this ignores the blank)
211
212 To filter out blank peaks, just filter out all peaks where blank_valid is equal to 1.
213 These columns are in addition to the standard output from the xcmsSet peaklist.
214
215 **flag example**
216
217 Dataset consists of 3 classes. Blank, cond1 and cond2. The classes cond1 and cond2 are biological sample classes.
218
219 ============= ============= ============= ================ ================
220 blank_valid cond1_valid cond2_valid all_sample_valid Keep peak?
221 ============= ============= ============= ================ ================
222 0 0 1 1 Yes
223 ------------- ------------- ------------- ---------------- ----------------
224 0 1 1 1 Yes
225 ------------- ------------- ------------- ---------------- ----------------
226 1 0 1 1 No
227 ------------- ------------- ------------- ---------------- ----------------
228 1 0 0 0 No
229 ------------- ------------- ------------- ---------------- ----------------
230 0 0 0 0 No
231 ============= ============= ============= ================ ================
232
233
234
235 -----------------
236 Filters for flags
237 -----------------
238
239 The following filters can be used to determine if a grouped peak gets flagged to keep. If the column all_sample_valid is 0 then
240 the grouped peak will be removed
241
242
243 * RSD of intensity for each biological sample class
244 * minfrac for each biological sample class
245 * RSD of retention time for each biological sample class
246 * intensity threshold for each biological sample class
247 * Blank subtraction. If a blank peak is found where the intensity of any corresponding biological sample class is not greater than the s2b threshold (sample/blank), then this grouped peak will be flagged for removal
248
249 The blank grouped peaks also have their own filters. If the blank peak passes these criteria and the s2b threshold detailed above
250 then the peak will be removed.
251
252 * RSD of intensity
253 * minfrac
254 * intensity threshold
255
256 Additionally there is a filter for assessing how well the peaks fit the gaussian shape. Note that this can only be performed
257 when XCMS has fit_gauss option and verbose columns set to TRUE. Also, these peaks are just removed and not flagged.
258
259
260
261 -----------
262 Regrouping
263 -----------
264 The resulting xcmsSet object, from which the flagged peaks have been removed, needs to be regrouped; otherwise the individual peaks
265 associated with each file will not be correctly linked to the grouped peaks.
266
267 This tool will re-group the xcmsSet object and check the newly created re-grouped xcmsSet object to see if any peaks are
268 still being flagged. If so, the process will be repeated until the xcmsSet object only contains peaks that match
269 the peak criteria.
270
271 The output file is an xcmsSet.RData file.
272 ]]></help>
273 <!-- Expands the shared "citations" macro; its definition is presumably in the imported macros.xml (not visible here). -->
274 <expand macro="citations" />
275 </tool>