Mercurial > repos > tomnl > mspurity_flagremove
comparison flagRemove.xml @ 0:ca0ac330f1a4 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2948ce35fa7fffe5a64711cb30be971031e79019-dirty
| author | tomnl |
|---|---|
| date | Fri, 24 May 2019 09:13:42 -0400 |
| parents | |
| children | a03407367135 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ca0ac330f1a4 |
|---|---|
| 1 <tool id="mspurity_flagremove" name="msPurity.flagRemove" version="0.2.0"> | |
| 2 <description>Tool to flag and remove XCMS grouped peaks from the xcmsSet object based on various thresholds | |
| 3 (e.g. RSD of intensity and retention time). | |
| 4 </description> | |
| 5 <!-- The "requirements" and "citations" macros expanded in this file are expected to come from macros.xml (the only import) --> | |
| 6 <macros> | |
| 7 <import>macros.xml</import> | |
| 8 </macros> | |
| 9 | |
| 10 <expand macro="requirements" /> | |
| 11 <!-- Rule matching every exit code of 1 or above; no level attribute is given, so Galaxy's default level applies --> | |
| 12 <stdio> | |
| 13 <exit_code range="1:" /> | |
| 14 </stdio> | |
| 15 <command><![CDATA[ | |
| 16 Rscript '$__tool_directory__/flagRemove.R' | |
| 17 --xset_path='$xset_path' | |
| 18 --out_dir=. | |
| 19 ## Free-text and dataset values are single-quoted; each optional group is emitted only when its selector is switched on | |
| 20 #if $sample_flag.sample_flag=='update' | |
| 21 --rsd_i_sample='$sample_flag.rsd_i_sample' | |
| 22 --minfrac_sample=$sample_flag.minfrac_sample --rsd_rt_sample='$sample_flag.rsd_rt_sample' | |
| 23 --ithres_sample='$sample_flag.ithres_sample' | |
| 24 #end if | |
| 25 ## rsd_rt_sample, rsd_rt_blank and s2b are declared as inputs, so they are forwarded too (flag names as given in their help text) | |
| 26 #if $blank_flag.blank_flag=='update' | |
| 27 --rsd_i_blank='$blank_flag.rsd_i_blank' | |
| 28 --minfrac_blank=$blank_flag.minfrac_blank --rsd_rt_blank='$blank_flag.rsd_rt_blank' | |
| 29 --ithres_blank='$blank_flag.ithres_blank' --s2b=$blank_flag.s2b | |
| 30 --blank_class='$blank_flag.blank_class' | |
| 31 #end if | |
| 32 | |
| 33 #if $peak_removal.peak_removal=='remove' | |
| 34 --remove_spectra | |
| 35 --minfrac_xcms=$peak_removal.minfrac_xcms | |
| 36 --mzwid=$peak_removal.mzwid | |
| 37 --bw=$peak_removal.bw | |
| 38 #end if | |
| 39 | |
| 40 #if $advanced.advanced=='update' | |
| 41 --egauss_thr='$advanced.egauss_thr' | |
| 42 --polarity=$advanced.polarity | |
| 43 --grp_rm_ids='$advanced.grp_rm_ids' | |
| 44 --xset_name='$advanced.xset_name' | |
| 45 $advanced.temp_save | |
| 46 ## The boolean is rendered directly so that its truevalue (the temp_save flag) or empty falsevalue reaches the command line | |
| 47 #end if | |
| 48 #if $choose_samp.choose_samp=='yes' | |
| 49 --samplelist='$choose_samp.samplelist' | |
| 50 #end if | |
| 51 ]]></command> | |
| 52 <inputs> | |
| 53 <param name="xset_path" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata.xcms.fillpeaks,rdata" | |
| 54 help="The path to the xcmsSet object saved as an RData file"/> | |
| 55 <conditional name="sample_flag"> | |
| 56 <param name="sample_flag" type="select" label="Change biological sample flag parameters?"> | |
| 57 <option value="update" >Update biological sample flag parameters</option> | |
| 58 <option value="" selected="true">Use default biological sample flag parameters</option> | |
| 59 </param> | |
| 60 <when value=""> <!-- nothing to configure: no sample flag options are added to the command line in this branch --> | |
| 61 </when> | |
| 62 <when value="update"> <!-- NOTE(review): the NA-defaulted thresholds are declared type="text", presumably so the literal NA can be entered; confirm --> | |
| 63 <param name="rsd_i_sample" type="text" label="rsd_i_sample" value="NA" | |
| 64 help="Relative Standard Deviation threshold for the sample classes (--rsd_i_sample)"/> | |
| 65 <param name="minfrac_sample" type="float" label="minfrac_sample" value="0.5" min="0.0" max="1" | |
| 66 help="minimum fraction of files for features needed for the sample classes (--minfrac_sample)"/> | |
| 67 <param name="rsd_rt_sample" type="text" label="rsd_rt_sample" value="NA" | |
| 68 help="Relative standard Deviation threshold for the retention time of the sample | |
| 69 classes (--rsd_rt_sample)"/> | |
| 70 <param name="ithres_sample" type="text" label="ithres_sample" value="NA" | |
| 71 help="Intensity threshold for the sample (--ithres_sample)"/> | |
| 72 </when> | |
| 73 </conditional> | |
| 74 <conditional name="blank_flag"> | |
| 75 <param name="blank_flag" type="select" label="Change blank flag parameters?"> | |
| 76 <option value="update" >Update blank flag parameters</option> | |
| 77 <option value="" selected="true">Use default blank flag parameters</option> | |
| 78 </param> | |
| 79 <when value=""> <!-- nothing to configure: no blank flag options are added to the command line in this branch --> | |
| 80 </when> | |
| 81 <when value="update"> <!-- s2b is the sample/blank fold change threshold; its help text carries a worked example --> | |
| 82 <param name="blank_class" type="text" label="blank_class" value="blank" | |
| 83 help="A string representing the class that will be used for the blank (--blank_class)"/> | |
| 84 <param name="rsd_i_blank" type="text" label="rsd_i_blank" value="NA" | |
| 85 help="RSD threshold for the blank (--rsd_i_blank)"/> | |
| 86 <param name="minfrac_blank" type="float" label="minfrac_blank" value="0.5" min="0.0" max="1" | |
| 87 help="minimum fraction of files for features needed for the blank (--minfrac_blank)"/> | |
| 88 <param name="rsd_rt_blank" type="text" label="rsd_rt_blank" value="NA" | |
| 89 help="RSD threshold for the retention time of the blank (--rsd_rt_blank)"/> | |
| 90 <param name="ithres_blank" type="text" label="ithres_blank" value="NA" | |
| 91 help="Intensity threshold for the blank (--ithres_blank)"/> | |
| 92 <param name="s2b" type="float" label="s2b" value="10" | |
| 93 help="fold change (sample/blank) needed for sample peak to be allowed. e.g. | |
| 94 if s2b set to 10 and the recorded sample 'intensity' value was 1000 and blank was 10. | |
| 95 1000/10 = 100, so sample has fold change higher than the threshold and the peak | |
| 96 is not considered a blank (--s2b)"/> | |
| 97 </when> | |
| 98 </conditional> | |
| 99 <conditional name="peak_removal"> | |
| 100 <param name="peak_removal" type="select" label="Remove peaks from xcmsSet object?"> | |
| 101 <option value="remove" >Remove peaks and re-group</option> | |
| 102 <option value="" selected="true">Only flag peaks (do not remove and re-group)</option> | |
| 103 </param> | |
| 104 <when value=""> | |
| 105 </when> | |
| 106 <when value="remove"> <!-- re-grouping values; each help text names the command line flag its value is passed to --> | |
| 107 <param name="minfrac_xcms" type="float" label="minfrac_xcms" value="0.7" min="0.0" max="1" | |
| 108 help="minfrac for xcms grouping (--minfrac_xcms)"/> | |
| 109 <param name="mzwid" type="float" label="mzwid" value="0.001" | |
| 110 help="mzwid for xcms grouping (--mzwid)"/> | |
| 111 <param name="bw" type="float" label="bw" value="5" | |
| 112 help="bw for xcms grouping (--bw)"/> | |
| 113 </when> | |
| 114 </conditional> | |
| 115 <conditional name="advanced"> | |
| 116 <param name="advanced" type="select" label="Advanced parameters"> | |
| 117 <option value="update" >Update advanced and testing parameters</option> | |
| 118 <option value="" selected="true">Use default advanced parameters</option> | |
| 119 </param> | |
| 120 <when value=""> | |
| 121 </when> | |
| 122 <when value="update"> | |
| 123 <param name="egauss_thr" type="text" label="egauss_thr" value="NA" | |
| 124 help="Threshold for filtering out non gaussian shaped peaks. Note this only works | |
| 125 if the 'verbose columns' and 'fit gauss' was used with xcms (--egauss_thr)"/> | |
| 126 <param name="temp_save" type="boolean" label="temp_save" checked="false" truevalue="--temp_save" falsevalue="" | |
| 127 help="Assign True if files for each step saved (for testing purposes) (--temp_save)"/> | |
| 128 <param name="polarity" type="select" label="polarity" | |
| 129 help="polarity (just used for naming purpose when files are saved) (--polarity)"> | |
| 130 <option value="positive">Positive</option> | |
| 131 <option value="negative" >Negative</option> | |
| 132 <option value="NA" selected="true">NA</option> | |
| 133 </param> | |
| 134 <param name="grp_rm_ids" type="text" label="grp_rm_ids" value="NA" | |
| 135 help="comma separated list of grouped_xcms peak ids to remove (corresponds to the row from xcms::group output) | |
| 136 e.g '1,20,30,56' | |
| 137 (--grp_rm_ids)"/> | |
| 138 <param name="xset_name" type="text" label="xset_name" value="xset" | |
| 139 help="Name of the xcmsSet object within the RData file (--xset_name)"/> | |
| 140 </when> | |
| 141 </conditional> | |
| 142 <conditional name="choose_samp"> | |
| 143 <param name="choose_samp" type="select" label="Samplelist"> | |
| 144 <option value="yes" >Use samplelist</option> | |
| 145 <option value="" selected="true">Don't use samplelist</option> | |
| 146 </param> | |
| 147 <when value=""> | |
| 148 </when> | |
| 149 <when value="yes"> | |
| 150 <param name="samplelist" type="data" label="samplelist" format="tsv,tabular" | |
| 151 help="A samplelist can be provided to find an appropriate blank class (requires a column 'blank' where 'yes' indicates the class should be used as the blank) (--samplelist)"/> | |
| 152 | |
| 153 </when> | |
| 154 </conditional> | |
| 155 </inputs> | |
| 156 <outputs> <!-- each dataset is collected by file name from the job working directory (from_work_dir) --> | |
| 157 <data name="peaklist_filtered" format="tsv" label="${tool.name} on ${on_string}: peaklist_filtered (tsv)" | |
| 158 from_work_dir="peaklist_filtered.tsv" /> | |
| 159 <data name="removed_peaks" format="tsv" label="${tool.name} on ${on_string}: removed_peaks (tsv)" | |
| 160 from_work_dir="removed_peaks.tsv" /> | |
| 161 <data name="xset_filtered" format="rdata" label="${tool.name} on ${on_string}: xset_filtered (rdata)" | |
| 162 from_work_dir="xset_filtered.RData"/> | |
| 163 </outputs> | |
| 164 <tests> | |
| 165 <test> | |
| 166 <!-- Blank flagging with class "KO" as the blank, plus peak removal with the default re-grouping values --> | |
| 167 <param name="xset_path" value="flagRemove_input.RData"/> | |
| 168 | |
| 169 <!-- Each selector is set once, inside its conditional block; only the filtered peak list is compared --> | |
| 170 <conditional name="blank_flag"> | |
| 171 <param name="blank_flag" value="update"/> | |
| 172 <param name="blank_class" value="KO" /> | |
| 173 </conditional> | |
| 174 <conditional name="peak_removal"> | |
| 175 <param name="peak_removal" value="remove"/> | |
| 176 </conditional> | |
| 177 <output name="peaklist_filtered" file="flagRemove_output.tsv"/> | |
| 178 </test> | |
| 179 </tests> | |
| 180 <help><![CDATA[ | |
| 181 | |
| 182 ======================================= | |
| 183 Flag & remove peaks from xcmsSet object | |
| 184 ======================================= | |
| 185 ----------- | |
| 186 Description | |
| 187 ----------- | |
| 188 | |
| 189 Tool to flag XCMS grouped peaks based on various criteria (e.g RSD, intensity). The flagged grouped peaks can then be removed | |
| 190 completely from the xcmsSet object (xset). This means removing the individual peaks associated for each file. Located | |
| 191 in the **xset@peaks** slot of the xcmsSet object. | |
| 192 | |
| 193 Additionally a list of ids of the xcms grouped peaks can be supplied; all peaks associated with these ids can | |
| 194 be removed. | |
| 195 | |
| 196 **Note**: grouped peak refers to a peak that has been grouped together by xcms::group function | |
| 197 | |
| 198 ----------------- | |
| 199 Updated peaklist | |
| 200 ----------------- | |
| 201 The calculated columns for the updated peaklist dataframe include: | |
| 202 | |
| 203 * RSD of intensity for grouped peaks across each class | |
| 204 * RSD of retention time for grouped peaks across each class | |
| 205 * Coverage across all classes | |
| 206 * mzmin_full & mzmax_full: the full mzrange of each grouped peak | |
| 207 * rtmin_full & rtmax_full: the full rtrange of each grouped peak | |
| 208 * flag for criteria for the blank class (if 1 it means the blank is valid in at least 1 condition) and this grouped peak will be removed | |
| 209 * flag for criteria for the sample classes (if 1 it means that this grouped peak is valid for this class) | |
| 210 * all_sample_valid: flag for all samples (if 1 it means that at least 1 sample class is valid, this ignores the blank) | |
| 211 | |
| 212 To filter out blank peaks just filter out all peaks where the blank_valid is equal to 1 | |
| 213 This is in addition to the standard output from the xcmsSet peaklist | |
| 214 | |
| 215 **flag example** | |
| 216 | |
| 217 Dataset consists of 3 classes. Blank, cond1 and cond2. The classes cond1 and cond2 are biological sample classes. | |
| 218 | |
| 219 ============= ============= ============= ================ ================ | |
| 220 blank_valid cond1_valid cond2_valid all_sample_valid Keep peak? | |
| 221 ============= ============= ============= ================ ================ | |
| 222 0 0 1 1 Yes | |
| 223 ------------- ------------- ------------- ---------------- ---------------- | |
| 224 0 1 1 1 Yes | |
| 225 ------------- ------------- ------------- ---------------- ---------------- | |
| 226 1 0 1 1 No | |
| 227 ------------- ------------- ------------- ---------------- ---------------- | |
| 228 1 0 0 0 No | |
| 229 ------------- ------------- ------------- ---------------- ---------------- | |
| 230 0 0 0 0 No | |
| 231 ============= ============= ============= ================ ================ | |
| 232 | |
| 233 | |
| 234 | |
| 235 ----------------- | |
| 236 Filters for flags | |
| 237 ----------------- | |
| 238 | |
| 239 The following filters can be used to determine if a grouped peak gets flagged to keep. If the column all_sample_valid is 0 then | |
| 240 the grouped peak will be removed | |
| 241 | |
| 242 | |
| 243 * RSD of intensity for each biological sample class | |
| 244 * minfrac for each biological sample class | |
| 245 * RSD of retention time for each biological sample class | |
| 246 * intensity threshold for each biological sample class | |
| 247 * Blank subtraction. If a blank peak is found where the intensity of any corresponding biological sample class is not greater than the s2b threshold (sample/blank), then this grouped peak will be flagged for removal | |
| 248 | |
| 249 The blank grouped peaks also have their own filters. If the blank peak passes these criteria and the s2b threshold detailed above | |
| 250 then the peak will be removed. | |
| 251 | |
| 252 * RSD of intensity | |
| 253 * minfrac | |
| 254 * intensity threshold | |
| 255 | |
| 256 Additionally there is a filter for assessing how well the peaks fit the gaussian shape. Note that this can only be performed | |
| 257 when XCMS has fit_gauss option and verbose columns set to TRUE. Also, these peaks are just removed and not flagged. | |
| 258 | |
| 259 | |
| 260 | |
| 261 ----------- | |
| 262 Regrouping | |
| 263 ----------- | |
| 264 The resulting xcmsSet object where the flagged peaks have been removed needs to be regrouped otherwise the individual peaks | |
| 265 associated with each file will not be correctly linked to the grouped peaks. | |
| 266 | |
| 267 This tool will re-group the xcmsSet object and check the newly created re-grouped xcmsSet object to see if any peaks are | |
| 268 still being flagged. If so the process will be repeated until the xcmsSet object only contains peaks that match | |
| 269 the peak criteria. | |
| 270 | |
| 271 The output file is an xcmsSet.RData file. | |
| 272 ]]></help> | |
| 273 | |
| 274 <expand macro="citations" /> | |
| 275 </tool> |
