comparison genehunter_modscore.xml @ 0:a84f5184784f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genehunter_modscore/ commit bf4a43ac2ae894eeeb6e608badb6ea7f8288c8d9
author iuc
date Sat, 09 Dec 2017 05:57:27 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a84f5184784f
1 <tool id="genehunter_modscore" name="Genehunter-Modscore" version="@VERSION@.0" >
2 <description>Linkage and Haplotypes analysis</description>
3 <macros>
4 <token name="@VERSION@">3.0.0</token>
5 <xml name="macro_npl_opts" > <!-- NPL score-type selector; expanded in the BOTH and NPL branches of the npl_scoring conditional -->
6 <param name="extra_npl_score" type="select" label="Type of NPL scoring">
7 <option value="all" selected="true" >All</option>
8 <option value="pairs" >Pairs</option>
9 <option value="hom" >Homozygous</option>
10 </param>
11 </xml>
12 <!-- Input test file collection -->
13 <xml name="test_input_files">
14 <param name="inp_ped" value="pedin_1.21" />
15 <param name="inp_dat" value="datain_1.21" />
16 <param name="inp_map" value="map_1.21" />
17 </xml>
18 <!-- End Input test file collection -->
19 <!-- Output test file(s) -->
20 <xml name="test_output_fparam">
21 <output name="fparam" >
22 <assert_contents>
23 <has_text_matching expression="\s*\d+\.\d+\s+-\d+\.\d+(\s+\d+\.\d+){2}\s+rs17000204$" />
24 </assert_contents>
25 </output>
26 </xml>
27 <xml name="test_output_haplo">
28 <output name="ihaplo" >
29 <assert_contents>
30 <has_text_matching expression="1\s+206006\s+206001\s+206002\s+2\s+2(\s+[0-2])+" />
31 </assert_contents>
32 </output>
33 </xml>
34 <!-- End Output test file(s) -->
35 </macros>
36 <requirements> <!-- ghm is pinned to the @VERSION@ token; linkage2allegro is the converter invoked in the command section -->
37 <requirement type="package" version="@VERSION@" >ghm</requirement>
38 <requirement type="package" version="2017.3" >linkage2allegro</requirement>
39 </requirements>
40 <version_command><![CDATA[
41 echo q | ghm | grep -oP "(?<=(\(version\ ))[^)]+"
42 ]]>
43 </version_command>
44 <command detect_errors='exit_code'><![CDATA[
45 ghm < '$setup_file'
46 ## Hand gh.out (and haplo.dump, when haplotypes are requested) to linkage2allegro in 'genehunter' mode
47 && linkage2allegro
48 '${inp_ped}'
49 '${inp_map}'
50 genehunter
51 -l gh.out
52 #if $section_haplo.analysis_haplo.extra_haplotype
53 -h haplo.dump
54 #end if
55 ## Move the linkage.allegro_* files (presumably written by linkage2allegro) onto the Galaxy output datasets
56 && mv linkage.allegro_lod '${fparam}'
57 #if $section_haplo.analysis_haplo.extra_haplotype
58 && mv linkage.allegro_haplo '${ihaplo}'
59 #end if
60 ]]>
61 </command>
62 <configfiles>
63 <configfile name="setup_file" ><![CDATA[
64 photo gh.out
65
66 ps off
67
68 ## Initiate mod score lod calculation and store IVs, off by default
69 modcalc ${section_options.section_pvalues.advanced_options_modcalc.extra_modcalc}
70 ## global / single / off
71
72 haplotype ${section_haplo.analysis_haplo.extra_haplotype}
73 ## on / off, generate Haplotypes
74
75 #if $section_haplo.analysis_haplo.extra_haplotype
76 haplotype method ${section_haplo.analysis_haplo.extra_haplotype_method}
77 ## [MaxProb] / Viterbi
78 #end if
79
80 analysis ${section_linkage.npl_scoring.extra_mod_analysis}
81 ## NPL / LOD / BOTH, type of linkage analysis
82
83 #if $section_linkage.npl_scoring.extra_mod_analysis.value != 'LOD':
84 score ${section_linkage.npl_scoring.extra_npl_score}
85 ## pairs / all / hom
86 #else
87 score all
88 #end if
89
90 #if $section_linkage.extra_singlepoint
91 single point ${section_linkage.extra_singlepoint}
92 ## on / off, dont use multi-point parametric
93 #end if
94
95 ## -- Re-enable in 3.1
96 ## Algebraic calculation for P-values, default on
97 ## #if $section_options.section_pvalues.advanced_options_alg.extra_alg
98 ## alg ${section_options.section_pvalues.advanced_options_alg.extra_alg}
99 ## Use more memory for algebraic calculations
100 ## algebra ${section_options.section_pvalues.advanced_options_alg.extra_alg_mem}
101 ## #end if
102
103 ## Add custom trait models
104 #if $section_options.section_pvalues.advanced_options_model.extra_mod_model
105 model ${section_options.section_pvalues.advanced_options_model.extra_mod_modeldisfreq} ${section_options.section_pvalues.advanced_options_model.extra_mod_modelpenet}
106 #end if
107
108 ## Range of markers positions instead of all range
109 #if $section_options.section_range.advanced_options_positions.extra_mod_positions
110 ## #try
111 ## #assert $section_options.section_range.advanced_options_positions.extra_mod_positions_lowest < $section_options.section_range.advanced_options_positions.extra_mod_positions_highest
112 ## #except AssertionError
113 ## #echo Range minimum is not less than the maximum
114 ## #end try
115 positions ${section_options.section_range.advanced_options_positions.extra_mod_positions_lowest} ${section_options.section_range.advanced_options_positions.extra_mod_positions_highest}
116 #end if
117
118 ## Untyped, default on
119 #if $section_haplo.analysis_haplo.extra_haplotype or $section_options.section_sample.extra_includeuntyped
120 include untyped on
121 #else:
122 include untyped off
123 #end if
124
125 ## Eliminate uninformative individuals, default off
126 discard ${section_options.section_sample.extra_discard}
127
128 ## Use untyped founders, default off
129 ufo ${section_options.section_sample.extra_ufo}
130
131 ## Restrict penetrances so that hom wildtype LEQ het LEQ hom mutant, on default
132 pr ${section_options.section_allfreq.extra_mod_penetrancerestrict}
133
134 ## Restrict disease allele frequency to be not higher than the highest allfreq (default 0.5)
135 #if $section_options.section_allfreq.extra_mod_allfreq
136 ar $section_options.section_allfreq.extra_mod_allfreq
137 ## Set upper disease allele freq for MOD, default 0.5 ( 0 -> 1 )
138 ha $section_options.section_allfreq.extra_mod_highallele
139 #end if
140
141 ## Number of parameters varied together in MOD, default 2 ( 1 -> 5 )
142 dimensions ${section_options.section_pvalues.extra_mod_dimensions}
143
144 ## Normalize Allele Frequencies, default off
145 naf ${section_options.section_allfreq.extra_nmaf}
146
147 ## Options emitted only when modcalc is 'single' (modcalc is off by default); every parameter is addressed by its full section/conditional path and compared via str()/int()
148 #if str($section_options.section_pvalues.advanced_options_modcalc.extra_modcalc) == 'single'
149 ## Number of trait models saved in MOD score (-1 means: do not emit the command).
150 ## Only works if algebraic calc is off, and in single modcalc; extra_mod_savedmodels only exists in the 'off' branch of advanced_options_alg
151 #if str($section_options.section_pvalues.advanced_options_alg.extra_alg) == 'off'
152 #if int(str($section_options.section_pvalues.advanced_options_alg.extra_mod_savedmodels)) != -1
153 saved models $section_options.section_pvalues.advanced_options_alg.extra_mod_savedmodels
154 #end if
155 #end if
156
157 ## Long output for modcalc single (the boolean expands to 'lm on' or to nothing)
158 $section_options.section_pvalues.advanced_options_modcalc.extra_mod_longmod
159
160 ## Calculate P-values only at the best position, off by default (the boolean expands to 'bep on' or to nothing)
161 $section_options.section_pvalues.advanced_options_modcalc.extra_mod_bep
162 #end if
163
164
165 ## Calculate p-values for MOD/LOD scores
166 #if $section_options.section_pvalues.advanced_options_calcpval.extra_calcpval
167 cpv $section_options.section_pvalues.advanced_options_calcpval.extra_calcpval_file
168 ## Number of replicates in P-score evaluation, default 0; cast to int so the test is a numeric comparison rather than wrapper-object vs int
169 #if int(str($section_options.section_pvalues.advanced_options_calcpval.extra_cpv_nor)) > 0
170 nor $section_options.section_pvalues.advanced_options_calcpval.extra_cpv_nor
171 #end if
172 #end if
173
174 ## Number of sequential simulations in P-score evaluation, default 0
175 seq ${section_options.section_pvalues.extra_seq}
176
177 ## Store replicates during P-score evaluation, default off
178 str ${section_options.section_pvalues.extra_storereplicates} ## pre / both / off (default)
179
180 ## Simulate untyped individuals, required for haplotypes
181 #if $section_haplo.analysis_haplo.extra_haplotype or $section_options.section_sample.extra_sun
182 sun on
183 #else
184 sun off
185 #end if
186
187 ## Set random seed for P-score evaluation, default -1
188 srs ${section_options.section_pvalues.extra_srs}
189
190 ## Display distribution of replicates, default off
191 sdi ${section_options.section_display.extra_sdi}
192
193 #### General
194 ## Count the number of recombinations, default off
195 count recs ${section_options.section_display.extra_countrec}
196
197 ## Do not skip fully homozygous markers when generating haplotypes, default off
198 fin ${section_options.section_sample.extra_cpv_fin}
199
200 ## Print scores to screen, default on
201 display scores on
202
203 ## Margin before and after marker range to compute scores, default 0 cM
204 off end ${section_options.section_range.extra_offend}
205
206 ## Distance between adjacent scores, either in cM 'distance' irrespective of
207 ## map, or equal 'steps'. Default steps 2
208 increment ${section_options.section_range.advanced_options_increment.extra_increment_type} ${section_options.section_range.advanced_options_increment.extra_increment_sizepavu}
209
210 ## Units, default haldane
211 map function ${section_options.section_display.extra_mapfunc}
212 ## Units in scan output, default cM
213 units ${section_options.section_display.extra_scan_units}
214
215 ## Max pedigree size calculated by 2N - F, default 19, trim individuals beyond this
216 max bits ${section_options.section_range.extra_maxbits}
217 ## Split pedigree larger than max ped size, default off
218 skip large ${section_options.section_range.extra_maxbits_skiplarge}
219 ## Stores IBD matrices for linkage, default off
220 cs ${section_options.section_pvalues.extra_computesharing}
221
222
223 load markers ${inp_dat}
224 read map ${inp_map}
225 use
226
227 scan ${inp_ped}
228
229 ## Show total scores from a scan of multiple peds
230 ## - 'het' [alpha], if not given then alpha varies
231 ## - 'stat'
232 ## default below, overridden by params
233 total stat het
234
235
236 ## TODO:
237 ## Qualitative / Quantitative trait mapping of sibs, Variance component analysis, TDT
238 q
239 ]]>
240 </configfile>
241 </configfiles>
242 <inputs>
243 <param name="inp_ped" type="data" format="linkage_pedin" label="Pedigree" />
244 <param name="inp_dat" type="data" format="linkage_datain" label="Recombination Freqs" />
245 <param name="inp_map" type="data" format="linkage_map" label="Marker Positions" />
246
247 <section name="section_haplo" title="Haplotypes" expanded="true" >
248 <conditional name="analysis_haplo" >
249 <param name="extra_haplotype" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Haplotype Analysis" />
250 <when value="on">
251 <param name="extra_haplotype_method" type="select" label="Haplotype Reconstruction Algorithm. MaxProb is fastest, and has more global solution." >
252 <option value="MaxProb" selected="true" >Maximisation Probability</option>
253 <option value="Viterbi" >Viterbi</option>
254 </param>
255 </when>
256 <when value="off" />
257 </conditional>
258 </section>
259
260 <section name="section_linkage" title="Linkage" expanded="false" > <!-- Linkage settings: single-point toggle and analysis type; the NPL score selector is offered for BOTH and NPL only -->
261 <param name="extra_singlepoint" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Single-point Analysis"/>
262 <conditional name="npl_scoring" >
263 <param name="extra_mod_analysis" type="select" label="Type of linkage analysis" >
264 <option value="BOTH" selected="true" >Both</option>
265 <option value="NPL" >Non-parametric Linkage</option>
266 <option value="LOD" >Logarithm-of-the-Odds</option>
267 </param>
268 <when value="BOTH" ><expand macro="macro_npl_opts" /></when>
269 <when value="NPL" ><expand macro="macro_npl_opts" /></when>
270 <when value="LOD" />
271 </conditional>
272 </section>
273
274 <section name="section_options" expanded="false" title="Advanced Options" >
275 <!-- P-values -->
276 <section name="section_pvalues" expanded="false"
277 title="P-value Options" >
278 <conditional name="advanced_options_alg" >
279 <param name="extra_alg" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Use Algebraic calculations for P-Values"/>
280
281 <when value="on" >
282 <param name="extra_alg_mem" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Remove large memory restrictions for algebraic calculations" />
283 </when>
284 <when value="off" >
285 <param name="extra_mod_savedmodels" type="integer" value="-1" label="Number of models to save" />
286 </when>
287 </conditional>
288
289 <conditional name="advanced_options_model" >
290 <param name="extra_mod_model" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Use custom trait models" />
291 <when value="on" >
292 <param name="extra_mod_modeldisfreq" type="float" value="" min="0" max="1" label="Disease allele frequency" />
293 <param name="extra_mod_modelpenet" type="text" value="" label="3 or 4 penetrances" />
294 </when>
295 <when value="off" />
296 </conditional>
297
298 <conditional name="advanced_options_calcpval" >
299 <param name="extra_calcpval" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Calculate P-values for MOD/LOD scores" />
300
301 <when value="on" >
302 <param name="extra_calcpval_file" type="data" format="txt" label="Filename to produce values" />
303 <param name="extra_cpv_nor" type="integer" value="0" min="0" label="Number of replicates in P-value calculations" />
304 </when>
305 <when value="off" />
306 </conditional>
307
308 <conditional name="advanced_options_modcalc" >
309 <param name="extra_modcalc" type="select" label="Inheritance Vector storage for LOD and P-value calculations" >
310 <option value="global" >Global</option>
311 <option value="single" >Single</option>
312 <option value="off" selected="true" >Off</option>
313 </param>
314 <when value="single" >
315 <param name="extra_mod_longmod" type="boolean" truevalue="lm on" falsevalue="" checked="false" label="Produce long output for scores" />
316 <param name="extra_mod_bep" type="boolean" truevalue="bep on" falsevalue="" checked="false" label="Calculate P-values only at the best LOD positions" />
317 </when>
318 <when value="global" />
319 <when value="off" />
320 </conditional>
321
322 <param name="extra_seq" type="integer" value="0" min="0" label="Number of sequential simulations in P-value calculations" />
323 <param name="extra_storereplicates" type="select" label="Store replicates during P-value calculations" >
324 <option value="pre" >Pre</option>
325 <option value="both" >Both</option>
326 <option value="off" selected="true" >Off</option>
327 </param>
328 <param name="extra_srs" type="integer" value="-1" label="Set Random seed for P-value calculations" />
329 <param name="extra_computesharing" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Store IBD matrices" />
330 <param name="extra_mod_dimensions" type="integer" min="1" max="5" value="2" label="Number of parameters to vary in LOD calculations" />
331 </section>
332 <!-- End of P Values: Works -->
333
334 <!-- Display Options -->
335 <section name="section_display" expanded="false" title="Display options" >
336 <param name="extra_sdi" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Display distribution of replicates" />
337 <param name="extra_countrec" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Print the number of recombinations" />
338 <param name="extra_mapfunc" type="select" label="Genetic map function units" >
339 <option value="haldane" selected="true">Haldane</option>
340 <option value="kosambi" >Kosambi</option>
341 </param>
342 <param name="extra_scan_units" type="select" label="Output units" >
343 <option value="cM" selected="true">CentiMorgans</option>
344 <option value="rec-frac" >Recombination Fractions</option>
345 </param>
346 </section>
347 <!-- End of display: Works -->
348
349 <!-- Range section -->
350 <section name="section_range" expanded="false" title="Range options" >
351 <conditional name="advanced_options_positions" >
352 <param name="extra_mod_positions" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Define custom position range" />
353 <when value="off" />
354 <when value="on" >
355 <param name="extra_mod_positions_lowest" type="float" value="0" min="0" max="1000" label="Lowest position (cM)" />
356 <param name="extra_mod_positions_highest" type="float" value="1" min="0" max="1000" label="Highest position (cM)" />
357 <!-- Assert: lowest < higher -->
358 </when>
359 </conditional>
360
361 <param name="extra_offend" type="float" value="0.0" label="Margin before and after marker range to compute scores (cM)" />
362
363 <conditional name="advanced_options_increment" >
364 <param name="extra_increment_type" type="select" label="Increment either the genetic distance across the whole range of markers, or the number of equally-spaced steps between adjacent markers" help="Note that the total number of steps (markers * calc. per step) must not exceed 1000." >
365 <option value="distance" >Distance</option>
366 <option value="steps" selected="true" >Steps</option>
367 </param>
368 <when value="distance" >
369 <param name="extra_increment_sizepavu" type="float" min="1.0" value="1" max="1000" label="centiMorgan interval" />
370 </when>
371 <when value="steps" >
372 <param name="extra_increment_sizepavu" type="integer" min="1" value="2" max="30" label="Number of steps between markers." />
373 </when>
374 </conditional>
375
376 <param name="extra_maxbits" type="integer" min="3" value="19" max="30" label="Max bit-size of the pedigree, computed via '2N-F', for F founders and N non-founders" />
377 <param name="extra_maxbits_skiplarge" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Split pedigrees if larger than the max pedigree size" />
378 </section>
379 <!-- End of range:Works -->
380
381 <!-- Frequency section -->
382 <section name="section_allfreq" title="Allele Frequency Options">
383 <param name="extra_nmaf" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Normalize Allele Frequencies" />
384
385 <param name="extra_mod_allfreq" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Restrict disease allele frequency" />
386
387 <param name="extra_mod_highallele" type="float" min="0" max="1" value="0.5" label="Set maximum disease allele frequency" />
388
389 <param name="extra_mod_penetrancerestrict" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Restrict Penetrances" />
390
391 </section>
392 <!-- End of Frequency section:Works -->
393
394 <!-- Sample Individuals section -->
395 <section name="section_sample" title="Sample Options" >
396 <param name="extra_sun" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Simulate untyped individuals" />
397 <param name="extra_includeuntyped" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Include untyped individuals" />
398 <param name="extra_cpv_fin" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Process fully homozygous (uninformative) genotypes" />
399 <param name="extra_discard" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Discard uninformative individuals" />
400 <param name="extra_ufo" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Use untyped founders" />
401 </section>
402 <!-- End of sample individuals -->
403 </section>
404 <!-- End of advanced options -->
405 </inputs>
406
407 <outputs>
408 <!-- All outputs convert to an Allegro format; ihaplo is only created when Haplotype Analysis is enabled, because the command only writes it in that case -->
409 <data name="ihaplo" format="allegro_ihaplo" label="${tool.name} on ${on_string}: Haplotypes" ><filter>section_haplo['analysis_haplo']['extra_haplotype']</filter></data>
410 <data name="fparam" format="allegro_fparam" label="${tool.name} on ${on_string}: MPT Linkage" />
411 </outputs>
412
413 <tests>
414 <test><!-- Defaults with haplo -->
415 <expand macro="test_input_files" />
416
417 <param name="extra_haplotype" value="on" />
418
419 <expand macro="test_output_fparam" />
420 <expand macro="test_output_haplo" />
421 </test>
422 <test><!-- Haplotypes via Viterbi resolution -->
423 <expand macro="test_input_files" />
424
425 <param name="extra_mod_analysis" value="BOTH" />
426 <param name="extra_haplotype" value="on" />
427 <param name="extra_haplotype_method" value="Viterbi" />
428 <param name="extra_increment_sizepavu" value="10" />
429
430 <expand macro="test_output_haplo" />
431 <expand macro="test_output_fparam" />
432 </test>
433 <test><!-- Parametric LOD with restricted scoring -->
434 <expand macro="test_input_files" />
435
436 <param name="extra_mod_allfreq" value="on" />
437 <param name="extra_mod_highallele" value="0.8" />
438 <param name="extra_npl_score" value="hom" />
439
440 <expand macro="test_output_fparam" />
441 </test>
442 <test><!-- Single modcalc with best-position P-values and a fixed random seed; the bep boolean is set with 'true' because its truevalue text 'bep on' is not a boolean literal -->
443 <expand macro="test_input_files" />
444
445 <param name="extra_cpv_fin" value="on" />
446 <param name="extra_modcalc" value="single" />
447 <param name="extra_mod_bep" value="true" />
448 <param name="extra_srs" value="10" />
449
450 <expand macro="test_output_fparam" />
451 </test>
452 </tests>
453
454 <help><![CDATA[
455
456 **Genehunter-MODscore** calculates a *maximized LOD* (MOD) score over a set of genotypes for use in linkage and haplotype analysis.
457
458 Haplotypes are generated using either a maximum probability approach, or via slower, more conventional Viterbi crawling.
459
460 Untyped founders can be simulated by reconstructing their haplotypes from offspring, and points of recombination can still be accurately determined in lieu of this.
461
462 Due to the stochastic nature of the analysis, a random seed can be set by the user to produce reproducible results.
463
464 Many more configurable options are outlined in the official manual_.
465
466 .. _manual: https://www.helmholtz-muenchen.de/fileadmin/GENEPI/downloads/ghm-3.0.pdf
467
468 ]]>
469 </help>
470 <citations>
471 <citation type="doi">10.1159/000369065</citation>
472 <citation type="doi">10.1002/gepi.20264</citation>
473 <citation type="doi">10.1093/bioinformatics/btl539</citation>
474 <citation type="doi">10.1186/1471-2156-6-S1-S162</citation>
475 </citations>
476 </tool>