comparison gemini_roh.xml @ 0:80dccf38d55f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit e88029bb12e5262687267293f9d2a694eb00d3f0-dirty
author iuc
date Tue, 29 Dec 2015 10:23:41 -0500
parents
children 737b03b7b34a
comparison
equal deleted inserted replaced
-1:000000000000 0:80dccf38d55f
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
<!-- Short summary of the tool. -->
2 <description>Identifying runs of homozygosity</description>
<!-- Imports the shared macro file and sets the @BINARY@ token to "roh"; the token is used in the tool id and name and in the command line. -->
3 <macros>
4 <import>gemini_macros.xml</import>
5 <token name="@BINARY@">roh</token>
6 </macros>
<!-- The requirements, stdio and version_command macros are not defined in this file; presumably they come from gemini_macros.xml (confirm there). -->
7 <expand macro="requirements" />
8 <expand macro="stdio" />
9 <expand macro="version_command" />
<!-- Command template for "gemini roh". All six numeric thresholds are always passed. The -s option is added only when the samples field is not blank. The input database is the positional argument and standard output is redirected into the output dataset. -->
10 <command>
11 <![CDATA[
12 gemini @BINARY@
13 --min-snps $min_snps
14 --min-total-depth $min_total_depth
15 --min-gt-depth $min_gt_depth
16 --min-size $min_size
17 --max-hets $max_hets
18 --max-unknowns $max_unknowns
19 #if $samples.strip():
20 -s "${samples}"
21 #end if
22 "${ infile }"
23 > "${ outfile }"
24 ]]>
25 </command>
26 <inputs>
<!-- Input dataset parameter, expanded from the infile macro (defined outside this file); the command line refers to it as ${ infile }. -->
27 <expand macro="infile" />
28
<!-- Integer passed to the min-snps option (default 25); the validator rejects negative values. -->
29 <param name="min_snps" type="integer" value="25" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
30 <validator type="in_range" min="0"/>
31 </param>
<!-- Integer passed to the min-total-depth option (default 20); the validator rejects negative values. -->
32 <param name="min_total_depth" type="integer" value="20" label="The minimum overall sequencing depth required for a SNP to be considered" help="default: 20 (--min-total-depth)">
33 <validator type="in_range" min="0"/>
34 </param>
<!-- Integer passed to the min-gt-depth option (default 0); the validator rejects negative values. -->
35 <param name="min_gt_depth" type="integer" value="0" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered"
36 help="default: 0 (--min-gt-depth)">
37 <validator type="in_range" min="0"/>
38 </param>
<!-- Integer passed to the min-size option, in base pairs (default 100000); the validator requires a value of at least 1. -->
39 <param name="min_size" type="integer" value="100000" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
40 <validator type="in_range" min="1"/>
41 </param>
<!-- Integer passed to the max-hets option (default 1). Zero is accepted so that runs without any heterozygous genotype can be requested, consistent with the lower bound used for max_unknowns; negative values are rejected. -->
42 <param name="max_hets" type="integer" value="1" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
43 <validator type="in_range" min="0"/>
44 </param>
<!-- Integer passed to the max-unknowns option (default 3); the validator rejects negative values. -->
45 <param name="max_unknowns" type="integer" value="3" label="Maximum number of allowed unknowns in the run" help="default: 3 (--max-unknowns)">
46 <validator type="in_range" min="0"/>
47 </param>
48
<!-- Optional free-text list of sample names; when it is not blank the command line passes it through the -s option. -->
49 <param name="samples" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g. S120,S450 (-s)"/>
50
51 </inputs>
52
<!-- Single tabular output dataset; the command line redirects the standard output of gemini into it. -->
53 <outputs>
54 <data name="outfile" format="tabular" />
55 </outputs>
<!-- Functional test: runs the tool on gemini_burden_input.db with min_snps=3, min_size=10 and min_total_depth=0 (the remaining parameters keep their defaults) and checks the output against gemini_roh_result.tabular. -->
56 <tests>
57 <test>
58 <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" />
59 <param name="min_snps" value="3" />
60 <param name="min_size" value="10" />
61 <param name="min_total_depth" value="0" />
62 <output name="outfile" file="gemini_roh_result.tabular" />
63 </test>
64 </tests>
65 <help><![CDATA[
66
67 **What it does**
68
69 ===========================================================================
70 ``ROH``: Identifying runs of homozygosity
71 ===========================================================================
72 Runs of homozygosity are long stretches of homozygous genotypes that reflect
73 segments shared identically by descent and are a result of consanguinity or
74 natural selection. Consanguinity elevates the occurrence of rare recessive
75 diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
76 mutations. Hence, the identification of these runs holds medical value.
77
78 The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
79 The tool basically looks at every homozygous position on the chromosome as a possible
80 start site for the run and looks for those that could give rise to a potentially long
81 stretch of homozygous genotypes.
82
83 For example, when allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
84 the possible roh runs (H) would be:
85
86
87 ::
88
89 genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
90 roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
91 roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
92 roh_run3 = H H H H H u H H H H H H H h H H H H H
93 roh_run4 = H H H H H H H h H H H H H
94
95 The roh runs returned for ``--min-snps 20`` would be:
96
97 ::
98
99 roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
100 roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
101
102
103 As you can see, the first homozygous position to the right of a break (h or u) is the possible
104 start of a new roh run, and genotypes to the left of a break are pruned since they cannot
105 be part of a longer run than the ones already seen.
106
107
108 ]]></help>
<!-- Citation entries are expanded from the citations macro, which is not defined in this file; presumably it lives in gemini_macros.xml (confirm there). -->
109 <expand macro="citations"/>
110 </tool>