<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
    <!--
        Galaxy wrapper around the GEMINI "roh" sub-command, which reports runs
        of homozygosity found in a GEMINI database.
        The "requirements", "version_command", "stdio" and "citations" macros
        and the @VERSION@ / @CITATION@ tokens are not defined in this file;
        presumably they come from the imported gemini_macros.xml (confirm there).
    -->
    <description>Identifying runs of homozygosity</description>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <macros>
        <import>gemini_macros.xml</import>
        <!-- Sub-command name; substituted into the tool id, the tool name and the command line. -->
        <token name="@BINARY@">roh</token>
    </macros>
    <!--
        Command template (Cheetah). Every numeric parameter is passed straight
        through to the matching long option. The sample list is optional, so
        the "-s" flag is only emitted when the field is non-empty; otherwise the
        flag would swallow the database path as its value. The report is
        written by redirecting standard output into the output dataset.
    -->
    <command>
<![CDATA[
        gemini @BINARY@
            --min-snps $min_snps
            --min-total-depth $min_total_depth
            --min-gt-depth $min_gt_depth
            --min-size $min_size
            --max-hets $max_hets
            --max-unknowns $max_unknowns
            #if str($samples).strip():
                -s "${samples}"
            #end if
            "${ infile }"
            > "${ outfile }"
]]>
    </command>
    <expand macro="stdio" />
    <inputs>
        <param name="infile" type="data" format="sqlite" label="GEMINI database" />

        <!-- Thresholds below mirror the command-line defaults quoted in each help string. -->
        <param name="min_snps" type="integer" value="25" size="5" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_total_depth" type="integer" value="20" size="10" label="The minimum overall sequencing depth required for a SNP to be considered" help="default: 20 (--min-total-depth)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered"
            help="default: 0 (--min-gt-depth)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_size" type="integer" value="100000" size="10" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
            <validator type="in_range" min="1"/>
        </param>
        <!-- NOTE(review): the validator rejects 0, i.e. runs without any het cannot be requested; confirm this is intended. -->
        <param name="max_hets" type="integer" value="1" size="5" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
            <validator type="in_range" min="1"/>
        </param>
        <param name="max_unknowns" type="integer" value="3" size="5" label="Maximum number of allowed unknowns in the run" help="default: 3 (--max-unknowns)">
            <validator type="in_range" min="0"/>
        </param>

        <!-- Optional: when left empty the "-s" flag is omitted from the command line. -->
        <param name="samples" size="30" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="Optional, e.g. S120,S450 (-s)"/>

    </inputs>

    <outputs>
        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
        <!-- TODO: placeholder test without inputs or expected outputs; add a fixture database and assertions. -->
        <test>
        </test>
    </tests>
    <help>

**What it does**

===========================================================================
``ROH``: Identifying runs of homozygosity
===========================================================================
Runs of homozygosity are long stretches of homozygous genotypes that reflect
segments shared identically by descent and are a result of consanguinity or
natural selection. Consanguinity elevates the occurrence of rare recessive
diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
mutations. Hence, the identification of these runs holds medical value.

The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
The tool basically looks at every homozygous position on the chromosome as a possible
start site for the run and looks for those that could give rise to a potentially long
stretch of homozygous genotypes.

For example, allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
the possible roh runs (H) would be:


::

    genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
    roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
    roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
    roh_run3     =                     H H H H H u H H H H H H H h H H H H H
    roh_run4     =                                 H H H H H H H h H H H H H

The roh runs returned for ``--min-snps = 20`` would be:

::

    roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
    roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H


As you can see, the immediate homozygous position right of a break (h or u) would be the possible
start of a new roh run and genotypes to the left of a break are pruned since they cannot
be part of a longer run than we have seen before.


@CITATION@
    </help>
    <expand macro="citations"/>
</tool>