comparison optimizer_genetic.xml @ 0:4c6529d120c3 draft

Uploaded
author elixir-it
date Tue, 09 Jun 2020 16:07:19 +0000
parents
children 35c308dd6420
comparison
equal deleted inserted replaced
-1:000000000000 0:4c6529d120c3
1 <tool id="optimizer" name="optimizer" version="1">
2 <description> VINYL score optimizer applies genetic algorithms to identify the best scoring system (i.e. scoring weights) for VINYL</description>
3 <requirements>
4 <requirement type="package">perl</requirement>
5 <requirement type="package">r-base</requirement>
6 <requirement type="package">r-genalg</requirement>
7 </requirements>
8 <command> <![CDATA[
9 ln -s '$__tool_directory__/score_complete_alt_M.pl' &&
10 ln -s '$__tool_directory__/GENEO_VINYL.R' &&
11 ln -s '$fileR' fileR &&
12 ln -s '$fileC' fileC &&
13
14 ## The two helper scripts and both input datasets are symlinked into the working directory above
15 ## (presumably because optimizer_genetic.pl looks for the helpers there - confirm against the script).
16 ## Every substituted value below is single-quoted so that paths or free text containing spaces stay one argument.
17 perl '$__tool_directory__/optimizer_genetic.pl'
18
19
20 ###INPUT VCF FILE
21
22 -fileR fileR -fileC fileC
23
24 #if $qfile
25 -leQTL '$qfile'
26 #end if
27
28 #if $similarD
29 -similarD '$similarD'
30 #end if
31
32 #if $disease
33 -disease '$disease'
34 #end if
35
36 #if $lgenes
37 -lgenes '$lgenes'
38 #end if
39
40 #if $kfile
41 -keywords '$kfile'
42 #else
43 -keywords '$__tool_directory__/kfile'
44 #end if
45
46 #if $efile
47 -effects '$efile'
48 #else
49 -effects '$__tool_directory__/efile'
50 #end if
51
52
53 ###RANGE VALUES
54 -disease_clinvar '${score_DB_MIN}:${score_DB_MAX}'
55
56 -score_AF '${score_RV_MIN}:${score_RV_MAX}'
57
58 -score_functional '${score_FE_MIN}:${score_FE_MAX}'
59
60 -score_NS '${score_NS_MIN}:${score_NS_MAX}'
61
62 -score_nIND '${score_OR_MIN}:${score_OR_MAX}'
63
64 -AF '$AF'
65
66 -scoreeQTL '${score_eQ_MIN}:${score_eQ_MAX}'
67
68 -scoreG '${score_AD_MIN}:${score_AD_MAX}'
69
70 -scoreT '${score_T_MIN}:${score_T_MAX}'
71
72 -scoreGW '${score_GW_MIN}:${score_GW_MAX}'
73
74 -scoreR '${score_R_MIN}:${score_R_MAX}'
75
76 -scoreM '${score_M_MIN}:${score_M_MAX}'
77
78 -scoreSP '${score_SP_MIN}:${score_SP_MAX}'
79
80 -nind '$nind'
81
82 -AD '$AD'
83
84 -XL '$XL'
85
86 ###OUTPUT
87 -ofile '$ofile'
88
89 2>> '$log'
90
91
92
93
94 ]]>
95 </command>
96 <inputs>
97 <param format="vcf" name="fileR" type="data" label="AffectedVCF" help="VCF files of genetic variants for the population of affected individuals"/>
98 <param format="vcf" name="fileC" type="data" label="UnaffectedVCF" help="VCF files of genetic variants for the population of unaffected individuals"/>
99 <!-- score-weight search ranges: each component has a MIN and a MAX bound, defaulting to 1 and 20 -->
100 <param name="score_DB_MIN" value="1" type="float" min="1" max="20" label="score_DB_MIN" help="Minimum value for the Pathogenicity score component"/>
101 <param name="score_DB_MAX" value="20" type="float" min="1" max="20" label="score_DB_MAX" help="Maximum value for the Pathogenicity score component"/>
102 <param name="score_RV_MIN" value="1" type="float" min="1" max="20" label="score_RV_MIN" help="Minimum value for the Allele Frequency score component"/>
103 <param name="score_RV_MAX" value="20" type="float" min="1" max="20" label="score_RV_MAX" help="Maximum value for the Allele Frequency score component"/>
104 <param name="score_FE_MIN" value="1" min="1" max="20" type="float" label="score_FE_MIN" help="Minimum value for the Functional effect score component"/>
105 <param name="score_FE_MAX" value="20" min="1" max="20" type="float" label="score_FE_MAX" help="Maximum value for the Functional effect score component"/>
106 <param name="score_NS_MIN" value="1" min="1" max="20" type="float" label="score_NS_MIN" help="Minimum value for the component of the score associated with Predicted disruptive non-synonymous variants"/>
107 <param name="score_NS_MAX" value="20" min="1" max="20" type="float" label="score_NS_MAX" help="Maximum value for the component of the score associated with Predicted disruptive non-synonymous variants"/>
108 <param name="score_OR_MIN" value="1" type="float" min="1" max="20" label="score_OR_MIN" help="Minimum value for the component of the score associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals)"/>
109 <param name="score_OR_MAX" value="20" type="float" min="1" max="20" label="score_OR_MAX" help="Maximum value for the component of the score associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals)" />
110 <param name="score_eQ_MIN" value="1" min="1" max="20" type="float" label="score_eQ_MIN" help="Minimum value for the component of the score associated with eQTLs"/>
111 <param name="score_eQ_MAX" value="20" min="1" max="20" type="float" label="score_eQ_MAX" help="Maximum value for the component of the score associated with eQTLs"/>
112 <param name="score_AD_MIN" value="1" min="1" max="20" type="float" label="score_AD_MIN" help="Minimum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
113 <param name="score_AD_MAX" value="20" min="1" max="20" type="float" label="score_AD_MAX" help="Maximum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
114 <param name="score_T_MIN" value="1" min="1" max="20" type="float" label="score_T_MIN" help="Minimum value for the component of the score associated with TFBS"/>
115 <param name="score_T_MAX" value="20" min="1" max="20" type="float" label="score_T_MAX" help="Maximum value for the component of the score associated with TFBS"/>
116 <param name="score_GW_MIN" value="1" min="1" max="20" type="float" label="score_GW_MIN" help="Minimum value for component of the score for SNPs associated to relevant phenotypic traits according to GWAS studies"/>
117 <param name="score_GW_MAX" value="20" min="1" max="20" type="float" label="score_GW_MAX" help="Maximum value for component of the score for SNPs associated to relevant phenotypic traits according to GWAS studies"/>
118 <param name="score_M_MIN" value="1" min="1" max="20" type="float" label="score_M_MIN" help="Minimum score for SNPs associated with miRNA binding sites"/>
119 <param name="score_M_MAX" value="20" min="1" max="20" type="float" label="score_M_MAX" help="Maximum score for SNPs associated with miRNA binding sites"/>
120 <param name="score_R_MIN" value="1" min="1" max="20" type="float" label="score_R_MIN" help="Minimum value for the component of the score associated with regulatory elements SNPs"/>
121 <param name="score_R_MAX" value="20" min="1" max="20" type="float" label="score_R_MAX" help="Maximum value for the component of the score associated with regulatory elements SNPs"/>
122 <param name="score_SP_MIN" value="1" min="1" max="20" type="float" label="score_SP_MIN" help="Minimum value for the component of the score associated with SNPs predicted to have highly disruptive effects on splice-sites"/>
123 <param name="score_SP_MAX" value="20" min="1" max="20" type="float" label="score_SP_MAX" help="Maximum value for the component of the score associated with SNPs predicted to have highly disruptive effects on splice-sites"/>
124
125
126 <param name="nind" value="5" type="integer" label="Nind Cutoff" help="Cut off value for the Over-representation score. The value specified by scoreNind is added to the pathogenicity score only for variants that have an allele count in the cohort equal to or greater than this value. As a rule of thumb, this should be set to approximately 5-10% of the size of your cohort of individuals"/>
127 <param name="AF" value="0.0001" min="0" max="1" type="float" label="AlleleFrequCutOff" help="Cut off value for the Allele frequency score. The value specified by scoreAF is added to the pathogenicity score only for variants that have an allele frequency lower or equal to this cut-off value" />
128 <param name="AD" type="text" value="T" label="Autosomic Dominant" help="If set to T (TRUE) VINYL assumes an Autosomic Dominant model of inheritance of the disease. If FALSE (F) the model is Autosomic Recessive. Valid values are T=TRUE or F=FALSE. Default is T"/>
129 <param name="XL" type="text" value="F" label="X-linked" help="When T (TRUE) an X-linked model of Disease inheritance is used. Valid values are T=TRUE and F=FALSE. Default is FALSE" />
130
131
132 <!-- optional inputs: each one is passed to the optimizer only when supplied -->
133 <param format="txt" name="kfile" type="data" optional="true" label="keywords file" help="This is a configuration file that specifies the keywords that are used by VINYL for the extraction of relevant annotations from the VCF file and for the computation of the pathogenicity score. Names of these keywords need to match exactly names as used by Annovar. A file with default values is incorporated in VINYL. Custom files can be provided (see Manual for the format) "/>
134 <param format="txt" name="efile" type="data" optional="true" label="Functional Effects files" help="This configuration file specifies the predicted functional effects for which the value specified by the score_functional parameter is added to the global pathogenicity score. See above for further explanations."/>
135 <param format="txt" name="qfile" type="data" optional="true" label="eQTLlist" help="This configuration file provides a list of tissues that are used by VINYL for the annotation of eQTL and the scoring of variants associated with eQTLs in that tissue. Names of tissues need to match names used in the GTEx project. See the manual for more details about the format of the file "/>
136 <param name="disease" type="text" optional="true" label="Disease" help="Name or functional description of the pathological condition. This parameter is used to perform a soft check of the annotation in Clinvar and to identify variants that have been previously implicated in the disease. Highly recommended. "/>
137 <param name="similarD" type="data" format="txt" optional="true" label="Symptoms" help="This file provides a list of symptoms or related keywords that are used by VINYL to screen the Annotations of Clinvar and identify variants that have been implicated in similar pathologies or phenotype. See the manual for a full description of the file format. Users are strongly encouraged to provide this file "/>
138 <param name="lgenes" type="data" format="txt" optional="true" label="List of Disease Genes" help="This file provides a list of genes that have been previously implicated in the disease or in similar pathological conditions. Users are highly recommended to provide this type of information. A full description of the format of this file is found in the VINYL manual" />
139
140
141 <!--others-->
142
143 </inputs>
144 <outputs>
145 <data name="ofile" format="tsv" label="${tool.name} on ${on_string}: tsv "/>
146 <data name="log" format="txt" label="${tool.name} on ${on_string}: log file "/>
147 </outputs>
148 <!-- the command appends stderr to the log dataset, so a failed run is recognised by its exit status --> <stdio>
149 <exit_code range="1:" level="fatal" description="The optimizer exited with a non-zero status; see the log file output for details"/></stdio>
150 <tests>
151 <test>
152 <param name="fileR" value="R.csv" ftype="csv" />
153 <param name="fileC" value="T.csv" ftype="csv" />
154 <!-- score ranges and cut-offs, set to the same values as the tool defaults; test params take only name and value -->
155 <param name="score_DB_MIN" value="1" />
156 <param name="score_DB_MAX" value="20" />
157 <param name="score_RV_MIN" value="1" />
158 <param name="score_RV_MAX" value="20" />
159 <param name="score_FE_MIN" value="1" />
160 <param name="score_FE_MAX" value="20" />
161 <param name="score_NS_MIN" value="1" />
162 <param name="score_NS_MAX" value="20" />
163 <param name="score_OR_MIN" value="1" />
164 <param name="score_OR_MAX" value="20" />
165 <param name="score_eQ_MIN" value="1" />
166 <param name="score_eQ_MAX" value="20" />
167 <param name="score_AD_MIN" value="1" />
168 <param name="score_AD_MAX" value="20" />
169 <param name="score_T_MIN" value="1" />
170 <param name="score_T_MAX" value="20" />
171 <param name="score_GW_MIN" value="1" />
172 <param name="score_GW_MAX" value="20" />
173 <param name="score_M_MIN" value="1" />
174 <param name="score_M_MAX" value="20" />
175 <param name="score_R_MIN" value="1" />
176 <param name="score_R_MAX" value="20" />
177 <param name="score_SP_MIN" value="1" />
178 <param name="score_SP_MAX" value="20" />
179 <param name="nind" value="5" />
180 <param name="AF" value="0.0001" />
181 <param name="AD" value="T" />
182 <param name="XL" value="F" />
183 <output name="ofile" file="optimizer_output_test.tsv" ftype="tsv" />
184 </test>
185 </tests>
186 <help>
187 **What it does**
188 VINYL is software designed to assist in variant prioritization in medium-to-large cohorts of patients. The program computes an aggregate score, which is based on an extensive collection of publicly available annotations, in order to identify/prioritize variants that are likely to be pathogenic or have a clinical significance. In order to derive an optimal cut off score for the variants, VINYL uses a strategy based on "survival analysis", where the pathogenicity score distribution of the affected individuals is compared with a matched cohort of unaffected individuals.
189 To facilitate the usage of the software, VINYL is provided in the form of a public Galaxy instance, based on the Laniakea suite. To ensure the maximum level of security, VINYL uses Encrypted data volumes for the storage of the data.
190
191
192
193 **Important Usage Note**
194 This wrapper provides the module of VINYL that performs score optimization. Two input VCF files need to be provided, one containing genetic variants from a cohort of affected individuals, and one from a population of unaffected controls. If the latter is not available to you, you can take advantage of one of the several VCF files of genetic variants associated with geographic human populations that are available in VINYL at XXX. Please be aware that ideally you should select the population that is most closely related to your cohort of patients. If you have performed a targeted resequencing study, please see the manual for instructions on how to pre-process the VCF files included in VINYL.
195
196 See the "survival" and the "VINYL" utilities for the delineation of the score cut-off value and the calculation of individual scores from a single VCF file.
197 A complete workflow that automates the execution of VINYL is available at XXX
198
199
200 </help>
201 <citations>
202 </citations>
203 </tool>