comparison genetic/optimizer_genetic.xml @ 3:221db2eb3c8e draft default tip

Uploaded
author elixir-it
date Wed, 22 Jul 2020 19:23:14 +0000
parents 6e4eb4856874
children
comparison
equal deleted inserted replaced
2:6e4eb4856874 3:221db2eb3c8e
1 <tool id="optimizer" name="optimizer" version="1">
2 <description> Optimizer</description>
3 <requirements> <!-- perl runs optimizer_genetic.pl; r-base and r-genalg are presumably needed by GENEO_VINYL.R (confirm) -->
4 <requirement type="package">perl</requirement>
5 <requirement type="package">r-base</requirement>
6 <requirement type="package">r-genalg</requirement>
7 </requirements>
8 <command> <![CDATA[
9 ln -s $__tool_directory__/score_complete_alt_M.pl 2>$log &&
10 ln -s $__tool_directory__/GENEO_VINYL.R 2>>$log &&
11 ln -s $fileR fileR &&
12 ln -s $fileC fileC &&
13 ## The two helper scripts above are linked into the working directory under their own names;
14 ## the two VCF inputs are linked under the fixed names fileR and fileC used below.
15 ## stderr redirections must be written without spaces (2>>\$log): "2 >> \$log" would hand "2" to ln as the link name.
16 ## Everything written to stderr by the steps that redirect it ends up in the log output dataset.
17 perl $__tool_directory__/optimizer_genetic.pl
18
19
20 ###INPUT VCF FILE
21
22 -fileR fileR -fileC fileC
23
24 #if $qfile
25 -leQTL $qfile
26 #end if
27
28 #if $similarD
29 -similarD $similarD
30 #end if
31
32 #if $disease
33 -disease '$disease'
34 #end if
35
36 #if $lgenes
37 -lgenes $lgenes
38 #end if
39
40 #if $kfile
41 -keywords $kfile
42 #else
43 -keywords $__tool_directory__/kfile
44 #end if
45
46 #if $efile
47 -effects $efile
48 #else
49 -effects $__tool_directory__/efile
50 #end if
51
52 #if $ifile
53 -ifile $ifile
54 #end if
55
56 ###RANGE VALUES
57 -disease_clinvar $score_DB_MIN:$score_DB_MAX
58
59 -score_AF $score_RV_MIN:$score_RV_MAX
60
61 -score_functional $score_FE_MIN:$score_FE_MAX
62
63 -score_NS $score_NS_MIN:$score_NS_MAX
64
65 -score_nIND $score_OR_MIN:$score_OR_MAX
66
67 -AF $AF
68
69 -scoreeQTL $score_eQ_MIN:$score_eQ_MAX
70
71 -scoreG $score_AD_MIN:$score_AD_MAX
72
73 -scoreT $score_T_MIN:$score_T_MAX
74
75 -scoreGW $score_GW_MIN:$score_GW_MAX
76
77 -scoreR $score_R_MIN:$score_R_MAX
78
79 -scoreM $score_M_MIN:$score_M_MAX
80
81 -scoreSP $score_SP_MIN:$score_SP_MAX
82
83 -nind $nind
84
85 -AD $AD
86
87 -XL $XL
88
89 ###OUTPUT
90 -ofile $ofile
91
92 2>>$log
93
94
95
96 ]]>
97 </command>
98 <inputs>
99 <param format="vcf" name="fileR" type="data" label="AffectedVCF" help="VCF files of genetic variants for the population of affected individuals"/>
100 <param format="vcf" name="fileC" type="data" label="UnaffectedVCF" help="VCF files of genetic variants for the population of unaffected individuals"/>
101 <!-- default values: each score component is given as a MIN/MAX pair that the command joins as MIN:MAX -->
102 <param name="score_DB_MIN" value="4" type="integer" min="1" max="12" label="score_DB_MIN" help="Minimum value for the Pathogenicity score component"/>
103 <param name="score_DB_MAX" value="6" type="integer" min="1" max="12" label="score_DB_MAX" help="Maximum value for the Pathogenicity score component"/>
104 <param name="score_RV_MIN" value="2" type="integer" min="1" max="12" label="score_RV_MIN" help="Minimum value for the Allele Frequency score component"/>
105 <param name="score_RV_MAX" value="4" type="integer" min="1" max="12" label="score_RV_MAX" help="Maximum value for the Allele Frequency score component"/>
106 <param name="score_FE_MIN" value="4" min="1" max="12" type="integer" label="score_FE_MIN" help="Minimum value for the Functional effect score component"/>
107 <param name="score_FE_MAX" value="6" min="1" max="12" type="integer" label="score_FE_MAX" help="Maximum value for the Functional effect score component"/>
108 <param name="score_NS_MIN" value="2" min="1" max="12" type="integer" label="score_NS_MIN" help="Minimum value for component of the score associated with Predicted disruptive non-synonymous variants"/>
109 <param name="score_NS_MAX" value="4" min="1" max="12" type="integer" label="score_NS_MAX" help="Maximum value for the score component of the score associated with Predicted disruptive non-synonymous variants"/>
110 <param name="score_OR_MIN" value="2" type="integer" min="1" max="12" label="score_OR_MIN" help="Minimum value for the component of the score associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals) "/>
111 <param name="score_OR_MAX" value="4" type="integer" min="1" max="12" label="score_OR_MAX" help="Maximum value for the component of associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals)" />
112 <param name="score_eQ_MIN" value="2" min="1" max="12" type="integer" label="score_eQ_MIN" help="Minimum value for the component of the score associated with eQTLs"/>
113 <param name="score_eQ_MAX" value="4" min="1" max="12" type="integer" label="score_eQ_MAX" help="Maximum value for the component of the score associated with eQTLs"/>
114 <param name="score_AD_MIN" value="3" min="1" max="12" type="integer" label="score_AD_MIN" help="Minimum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
115 <param name="score_AD_MAX" value="5" min="1" max="12" type="integer" label="score_AD_MAX" help="Maximum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
116 <param name="score_T_MIN" value="1" min="1" max="10" type="integer" label="score_T_MIN" help="Minimum value for the component of the score associated with TFBS"/>
117 <param name="score_T_MAX" value="5" min="2" max="20" type="integer" label="score_T_MAX" help="Maximum value for the component of the score associated with TFBS"/>
118 <param name="score_GW_MIN" value="2" min="1" max="20" type="integer" label="score_GW_MIN" help="Minimum value for component of the score for SNPs associated to relevant phenotypic traits according to GWAS studies"/>
119 <param name="score_GW_MAX" value="4" min="2" max="20" type="float" label="score_GW_MAX" help="Maximum value for component of the score for SNPs associated to relevant phenotypic traits according to GWAS studies"/>
120 <param name="score_M_MIN" value="2" min="1" max="20" type="integer" label="score_M_MIN" help="Minimum score for SNPs associated with miRNA binding sites"/>
121 <param name="score_M_MAX" value="5" min="1" max="20" type="float" label="score_M_MAX" help="Maximum score for SNPs associated with miRNA binding sites"/>
122 <param name="score_R_MIN" value="1" min="1" max="10" type="integer" label="score_R_MIN" help="Minimum value for the component of the score associated with regulatory elements SNPs"/>
123 <param name="score_R_MAX" value="5" min="2" max="20" type="integer" label="score_R_MAX" help="Maximum value for the component of the score associated with regulatory elements SNPs"/>
124 <param name="score_SP_MIN" value="1" min="1" max="10" type="integer" label="score_SP_MIN" help="Minimum value for the component of the score associated with SNPs predicted to have highly disruptive effects on splice-sites"/>
125 <param name="score_SP_MAX" value="5" min="2" max="20" type="integer" label="score_SP_MAX" help="Maximum value for the component of the score associated with SNPs predicted to have highly disruptive effects on splice-sites"/>
126 <!-- NOTE(review): score_GW_MAX and score_M_MAX are type="float" while every other score bound is an integer; confirm this is intended -->
127
128 <param name="nind" value="5" type="integer" label="Nind Cutoff" help="Cut off value for the Over-representation score. The value specified by scoreNind is added to the pathogenicity score only for variants that have an allele count in the cohort equal to or greater than this value. As a rule of thumb, this should be set to approximately 5-10% of the size of your cohort of individuals"/>
129 <param name="AF" value="0.0001" min="0" max="1" type="float" label="AlleleFrequCutOff" help="Cut off value for the Allele frequency score. The value specified by scoreAF is added to the pathogenicity score only for variants that have an allele frequency lower or equal to this cut-off value " />
130 <param name="AD" type="text" value="T" label="Autosomic Dominant" help="If set to T (TRUE) VINYL assumes an Autosomic Dominant model of inheritance of the disease. If FALSE (F) the model is Autosomic Recessive. Valid values are T=TRUE or F=FALSE Default is T"/>
131 <param name="XL" type="text" value="F" label="X-linked" help="When T (TRUE) an X-linked model of Disease inheritance is used. Valid values are T=TRUE and F=FALSE. Default is FALSE" />
132
133
134 <!--optional values -->
135 <param format="txt" name="kfile" type="data" optional="true" label="keywords file" help="This is a configuration file that specifies the keywords that are used by VINYL for the extraction of relevant annotations from the VCF file and for the computation of the pathogenicity score. Names of these keywords need to match exactly names as used by Annovar. A file with default values is incorporated in VINYL. Custom files can be provided (see Manual for the format) "/>
136 <param format="txt" name="efile" type="data" optional="true" label="Functional Effects files" help="This configuration file specifies the predicted functional effects for which the value specified by the score_functional parameter is added to the global pathogenicity score. See above for further explanations."/>
137 <param format="txt" name="qfile" type="data" optional="true" label="eQTLlist" help="This configuration file provides a list of tissues that are used by VINYL for the annotation of eQTL and the scoring of variants associated with eQTLs in that tissue. Names of tissues need match names used in the GTEx project. See the manual for more details about the format of the file "/>
138 <param format="txt" name="ifile" type="data" optional="true" label="Protein-Protein interaction file" help="By default this file provides protein-protein interaction data according to the string database (https://string-db.org/). Alternative files can be specified by the user. See the manual of VINYL for a detailed discussion of the format"/>
139 <param name="disease" type="text" optional="true" label="Disease" help="Name or functional description of the pathological condition. This parameter is used to perform a soft check of the annotation in Clinvar and to identify variants that have been previously implicated in the disease. Highly recommended. "/>
140 <param name="similarD" type="data" format="txt" optional="true" label="Symptoms" help="This file provides a list of symptoms or related keywords that are used by VINYL to screen the Annotations of Clinvar and identify variants that have been implicated in similar pathologies or phenotype. See the manual for a full description of the file format. User are strongly encouraged to provide this file "/>
141 <param name="lgenes" type="data" format="txt" optional="true" label="List of Disease Genes" help="This file provides a list of genes that have been previously implicated in the disease or in similar pathological conditions. Users are highly recommended to provide this type of information. A full description of the format of this file is found in the VINYL manual" />
142
143
144 <!--others-->
145
146 </inputs>
147 <outputs>
148 <data name="log" format="txt" label="${tool.name} on ${on_string}: log file "/> <!-- target of the stderr redirections in the command -->
149 <data name="ofile" format="tabular" label="${tool.name} on ${on_string}: tabular "/> <!-- handed to optimizer_genetic.pl through -ofile -->
150 </outputs>
151 <stdio> <!-- no exit-code or regex rules are declared here -->
152 </stdio>
153 <tests>
154 <test> <!-- placeholder: no inputs or expected outputs are defined yet -->
155 </test>
156 </tests>
157 <help>
158 **What it does**
159 VINYL is a software designed to assist in variant prioritization in medium to large cohorts of patients. The program computes an aggregate score, which is based on an extensive collection of publicly available annotations, in order to identify/prioritize variants that are likely to be pathogenic or have a clinical significance. In order to derive an optimal cut off score for the variants, VINYL uses a strategy based on "survival analysis", where the pathogenicity score distribution of the affected individuals is compared with a matched cohort of unaffected individuals.
160 To facilitate the usage of the software, VINYL is provided in the form of a public Galaxy instance, based on the Laniakea suite. To ensure the maximum level of security, VINYL uses Encrypted data volumes for the storage of the data.
161
162
163
164 **Important Usage Note**
165 This wrapper provides the module of VINYL that performs score optimization. Two input VCF files need to be provided, one containing genetic variants from a cohort of affected individuals, and one from a population of unaffected controls. If the latter is not available to you, you can take advantage of one of the several VCF files of genetic variants associated with geographic human populations that are available in VINYL at XXX. Please be aware that ideally you should select the population that is more closely related to your cohort of patients. If you have performed a targeted resequencing study, please see the manual for instructions on how to pre-process the VCF files included in VINYL.
166
167 See the "survival" and "VINYL" utilities for the delineation of the score cut-off value and the calculation of individual scores from a single VCF file.
168 A complete workflow that automates the execution of VINYL is available at XXX
169
170
171 </help>
172 <citations> <!-- no citations are listed yet -->
173 </citations>
174 </tool>