comparison old_version/optimizer_M.xml @ 3:221db2eb3c8e draft default tip

Uploaded
author elixir-it
date Wed, 22 Jul 2020 19:23:14 +0000
parents 6e4eb4856874
children
comparison
equal deleted inserted replaced
2:6e4eb4856874 3:221db2eb3c8e
1 <tool id="optimizer" name="optimizer" version="1">
2 <description> Optimizer</description>
3 <requirements> <!-- Packages that must be available when the command runs -->
4 <requirement type="package" >perl</requirement> <!-- interpreter used to run optimizer_M.pl -->
5 <requirement type="package" >r-base</requirement> <!-- presumably needed by the linked R scripts (wilcox.R, survival_optM.R); confirm -->
6 </requirements>
7 <command> <![CDATA[
8 ln -s $__tool_directory__/score_complete_alt_M.pl 2>$log &&
9 ln -s $__tool_directory__/wilcox.R 2>>$log &&
10 ln -s $__tool_directory__/survival_optM.R 2>>$log &&
11 ln -s $fileR fileR &&
12 ln -s $fileC fileC &&
13 ## The links above expose the helper scripts and both input VCFs under fixed names
14 ## in the working directory; stderr of the script links is collected in the log output.
15
16
17 perl $__tool_directory__/optimizer_M.pl
18
19
20 ###INPUT VCF FILE
21
22 -fileR fileR -fileC fileC
23
24
25 ## Optional inputs: each flag below is emitted only when the user supplied a value.
26
27 #if $qfile
28 -leQTL $qfile
29 #end if
30
31 #if $similarD
32 -similarD $similarD
33 #end if
34
35 #if $disease
36 -disease '$disease'
37 #end if
38
39 #if $lgenes
40 -lgenes $lgenes
41 #end if
42 ## -keywords and -effects fall back to the kfile/efile found in the tool directory.
43 #if $kfile
44 -keywords $kfile
45 #else
46 -keywords $__tool_directory__/kfile
47 #end if
48
49 #if $efile
50 -effects $efile
51 #else
52 -effects $__tool_directory__/efile
53 #end if
54
55 #if $ifile
56 -ifile $ifile
57 #end if
58
59 ###RANGE VALUES
60 -disease_clinvar $score_DB_MIN:$score_DB_MAX
61 ## score_RVM_IN is the declared name of the input labelled score_RV_MIN; keep both in sync.
62 -score_AF $score_RVM_IN:$score_RV_MAX
63
64 -score_functional $score_FE_MIN:$score_FE_MAX
65
66 -score_NS $score_NS_MIN:$score_NS_MAX
67
68 -score_nIND $score_OR_MIN:$score_OR_MAX
69
70 -AF $AF
71
72 -scoreeQTL $score_eQ_MIN:$score_eQ_MAX
73
74 -scoreG $score_AD_MIN:$score_AD_MAX
75
76 -scoreI $score_GD_MIN:$score_GD_MAX
77
78 -nind $nind
79
80 -AD $AD
81
82 -XL $XL
83
84 ###OUTPUT
85 -ofile $ofile
86 ## stderr of the optimizer run is appended to the same log dataset.
87 2>>$log
88
89
90
91 ]]>
92 </command>
93 <inputs>
94 <param format="vcf" name="fileR" type="data" label="AffectedVCF" help="VCF files of genetic variants for the population of affected individuals"/>
95 <param format="vcf" name="fileC" type="data" label="UnaffectedVCF" help="VCF files of genetic variants for the population of unaffected individuals"/>
96 <!-- Score ranges with default values. Each MIN/MAX pair is passed to optimizer_M.pl as MIN:MAX. NOTE: the parameter labelled score_RV_MIN is named score_RVM_IN; the command block references that exact name, so it must not be renamed on its own. -->
97 <param name="score_DB_MIN" value="4" type="integer" min="1" max="12" label="score_DB_MIN" help="Minimum value for the Pathogenicity score component"/>
98 <param name="score_DB_MAX" value="6" type="integer" min="1" max="12" label="score_DB_MAX" help="Maximum value for the Pathogenicity score component"/>
99 <param name="score_RVM_IN" value="2" type="integer" min="1" max="12" label="score_RV_MIN" help="Minimum value for the Allele Frequency score component"/>
100 <param name="score_RV_MAX" value="4" type="integer" min="1" max="12" label="score_RV_MAX" help="Maximum value for the Allele Frequency score component"/>
101 <param name="score_FE_MIN" value="4" min="1" max="12" type="integer" label="score_FE_MIN" help="Minimum value for the Functional effect score component"/>
102 <param name="score_FE_MAX" value="6" min="1" max="12" type="integer" label="score_FE_MAX" help="Maximum value for the Functional effect score component"/>
103 <param name="score_NS_MIN" value="2" min="1" max="12" type="integer" label="score_NS_MIN" help="Minimum value for the component of the score associated with Predicted disruptive non-synonymous variants"/>
104 <param name="score_NS_MAX" value="4" min="1" max="12" type="integer" label="score_NS_MAX" help="Maximum value for the component of the score associated with Predicted disruptive non-synonymous variants"/>
105 <param name="score_OR_MIN" value="2" type="integer" min="1" max="12" label="score_OR_MIN" help="Minimum value for the component of the score associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals) "/>
106 <param name="score_OR_MAX" value="4" type="integer" min="1" max="12" label="score_OR_MAX" help="Maximum value for the component of the score associated with over-representation of the variant in the dataset (Rare variants associated to more than N individuals)" />
107 <param name="score_eQ_MIN" value="2" min="1" max="12" type="integer" label="score_eQ_MIN" help="Minimum value for the component of the score associated with eQTLs"/>
108 <param name="score_eQ_MAX" value="4" min="1" max="12" type="integer" label="score_eQ_MAX" help="Maximum value for the component of the score associated with eQTLs"/>
109 <param name="score_AD_MIN" value="3" min="1" max="12" type="integer" label="score_AD_MIN" help="Minimum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
110 <param name="score_AD_MAX" value="5" min="1" max="12" type="integer" label="score_AD_MAX" help="Maximum value for the component of the score associated with genes implicated in the pathological condition or associated with similar phenotypes" />
111 <param name="score_GD_MIN" value="0.1" min="0.1" max="1" type="float" label="score_GD_MIN" help="Minimum value for the Protein-Protein interaction score component"/>
112 <param name="score_GD_MAX" value="0.3" min="0.1" max="1" type="float" label="score_GD_MAX" help="Maximum value for the Protein-Protein interaction score component"/>
113
114 <param name="nind" value="5" type="integer" label="Nind Cutoff" help="Cut off value for the Over-representation score. The value specified by scoreNind is added to the pathogenicity score only for variants that have an allele count in the cohort equal to or greater than this value. As a rule of thumb, this should be set to approximately 5-10% of the size of your cohort of individuals"/>
115 <param name="AF" value="0.0001" min="0" max="1" type="float" label="AlleleFrequCutOff" help="Cut off value for the Allele frequency score. The value specified by scoreAF is added to the pathogenicity score only for variants that have an allele frequency lower or equal to this cut-off value " />
116 <param name="AD" type="text" value="T" label="Autosomic Dominant" help="If set to T (TRUE) VINYL assumes an Autosomic Dominant model of inheritance of the disease. If FALSE (F) the model is Autosomic Recessive. Valid values are T=TRUE or F=FALSE Default is T"/>
117 <param name="XL" type="text" value="F" label="X-linked" help="When T (TRUE) an X-linked model of Disease inheritance is used. Valid values are T=TRUE and F=FALSE. Default is FALSE" />
118
119
120 <!-- Optional inputs: the command emits the matching flag only when a value is supplied; kfile and efile fall back to files in the tool directory. -->
121 <param format="txt" name="kfile" type="data" optional="true" label="keywords file" help="This is a configuration file that specifies the keywords that are used by VINYL for the extraction of relevant annotations from the VCF file and for the computation of the pathogenicity score. Names of these keywords need to match exactly names as used by Annovar. A file with default values is incorporated in VINYL. Custom files can be provided (see Manual for the format) "/>
122 <param format="txt" name="efile" type="data" optional="true" label="Functional Effects files" help="This configuration file specifies the predicted functional effects for which the value specified by the score_functional parameter is added to the global pathogenicity score. See above for further explanations."/>
123 <param format="txt" name="qfile" type="data" optional="true" label="eQTLlist" help="This configuration file provides a list of tissues that are used by VINYL for the annotation of eQTL and the scoring of variants associated with eQTLs in that tissue. Names of tissues need to match names used in the GTEx project. See the manual for more details about the format of the file "/>
124 <param format="txt" name="ifile" type="data" optional="true" label="Protein-Protein interaction file" help="By default this file provides protein-protein interaction data according to the string database (https://string-db.org/). Alternative files can be specified by the user. See the manual of VINYL for a detailed discussion of the format"/>
125 <param name="disease" type="text" optional="true" label="Disease" help="Name or functional description of the pathological condition. This parameter is used to perform a soft check of the annotation in Clinvar and to identify variants that have been previously implicated in the disease. Highly recommended. "/>
126 <param name="similarD" type="data" format="txt" optional="true" label="Symptoms" help="This file provides a list of symptoms or related keywords that are used by VINYL to screen the Annotations of Clinvar and identify variants that have been implicated in similar pathologies or phenotype. See the manual for a full description of the file format. Users are strongly encouraged to provide this file "/>
127 <param name="lgenes" type="data" format="txt" optional="true" label="List of Disease Genes" help="This file provides a list of genes that have been previously implicated in the disease or in similar pathological conditions. Users are highly recommended to provide this type of information. A full description of the format of this file is found in the VINYL manual" />
128
129
130 <!--others-->
131
132 </inputs>
133 <outputs> <!-- Datasets produced by the command block -->
134 <data format="txt" name="log" label="${tool.name} on ${on_string}: log file "/> <!-- target of the stderr redirections (2>$log, 2>>$log) in the command -->
135 <data format="tabular" name="ofile" label="${tool.name} on ${on_string}: tabular "/> <!-- path handed to optimizer_M.pl through -ofile -->
136 </outputs>
137 <stdio> <!-- empty: no exit-code or stream-matching rules are declared -->
138 </stdio>
139 <tests>
140 <test> <!-- placeholder: no test inputs or expected outputs are defined -->
141 </test>
142 </tests>
143 <help>
144 **What it does**
145 VINYL is a software designed to assist in variant prioritization in medium-large cohort of patients. The program computes an aggregate score, which is based on an extensive collection of publicly available annotations, in order to identify/prioritize variants that are likely to be pathogenic or have a clinical significance. In order to derive an optimal cut off score for the variants, VINYL uses a strategy based on "survival analysis", where the pathogenicity score distribution of the affected individuals is compared with a matched cohort of unaffected individuals.
146 To facilitate the usage of the software, VINYL is provided in the form of a public Galaxy instance, based on the Laniakea suite. To ensure the maximum level of security, VINYL uses Encrypted data volumes for the storage of the data.
147
148
149
150 **Important Usage Note**
151 This wrapper provides the module of VINYL that performs score optimization. Two input VCF files need to be provided, one containing genetic variants from a cohort of affected individuals, and one from a population of unaffected controls. If the latter is not available to you, you can take advantage of one of the several VCF files of genetic variants associated with geographic human populations that are available in VINYL at XXX. Please be aware that ideally you should select the population that is more closely related to your cohort of patients. If you have performed a targeted resequencing study, please see the manual for instructions on how to pre-process the VCF files included in VINYL.
152
153 See the "survival" and the "VINYL" utilities for the delineation of the score cut-off value and the calculation of individual scores from a single VCF file.
154 A complete workflow that automates the execution of VINYL is available at XXX
155
156
157 </help>
158 <citations>
159 </citations>
160 </tool>