Mercurial > repos > iuc > snpsift
comparison snpSift_extractFields.xml @ 15:8bd645802765 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit fbc18d9128669e461e76ed13307ee88dd774afa5
| author | iuc |
|---|---|
| date | Mon, 12 Jun 2017 10:24:36 -0400 |
| parents | 80da7fae14b8 |
| children | ede8954f5c82 |
comparison
equal
deleted
inserted
replaced
| 14:80da7fae14b8 | 15:8bd645802765 |
|---|---|
| 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1"> | 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> |
| 2 <options sanitize="False" /> | 2 <options sanitize="False" /> |
| 3 <description>from a VCF file into a tabular file</description> | 3 <description>from a VCF file into a tabular file</description> |
| 4 <macros> | 4 <macros> |
| 5 <import>snpSift_macros.xml</import> | 5 <import>snpSift_macros.xml</import> |
| 6 </macros> | 6 </macros> |
| 7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
| 8 <expand macro="stdio" /> | 8 <expand macro="stdio" /> |
| 9 <expand macro="version_command" /> | 9 <expand macro="version_command" /> |
| 10 <command><![CDATA[ | 10 <command><![CDATA[ |
| 11 @CONDA_SNPSIFT_JAR_PATH@ && | 11 @CONDA_SNPSIFT_JAR_PATH@ && |
| 12 cat "$input" | 12 cat '$input' |
| 13 #if $one_effect_per_line: | 13 #if $one_effect_per_line: |
| 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" | 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" |
| 15 #end if | 15 #end if |
| 16 | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields | 16 | SnpSift -Xmx6G extractFields |
| 17 #if $separator: | 17 #if $separator: |
| 18 -s '$separator' | 18 -s '$separator' |
| 19 #end if | 19 #end if |
| 20 #if $empty_text: | 20 #if $empty_text: |
| 21 -e '$empty_text' | 21 -e '$empty_text' |
| 22 #end if | 22 #end if |
| 23 - | 23 - |
| 24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) | 24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) |
| 25 > "$output" | 25 > '$output' |
| 26 ]]> | 26 ]]></command> |
| 27 </command> | |
| 28 <inputs> | 27 <inputs> |
| 29 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | 28 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> |
| 30 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> | 29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> |
| 31 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across multiple lines" /> | 30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across multiple lines" /> |
| 32 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> | 31 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> |
| 33 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> | 32 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> |
| 34 </inputs> | 33 </inputs> |
| 35 <outputs> | 34 <outputs> |
| 36 <data format="tabular" name="output" /> | 35 <data name="output" format="tabular" /> |
| 37 </outputs> | 36 </outputs> |
| 38 <tests> | 37 <tests> |
| 39 <test> | 38 <test> |
| 40 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> | 39 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> |
| 41 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> | 40 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> |
| 55 <assert_contents> | 54 <assert_contents> |
| 56 <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> | 55 <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> |
| 57 </assert_contents> | 56 </assert_contents> |
| 58 </output> | 57 </output> |
| 59 </test> | 58 </test> |
| 60 | |
| 61 </tests> | 59 </tests> |
| 62 <help><![CDATA[ | 60 <help><![CDATA[ |
| 63 | |
| 64 **SnpSift Extract Fields** | 61 **SnpSift Extract Fields** |
| 65 | 62 |
| 66 Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. | 63 Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. |
| 67 | 64 |
| 68 http://snpeff.sourceforge.net/SnpSift.html#Extract | 65 http://snpeff.sourceforge.net/SnpSift.html#Extract |
| 69 | 66 |
| 70 You can also use sub-fields and genotype fields / sub-fields such as: | 67 You can also use sub-fields and genotype fields / sub-fields such as:: |
| 71 | |
| 72 :: | |
| 73 | 68 |
| 74 Standard VCF fields: | 69 Standard VCF fields: |
| 75 CHROM | 70 CHROM |
| 76 POS | 71 POS |
| 77 ID | 72 ID |
| 125 "NMD[*].GENE" | 120 "NMD[*].GENE" |
| 126 "NMD[*].GENEID" | 121 "NMD[*].GENEID" |
| 127 "NMD[*].NUMTR" | 122 "NMD[*].NUMTR" |
| 128 "NMD[*].PERC" | 123 "NMD[*].PERC" |
| 129 | 124 |
| 130 | |
| 131 Some examples: | 125 Some examples: |
| 132 | 126 |
| 133 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* | 127 - *Extracting chromosome, position, ID and allele frequency from a VCF file*: |
| 134 | 128 |
| 135 **CHROM POS ID AF** | 129 **CHROM POS ID AF** |
| 136 | 130 |
| 137 The result will look something like: | 131 The result will look something like:: |
| 138 | 132 |
| 139 :: | 133 #CHROM POS ID AF |
| 134 1 69134 0.086 | |
| 135 1 69496 rs150690004 0.001 | |
| 140 | 136 |
| 141 #CHROM POS ID AF | 137 - *Extracting genotype fields*: |
| 142 1 69134 0.086 | |
| 143 1 69496 rs150690004 0.001 | |
| 144 | 138 |
| 139 **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** | |
| 145 | 140 |
| 146 - *Extracting genotype fields:* | 141 This means to extract: |
| 147 | 142 |
| 148 **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** | 143 - CHROM POS ID: regular fields (as in the previous example) |
| 144 - THETA : This one is from INFO | |
| 145 - GEN[0].GL[1] : Second likelihood from first genotype | |
| 146 - GEN[1].GL : The whole GL field (all entries without separating them) | |
| 147 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | |
| 148 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | |
| 149 | 149 |
| 150 This means to extract: | 150 The result will look something like:: |
| 151 | 151 |
| 152 - CHROM POS ID: regular fields (as in the previous example) | 152 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT |
| 153 - THETA : This one is from INFO | 153 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 |
| 154 - GEN[0].GL[1] : Second likelihood from first genotype | 154 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 |
| 155 - GEN[1].GL : The whole GL field (all entries without separating them) | 155 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 |
| 156 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | |
| 157 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | |
| 158 | 156 |
| 159 The result will look something like: | 157 - *Extracting fields with multiple values*: |
| 158 (notice that there are multiple effect columns per line because there are multiple effects per variant) | |
| 160 | 159 |
| 161 :: | 160 **CHROM POS REF ALT ANN[*].EFFECT** |
| 162 | 161 |
| 163 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT | 162 The result will look something like:: |
| 164 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 | |
| 165 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 | |
| 166 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 | |
| 167 | 163 |
| 168 - *Extracting fields with multiple values:* | 164 #CHROM POS REF ALT ANN[*].EFFECT |
| 169 (notice that there are multiple effect columns per line because there are multiple effects per variant) | 165 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant |
| 166 22 17072035 C T missense_variant downstream_gene_variant | |
| 167 22 17072258 C A missense_variant downstream_gene_variant | |
| 170 | 168 |
| 171 **CHROM POS REF ALT ANN[*].EFFECT** | 169 - *Extracting fields with multiple values using a comma as a multiple field separator:* |
| 172 | 170 |
| 173 The result will look something like: | 171 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** |
| 174 | 172 |
| 175 :: | 173 The result will look something like:: |
| 176 | 174 |
| 177 #CHROM POS REF ALT ANN[*].EFFECT | 175 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P |
| 178 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant | 176 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. |
| 179 22 17072035 C T missense_variant downstream_gene_variant | 177 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. |
| 180 22 17072258 C A missense_variant downstream_gene_variant | 178 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. |
| 181 | 179 |
| 182 - *Extracting fields with multiple values using a comma as a multiple field separator:* | 180 - *Extracting fields with multiple values, one effect per line:* |
| 183 | 181 |
| 184 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** | 182 **CHROM POS REF ALT ANN[*].EFFECT** |
| 185 | 183 |
| 186 The result will look something like: | 184 The result will look something like:: |
| 187 | 185 |
| 188 :: | 186 #CHROM POS REF ALT ANN[*].EFFECT |
| 189 | 187 22 17071756 T C 3_prime_UTR_variant |
| 190 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P | 188 22 17071756 T C downstream_gene_variant |
| 191 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. | 189 22 17072035 C T missense_variant |
| 192 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. | 190 22 17072035 C T downstream_gene_variant |
| 193 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. | 191 22 17072258 C A missense_variant |
| 194 | 192 22 17072258 C A downstream_gene_variant |
| 195 | |
| 196 - *Extracting fields with multiple values, one effect per line:* | |
| 197 | |
| 198 **CHROM POS REF ALT ANN[*].EFFECT** | |
| 199 | |
| 200 The result will look something like: | |
| 201 | |
| 202 :: | |
| 203 | |
| 204 #CHROM POS REF ALT ANN[*].EFFECT | |
| 205 22 17071756 T C 3_prime_UTR_variant | |
| 206 22 17071756 T C downstream_gene_variant | |
| 207 22 17072035 C T missense_variant | |
| 208 22 17072035 C T downstream_gene_variant | |
| 209 22 17072258 C A missense_variant | |
| 210 22 17072258 C A downstream_gene_variant | |
| 211 | |
| 212 | 193 |
| 213 @EXTERNAL_DOCUMENTATION@ | 194 @EXTERNAL_DOCUMENTATION@ |
| 214 http://snpeff.sourceforge.net/SnpSift.html#Extract | 195 - http://snpeff.sourceforge.net/SnpSift.html#Extract |
| 215 | 196 ]]></help> |
| 216 ]]> | |
| 217 </help> | |
| 218 <expand macro="citations" /> | 197 <expand macro="citations" /> |
| 219 </tool> | 198 </tool> |
