Mercurial > repos > iuc > snpsift
comparison snpSift_extractFields.xml @ 14:80da7fae14b8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit d12355cea76843e3ed6f09d96c3e9fe22afe4a4f
| author | iuc |
|---|---|
| date | Mon, 05 Dec 2016 12:10:54 -0500 |
| parents | ed810da439cc |
| children | 8bd645802765 |
comparison
equal
deleted
inserted
replaced
| 13:ed810da439cc | 14:80da7fae14b8 |
|---|---|
| 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> | 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1"> |
| 2 <options sanitize="False" /> | 2 <options sanitize="False" /> |
| 3 <description>from a VCF file inot a tabular file</description> | 3 <description>from a VCF file into a tabular file</description> |
| 4 <macros> | 4 <macros> |
| 5 <import>snpSift_macros.xml</import> | 5 <import>snpSift_macros.xml</import> |
| 6 </macros> | 6 </macros> |
| 7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
| 8 <expand macro="stdio" /> | 8 <expand macro="stdio" /> |
| 9 <expand macro="version_command" /> | 9 <expand macro="version_command" /> |
| 10 <command><![CDATA[ | 10 <command><![CDATA[ |
| 11 @CONDA_SNPSIFT_JAR_PATH@ && | |
| 11 cat "$input" | 12 cat "$input" |
| 12 #if $one_effect_per_line: | 13 #if $one_effect_per_line: |
| 13 | "\$SNPEFF_JAR_PATH/scripts/vcfEffOnePerLine.pl" | 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" |
| 14 #end if | 15 #end if |
| 15 | java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" extractFields | 16 | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields |
| 16 #if $separator: | 17 #if $separator: |
| 17 -s '$separator' | 18 -s '$separator' |
| 18 #end if | 19 #end if |
| 19 #if $empty_text: | 20 #if $empty_text: |
| 20 -e '$empty_text' | 21 -e '$empty_text' |
| 21 #end if | 22 #end if |
| 22 - | 23 - |
| 23 #echo ' '.join(['"%s"' % x for x in $extract.split()]) | 24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) |
| 24 > "$output" | 25 > "$output" |
| 25 ]]> | 26 ]]> |
| 26 </command> | 27 </command> |
| 27 <inputs> | 28 <inputs> |
| 28 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | 29 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> |
| 29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> | 30 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> |
| 30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> | 31 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> |
| 31 <param name="separator" type="text" value="" optional="true" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values"> | 32 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> |
| 32 </param> | 33 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> |
| 33 <param name="empty_text" type="text" value="" optional="true" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" > | |
| 34 </param> | |
| 35 </inputs> | 34 </inputs> |
| 36 <outputs> | 35 <outputs> |
| 37 <data format="tabular" name="output" /> | 36 <data format="tabular" name="output" /> |
| 38 </outputs> | 37 </outputs> |
| 39 <tests> | 38 <tests> |
| 76 CHROM | 75 CHROM |
| 77 POS | 76 POS |
| 78 ID | 77 ID |
| 79 REF | 78 REF |
| 80 ALT | 79 ALT |
| 81 FILTER | 80 FILTER |
| 82 INFO fields: | 81 INFO fields: |
| 83 AF | 82 AF |
| 84 AC | 83 AC |
| 85 DP | 84 DP |
| 86 MQ | 85 MQ |
| 87 etc. (any info field available) | 86 etc. (any info field available) |
| 88 SnpEff 'ANN' fields: | 87 SnpEff 'ANN' fields: |
| 89 "ANN[*].ALLELE" (alias GENOTYPE) | 88 "ANN[*].ALLELE" (alias GENOTYPE) |
| 90 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) | 89 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) |
| 91 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } | 90 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } |
| 92 "ANN[*].GENE" Gene name (e.g. 'PSD3') | 91 "ANN[*].GENE" Gene name (e.g. 'PSD3') |
| 102 "ANN[*].CDS_POS" (alias POS_CDS) | 101 "ANN[*].CDS_POS" (alias POS_CDS) |
| 103 "ANN[*].CDS_LEN" (alias LEN_CDS) | 102 "ANN[*].CDS_LEN" (alias LEN_CDS) |
| 104 "ANN[*].AA_POS" (alias POS_AA) | 103 "ANN[*].AA_POS" (alias POS_AA) |
| 105 "ANN[*].AA_LEN" (alias LEN_AA) | 104 "ANN[*].AA_LEN" (alias LEN_AA) |
| 106 "ANN[*].DISTANCE" | 105 "ANN[*].DISTANCE" |
| 107 "ANN[*].ERRORS" (alias WARNING, INFOS) | 106 "ANN[*].ERRORS" (alias WARNING, INFOS) |
| 108 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions, newer versions use the 'ANN' field): | 107 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions, newer versions use the 'ANN' field): |
| 109 "EFF[*].EFFECT" | 108 "EFF[*].EFFECT" |
| 110 "EFF[*].IMPACT" | 109 "EFF[*].IMPACT" |
| 111 "EFF[*].FUNCLASS" | 110 "EFF[*].FUNCLASS" |
| 112 "EFF[*].CODON" | 111 "EFF[*].CODON" |
| 114 "EFF[*].AA_LEN" | 113 "EFF[*].AA_LEN" |
| 115 "EFF[*].GENE" | 114 "EFF[*].GENE" |
| 116 "EFF[*].BIOTYPE" | 115 "EFF[*].BIOTYPE" |
| 117 "EFF[*].CODING" | 116 "EFF[*].CODING" |
| 118 "EFF[*].TRID" | 117 "EFF[*].TRID" |
| 119 "EFF[*].RANK" | 118 "EFF[*].RANK" |
| 120 SnpEff 'LOF' fields: | 119 SnpEff 'LOF' fields: |
| 121 "LOF[*].GENE" | 120 "LOF[*].GENE" |
| 122 "LOF[*].GENEID" | 121 "LOF[*].GENEID" |
| 123 "LOF[*].NUMTR" | 122 "LOF[*].NUMTR" |
| 124 "LOF[*].PERC" | 123 "LOF[*].PERC" |
| 125 SnpEff 'NMD' fields: | 124 SnpEff 'NMD' fields: |
| 126 "NMD[*].GENE" | 125 "NMD[*].GENE" |
| 127 "NMD[*].GENEID" | 126 "NMD[*].GENEID" |
| 128 "NMD[*].NUMTR" | 127 "NMD[*].NUMTR" |
| 129 "NMD[*].PERC" | 128 "NMD[*].PERC" |
| 130 | 129 |
| 131 | 130 |
| 132 Some examples: | 131 Some examples: |
| 133 | 132 |
| 134 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* | 133 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* |
| 135 | 134 |
| 136 **CHROM POS ID AF** | 135 **CHROM POS ID AF** |
| 137 | 136 |
| 138 The result will look something like: | 137 The result will look something like: |
| 139 | 138 |
| 140 :: | 139 :: |
| 141 | 140 |
| 142 #CHROM POS ID AF | 141 #CHROM POS ID AF |
| 143 1 69134 0.086 | 142 1 69134 0.086 |
| 153 - CHROM POS ID: regular fields (as in the previous example) | 152 - CHROM POS ID: regular fields (as in the previous example) |
| 154 - THETA : This one is from INFO | 153 - THETA : This one is from INFO |
| 155 - GEN[0].GL[1] : Second likelihood from first genotype | 154 - GEN[0].GL[1] : Second likelihood from first genotype |
| 156 - GEN[1].GL : The whole GL field (all entries without separating them) | 155 - GEN[1].GL : The whole GL field (all entries without separating them) |
| 157 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | 156 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). |
| 158 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | 157 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). |
| 159 | 158 |
| 160 The result will look something like: | 159 The result will look something like: |
| 161 | 160 |
| 162 :: | 161 :: |
| 163 | 162 |
| 164 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT | 163 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT |
| 165 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 | 164 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 |
| 166 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 | 165 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 |
| 167 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 | 166 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 |
| 168 | 167 |
| 169 - *Extracting fields with multiple values:* | 168 - *Extracting fields with multiple values:* |
| 170 (notice that there are multiple effect columns per line because there are multiple effects per variant) | 169 (notice that there are multiple effect columns per line because there are multiple effects per variant) |
| 171 | 170 |
| 172 **CHROM POS REF ALT ANN[*].EFFECT** | 171 **CHROM POS REF ALT ANN[*].EFFECT** |
| 173 | 172 |
| 174 The result will look something like: | 173 The result will look something like: |
| 175 | 174 |
| 176 :: | 175 :: |
| 177 | 176 |
| 178 #CHROM POS REF ALT ANN[*].EFFECT | 177 #CHROM POS REF ALT ANN[*].EFFECT |
| 179 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant | 178 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant |
| 180 22 17072035 C T missense_variant downstream_gene_variant | 179 22 17072035 C T missense_variant downstream_gene_variant |
| 181 22 17072258 C A missense_variant downstream_gene_variant | 180 22 17072258 C A missense_variant downstream_gene_variant |
| 182 | 181 |
| 183 - *Extracting fields with multiple values using a comma as a multiple field separator:* | 182 - *Extracting fields with multiple values using a comma as a multiple field separator:* |
| 184 | 183 |
| 185 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** | 184 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** |
| 186 | 185 |
| 187 The result will look something like: | 186 The result will look something like: |
| 188 | 187 |
| 189 :: | 188 :: |
| 190 | 189 |
| 191 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P | 190 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P |
| 192 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. | 191 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. |
| 193 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. | 192 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. |
| 194 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. | 193 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. |
| 196 | 195 |
| 197 - *Extracting fields with multiple values, one effect per line:* | 196 - *Extracting fields with multiple values, one effect per line:* |
| 198 | 197 |
| 199 **CHROM POS REF ALT ANN[*].EFFECT** | 198 **CHROM POS REF ALT ANN[*].EFFECT** |
| 200 | 199 |
| 201 The result will look something like: | 200 The result will look something like: |
| 202 | 201 |
| 203 :: | 202 :: |
| 204 | 203 |
| 205 #CHROM POS REF ALT ANN[*].EFFECT | 204 #CHROM POS REF ALT ANN[*].EFFECT |
| 206 22 17071756 T C 3_prime_UTR_variant | 205 22 17071756 T C 3_prime_UTR_variant |
| 207 22 17071756 T C downstream_gene_variant | 206 22 17071756 T C downstream_gene_variant |
| 208 22 17072035 C T missense_variant | 207 22 17072035 C T missense_variant |
