# HG changeset patch # User greg # Date 1452787260 18000 # Node ID 338e991cdd1f68adcb9d19fc79aff9b33b4ff34b # Parent 3d40351fc9ac46019539b7d8d5da726e021d5efe Uploaded diff -r 3d40351fc9ac -r 338e991cdd1f extract_genomic_dna.xml --- a/extract_genomic_dna.xml Thu Jan 14 10:51:45 2016 -0500 +++ b/extract_genomic_dna.xml Thu Jan 14 11:01:00 2016 -0500 @@ -77,35 +77,21 @@ - - - - - - - - - - - - - - - + - + - + @@ -113,14 +99,14 @@ - + - + @@ -129,7 +115,7 @@ - + @@ -137,23 +123,38 @@ - + .. class:: warningmark -The following will cause a line from the input dataset to be skipped and a warning generated. +This tool requires interval or gff data (specially formatted tabular data). If your data is not TAB delimited, first use *Text Manipulation->Convert*. + +.. class:: warningmark + +Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified). + +.. class:: warningmark - - Sequences that fall outside of the range of a line's start and end coordinates. - - Chromosome start or end coordinates that are invalid for the specified build. +All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item. + - Any lines that do not contain at least 3 columns: a chromosome and numerical start and end coordinates. + - Sequences that fall outside of the range of a line's start and end coordinates. + - Chromosome, start or end coordinates that are invalid for the specified build. + - Any lines whose data columns are not separated by a **TAB** character (other white-space characters are invalid). + +.. class:: infomark + + **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** were previously performed by two separate tools.
----- **What it does** -This tool uses coordinate, strand, and build information to fetch genomic DNA from gff data, producing fasta data. +This tool uses coordinate, strand, and build information to fetch genomic DNA sequences, returned in FASTA or interval format. + +If strand is not defined, the default value is "+". ----- @@ -165,7 +166,7 @@ chr7 127485994 127486166 NM_000230 0 + chr7 127486011 127486166 D49487 0 + -Extracting sequences returns:: +Extracting sequences with the **FASTA** output data type returns:: >hg17_chr7_127475281_127475310_+ NM_000230 GTAGGAATCGCAGCGCCAGCGGTTGCAAG @@ -180,6 +181,12 @@ CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC ACACG +Extracting sequences with the **Interval** output data type returns:: + + chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG + chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG + chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG +