Mercurial > repos > greg > extract_genomic_dna
changeset 7:338e991cdd1f draft
Uploaded
| author | greg | 
|---|---|
| date | Thu, 14 Jan 2016 11:01:00 -0500 | 
| parents | 3d40351fc9ac | 
| children | 32c6057529a4 | 
| files | extract_genomic_dna.xml | 
| diffstat | 1 files changed, 33 insertions(+), 26 deletions(-) [+] | 
line wrap: on
 line diff
```diff
--- a/extract_genomic_dna.xml Thu Jan 14 10:51:45 2016 -0500
+++ b/extract_genomic_dna.xml Thu Jan 14 11:01:00 2016 -0500
@@ -77,35 +77,21 @@
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out1.fasta">
-                <assert_contents>
-                    <!-- First few lines... -->
-                    <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
-                    <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
-                    <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
-                    <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
-                    <!-- Last few lines... -->
-                    <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
-                    <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
-                    <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
-                    <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
-                    <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
-                </assert_contents>
-            </output>
+            <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
         </test>
         <test>
             <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains"/>
+            <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
         </test>
         <test>
             <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
             <param name="interpret_features" value="yes"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="interval"/>
-            <output name="out_file1" file="extract_genomic_dna_out3.interval" />
+            <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
         </test>
         <!-- Test GFF file support. -->
         <test>
@@ -113,14 +99,14 @@
             <param name="interpret_features" value="no"/>
             <param name="index_source" value="cached"/>
             <param name="out_format" value="interval"/>
-            <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains"/>
+            <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
         </test>
         <test>
             <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="no"/>
             <param name="out_format" value="fasta"/>
             <param name="index_source" value="cached"/>
-            <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains"/>
+            <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
         </test>
         <!-- Test custom sequences support and GFF feature interpretation. -->
         <test>
@@ -129,7 +115,7 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains"/>
+            <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
         </test>
         <test>
             <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
@@ -137,23 +123,38 @@
             <param name="index_source" value="history"/>
             <param name="ref_file" value="tophat_in1.fasta"/>
             <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains"/>
+            <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
         </test>
     </tests>
     <help>
 
 .. class:: warningmark
 
-The following will cause a line from the input dataset to be skipped and a warning generated.
+This tool requires interval or gff (special tabular formatted data). If your data is not TAB delimited, first use *Text Manipulation->Convert*.
+
+.. class:: warningmark
+
+Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
+
+.. class:: warningmark
 
- - Sequences that fall outside of the range of a line's start and end coordinates.
- - Chromosome start or end coordinates that are invalid for the specified build.
+All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
+ - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates.
+ - Sequences that fall outside of the range of a line's start and end coordinates.
+ - Chromosome, start or end coordinates that are invalid for the specified build.
+ - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
+
+.. class:: infomark
+
+ **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools.
 
 -----
 
 **What it does**
 
-This tool uses coordinate, strand, and build information to fetch genomic DNA from gff data, producing fasta data.
+This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
+
+If strand is not defined, the default value is "+".
 
 -----
 
@@ -165,7 +166,7 @@
     chr7 127485994 127486166 NM_000230 0 +
     chr7 127486011 127486166 D49487 0 +
 
-Extracting sequences returns::
+Extracting sequences with **FASTA** output data type returns::
 
     >hg17_chr7_127475281_127475310_+ NM_000230
     GTAGGAATCGCAGCGCCAGCGGTTGCAAG
@@ -180,6 +181,12 @@
     CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
     ACACG
 
+Extracting sequences with **Interval** output data type returns::
+
+    chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
+    chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
+    chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
+
     </help>
     <citations>
         <citation type="bibtex">
```
