Mercurial > repos > greg > extract_genomic_dna
view extract_genomic_dna.xml @ 0:cff5b7c9be55 draft
Uploaded
author | greg |
---|---|
date | Thu, 14 Jan 2016 07:55:22 -0500 |
parents | |
children | 311febbd33d6 |
line wrap: on
line source
<!--
    Galaxy tool wrapper for extract_genomic_dna.py.

    Given a GFF or interval dataset and a reference genome (either a locally
    cached one selected through the "alignseq_seq" data table or a fasta
    dataset from the history), the wrapper calls the script to fetch the
    sequence for every region and writes it in the selected output format.

    NOTE(review): the tool id contains spaces. It is kept unchanged because
    saved workflows and histories reference tools by id.
-->
<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
    <description>using coordinates from assembled/unassembled genomes</description>
    <command>
        <![CDATA[
            ## Both parameters live inside the input_format_cond conditional,
            ## so they are aliased here to keep the command line readable.
            #set input_format = $input_format_cond.input_format
            #set input = $input_format_cond.input
            #set dbkey = $input.metadata.dbkey
            #set datatype = $input.datatype
            mkdir -p output_dir &&
            python $__tool_directory__/extract_genomic_dna.py
            --input_format $input_format
            --input "$input"
            --dbkey "$dbkey"
            #if str($input_format) == "gff":
                --interpret_features $input_format_cond.interpret_features
            #end if
            ## GFF has fixed column positions; interval datasets carry their
            ## column assignments in the dataset metadata.
            #if isinstance($datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__):
                --columns "1,4,5,7"
            #else:
                --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
            #end if
            --reference_genome_source $reference_genome_cond.reference_genome_source
            #if str($reference_genome_cond.reference_genome_source) == "cached":
                --reference_genome "$reference_genome_cond.reference_genome.fields.path"
            #else:
                --reference_genome "$reference_genome_cond.reference_genome"
            #end if
            --output_format $output_format
            --output "$output"
        ]]>
    </command>
    <inputs>
        <conditional name="input_format_cond">
            <param name="input_format" type="select" label="Input file format">
                <option value="gff" selected="True">Gff</option>
                <option value="interval">Interval</option>
            </param>
            <when value="gff">
                <param name="input" type="data" format="gff" label="Fetch sequences for intervals in">
                    <validator type="unspecified_build" />
                </param>
                <param name="interpret_features" type="select" label="Interpret features when possible">
                    <option value="yes">Yes</option>
                    <option value="no">No</option>
                </param>
            </when>
            <when value="interval">
                <param name="input" type="data" format="interval" label="Fetch sequences for intervals in">
                    <validator type="unspecified_build" />
                </param>
            </when>
        </conditional>
        <conditional name="reference_genome_cond">
            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
                <option value="cached">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <param name="reference_genome" type="select" label="Using reference genome">
                    <options from_data_table="alignseq_seq">
                        <filter type="data_meta" key="dbkey" ref="input" column="dbkey"/>
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
                    <options>
                        <!-- Must reference this tool's own "input" parameter; there is no "input_bam" here. -->
                        <filter type="data_meta" key="dbkey" ref="input" />
                    </options>
                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
                </param>
            </when>
        </conditional>
        <param name="output_format" type="select" label="Select output format">
            <option value="fasta" selected="True">fasta</option>
            <option value="interval">interval</option>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="gff">
            <!-- The datatype follows the output_format select; both of its values are covered. -->
            <change_format>
                <when input="output_format" value="fasta" format="fasta" />
                <when input="output_format" value="interval" format="interval" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!--
            Test parameter and output names match the inputs and outputs
            declared above. interpret_features is only set for GFF input
            because the parameter exists only in that branch.
        -->
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output">
                <assert_contents>
                    <!-- First few lines... -->
                    <has_text text=">hg17_chr1_147962192_147962580_- CCDS989.1_cds_0_0_chr1_147962193_r" />
                    <has_text text="ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG" />
                    <has_text text="GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT" />
                    <has_text text="GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT" />
                    <!-- Last few lines... -->
                    <has_text text="GCTGTGGCACAGAACATGGACTCTGTGTTTAAGGAGCTCTTGGGAAAGAC" />
                    <has_text text="CTCTGTCCGCCAGGGCCTTGGGCCAGCATCTACCACCTCTCCCAGTCCTG" />
                    <has_text text="GGCCCCGAAGCCCAAAGGCCCCGCCCAGCAGCCGCCTGGGCAGGAACAAA" />
                    <has_text text="GGCTTCTCCCGGGGCCCTGGGGCCCCAGCCTCACCCTCAGCTTCCCACCC" />
                    <has_text text="CCAGGGCCTAGACACGACCCCCAAGCCACACTGA" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out2.fasta" />
        </test>
        <test>
            <param name="input_format" value="interval"/>
            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out3.interval" />
        </test>
        <!-- Test GFF file support. -->
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="cached"/>
            <param name="output_format" value="interval"/>
            <output name="output" file="extract_genomic_dna_out4.gff" />
        </test>
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="output_format" value="fasta"/>
            <param name="reference_genome_source" value="cached"/>
            <output name="output" file="extract_genomic_dna_out5.fasta" />
        </test>
        <!-- Test custom sequences support and GFF feature interpretation. -->
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="no"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out6.fasta" />
        </test>
        <test>
            <param name="input_format" value="gff"/>
            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
            <param name="interpret_features" value="yes"/>
            <param name="reference_genome_source" value="history"/>
            <param name="reference_genome" value="tophat_in1.fasta"/>
            <param name="output_format" value="fasta"/>
            <output name="output" file="extract_genomic_dna_out7.fasta" />
        </test>
    </tests>
    <help><![CDATA[
.. class:: warningmark

The following will cause a line from the input dataset to be skipped and a warning generated.

- Sequences that fall outside of the range of a line's start and end coordinates.
- Chromosome start or end coordinates that are invalid for the specified build.

-----

**What it does**

This tool uses coordinate, strand, and build information to fetch genomic DNA from gff data, producing fasta data.

-----

**Example**

If the input dataset is::

    chr7  127475281  127475310  NM_000230  0  +
    chr7  127485994  127486166  NM_000230  0  +
    chr7  127486011  127486166  D49487     0  +

Extracting sequences returns::

    >hg17_chr7_127475281_127475310_+ NM_000230
    GTAGGAATCGCAGCGCCAGCGGTTGCAAG
    >hg17_chr7_127485994_127486166_+ NM_000230
    GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
    GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
    CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
    GATCAATGACATTTCACACACG
    >hg17_chr7_127486011_127486166_+ D49487
    TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
    CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
    CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
    ACACG
    ]]></help>
    <citations>
        <citation type="bibtex">
            @unpublished{None,
            author = {},
            title = {None},
            year = {None},
            eprint = {None},
            url = {http://www.bx.psu.edu/~anton/labSite/}
        }</citation>
    </citations>
</tool>