<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper: converts the features of a bigBed file to standard BED
    (via the bundled bigbed_to_standard_bed.pl script) and pipes them into
    twoBitToFa to extract the matching sequences from a twoBit archive.
-->
<tool id="bigbed_to_fa" name="bigBedToFa" version="1.0">
    <description>Retrieve sequences associated with bigBed features</description>

    <macros>
        <import>ucsc_macros.xml</import>
    </macros>

    <!-- Requirements from the imported macro, extended with the ucsc_bigbed package. -->
    <expand macro="requirements_twobit">
        <requirement type="package" version="340">ucsc_bigbed</requirement>
    </expand>

    <command detect_errors="exit_code">
<![CDATA[
        ## Without pipefail the shell only reports the exit status of the LAST
        ## command in a pipeline, so a failure in the conversion script or in
        ## twoBitToFa would go unnoticed by the exit-code based error detection.
        set -o pipefail;

        ## "upper" disables masking entirely; the other two modes start from the
        ## soft-masked (lowercase) output.
        #set $no_mask = ''
        #if str($masking_option) == 'upper':
            #set $no_mask = '-noMask'
        #end if

        ## Paths are single-quoted so that whitespace or shell metacharacters in
        ## the tool directory or dataset paths cannot break the command line.
        '${__tool_directory__}/bigbed_to_standard_bed.pl' -i '${bigbed_input}' |
        twoBitToFa ${no_mask} -bed=stdin '${twobit_input}' stdout

        ## Hard masking: turn soft-masked (lowercase a/c/g/t) bases into N's,
        ## leaving the FASTA header lines untouched.
        #if str($masking_option) == 'hard':
            | awk '{ if (/^>/) { print } else { gsub(/[acgt]/, "N"); print } }'
        #end if

        > '${fasta_output}'
]]>
    </command>

    <inputs>
        <param name="twobit_input" type="data" format="twobit" label="twoBit input file" />

        <param name="bigbed_input" type="data" format="bigbed" label="bigBed input file" />

        <param name="masking_option" type="select"
               label="Repeat masking option"
               help="Specify how repeats within the twoBit file should be represented">

            <option value="upper">Show repeats in uppercase (no masking)</option>

            <option value="lower" selected="true">Show repeats in lowercase (soft masking)</option>

            <option value="hard">Show repeats as N's (hard masking)</option>
        </param>
    </inputs>

    <outputs>
        <data name="fasta_output" format="fasta" />
    </outputs>

    <tests>
        <test>
            <!-- Test bigBedToFa with bed4 and default (soft) masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.trf_4.bb" ftype="bigbed" />
            <output name="fasta_output" file="contigs.trf_4.soft.fa" />
        </test>
        <test>
            <!-- Test bigBedToFa with bed4 and no masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.trf_4.bb" ftype="bigbed" />
            <param name="masking_option" value="upper" />
            <output name="fasta_output" file="contigs.trf_4.nomask.fa" />
        </test>
        <test>
            <!-- Test bigBedToFa with bed4 and hard masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.trf_4.bb" ftype="bigbed" />
            <param name="masking_option" value="hard" />
            <output name="fasta_output" file="contigs.trf_4.hard.fa" />
        </test>
        <test>
            <!-- Test bigBedToFa with bed4+12 with default masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.trf_4_12.bb" ftype="bigbed" />
            <output name="fasta_output" file="contigs.trf_4_12.fa" />
        </test>
        <test>
            <!-- Test bigBedToFa with bed6+2 with default masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.models_6_2.bb" ftype="bigbed" />
            <output name="fasta_output" file="contigs.models_6_2.fa" />
        </test>
        <test>
            <!-- Test bigBedToFa with bed12+2 with default masking -->
            <param name="twobit_input" value="contigs.out.2bit" ftype="twobit" />
            <param name="bigbed_input" value="contigs.models_12_2.bb" ftype="bigbed" />
            <output name="fasta_output" file="contigs.models_12_2.fa" />
        </test>
    </tests>

    <help>
<![CDATA[
**What it does**

bigBedToFa retrieves sequences associated with the features in a
`bigBed <https://genome.ucsc.edu/goldenpath/help/bigBed.html>`_ file
from a `twoBit Sequence Archive <https://genome.ucsc.edu/goldenpath/help/twoBit.html>`_.

**Repeat masking options**

- *Show repeats in uppercase (no masking)*: repeats are reported in uppercase, like the rest of the sequence.
- *Show repeats in lowercase (soft masking)*: repeats are reported in lowercase (default).
- *Show repeats as N's (hard masking)*: soft-masked (lowercase) a, c, g and t bases are replaced with N.

.. class:: infomark

**Extracted sequences:**

For files with `12 standard BED columns <https://genome.ucsc.edu/FAQ/FAQformat.html#format1>`_,
this program will concatenate the sequences associated with each block (e.g., exons), and
exclude the sequences between adjacent blocks (e.g., introns).

]]></help>

    <expand macro="citations" />
</tool>