<tool id="ideas_preprocessor" name="IDEAS preprocessor" version="1.0.0">
<description></description>
<requirements>
    <!-- Packages, each pinned to an exact version, declared as dependencies of this tool. -->
    <requirement type="package" version="2.5.4">deeptools</requirement>
    <requirement type="package" version="1.10.4">r-data.table</requirement>
    <requirement type="package" version="1.4.4">r-optparse</requirement>
    <requirement type="package" version="1.6">samtools</requirement>
    <requirement type="package" version="357">ucsc-bigwigaverageoverbed</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
##############################################
## Command template: stages the generated config
## files in the job directory, runs
## ideas_preprocessor.R and, on failure, copies
## the captured log into the output dataset.
##############################################
#set tmp_dir = "tmp"
#set ideaspre_input_config = "ideaspre_input_config.txt"
#set ideas_input_config = "IDEAS_input_config.txt"
#set specify_chrom_window = $specify_chrom_window_cond.specify_chrom_window
## -p keeps the job from failing when a directory already exists.
mkdir -p $tmp_dir &&
mkdir -p '$output.files_path' &&
#if str($specify_chrom_window) == "yes":
    ##############################################
    ## Using a genomic window bed file, so categorize
    ## the window positions by chromosome to enable
    ## the IDEAS -inv option.
    ##############################################
    #set chromosome_windows = "chromosome_windows.txt"
    cp '$gen_chromosome_windows' $chromosome_windows &&
#end if
##############################################
## Create the config file and prepare the data
##############################################
cp '$gen_ideaspre_input_config' $ideaspre_input_config &&
sort $ideaspre_input_config -o $ideaspre_input_config &&
Rscript '$__tool_directory__/ideas_preprocessor.R'
--ideaspre_input_config '$ideaspre_input_config'
#if str($specify_chrom_window) == "yes":
    --chrom_bed_input '$specify_chrom_window_cond.chrom_bed_input'
    --chromosome_windows '$chromosome_windows'
    ## NOTE(review): this option is only passed in the bed file branch;
    ## confirm against ideas_preprocessor.R whether that is intended.
    --ideas_input_config '$ideas_input_config'
#else:
    --chrom_len_file '$chromInfo'
    --window_size $specify_chrom_window_cond.window_size
    #set restrict_chromosomes = $specify_chrom_window_cond.restrict_chromosomes_cond.restrict_chromosomes
    #if str($restrict_chromosomes) == "yes":
        #set chrom_repeat = $specify_chrom_window_cond.restrict_chromosomes_cond.chrom_repeat
        ## Each repeat entry carries one chrom text field; join the values
        ## into the single comma-separated argument passed to the script.
        #set chroms = ",".join([str($entry.chrom).strip() for $entry in $chrom_repeat])
        --restrict_to_chroms '$chroms'
    #end if
#end if
--reads_per_bp $reads_per_bp
#if str($exclude_input) not in ["None", ""]:
    --exclude_input '$exclude_input'
#end if
## NOTE(review): the bychr and standardize_datasets inputs are declared by
## this tool but are not passed to the script here; confirm whether
## ideas_preprocessor.R expects them.
--output '$output'
--output_hid $output.hid
--output_files_path '$output.files_path'
&> ideas_preprocessor_log.txt;
if [[ $? -ne 0 ]]; then
    cp ideas_preprocessor_log.txt '$output';
    exit 1;
fi
]]></command>
<configfiles>
    <!--
        gen_ideaspre_input_config: writes one space-separated line per selected
        input: cell type name, epigenetic factor name, dataset path, dataset
        name without extension, dataset extension.
        * extract mode: both names are taken from the part of the dataset name
          before the first '.', split on '-'. The assertion is evaluated on
          that same part so that a name whose only '-' follows the first '.'
          is rejected with the explanatory message.
        * manual mode: the names come from the repeat fields. The fourth
          column is derived from the dataset name exactly as in extract mode.
          NOTE(review): confirm against ideas_preprocessor.R that this is the
          value expected in that column.
        The template text is kept at column 0 because everything that is not a
        directive is written to the generated file verbatim.
    -->
    <configfile name="gen_ideaspre_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
#set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
#for $i in $cell_type_epigenetic_factor_cond.input:
#set file_name_with_ext = $i.name
#if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0:
#set file_name_with_ext = $file_name_with_ext.split('/')[-1]
#end if
#set file_name = $file_name_with_ext.split(".")[0]
#assert str($file_name).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
#if str($input_name_positions) == "cell_first":
#set cell_type_name = $file_name.split("-")[0]
#set epigenetic_factor_name = $file_name.split("-")[1]
#else:
#set cell_type_name = $file_name.split("-")[1]
#set epigenetic_factor_name = $file_name.split("-")[0]
#end if
${cell_type_name} ${epigenetic_factor_name} ${i} ${file_name} ${i.ext}
#end for
#else:
#for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
#set file_name_with_ext = str($input_items.input.name)
#if $file_name_with_ext.find("http") >= 0 or $file_name_with_ext.find("ftp") >= 0:
#set file_name_with_ext = $file_name_with_ext.split('/')[-1]
#end if
#set file_name = $file_name_with_ext.split(".")[0]
${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} ${file_name} ${input_items.input.ext}
#end for
#end if]]></configfile>
    <!--
        gen_chromosome_windows: only produces content when window positions
        come from a bed file. Reads that file line by line and writes, for each
        chromosome in order of first appearance, "chrom start end", where start
        is the index of the chromosome's first line and end is start plus the
        number of lines seen for that chromosome.
        NOTE(review): blank and '#' lines are skipped but still advance the
        line counter, so start indices shift when the bed file contains such
        lines; confirm that this matches what the consumer expects.
    -->
    <configfile name="gen_chromosome_windows"><![CDATA[#if str($specify_chrom_window_cond.specify_chrom_window) == "yes":
#import collections
#set window_positions_by_chroms_odict = $collections.OrderedDict()
#for count, line in enumerate(open($specify_chrom_window_cond.chrom_bed_input.file_name, 'r')):
#set $line = $line.strip()
#if not $line or $line.startswith('#'):
#continue
#end if
#set items = $line.split('\t')
#if $items[0] in $window_positions_by_chroms_odict:
#set tup = $window_positions_by_chroms_odict[$items[0]]
#set $tup[1] += 1
#set $window_positions_by_chroms_odict[$items[0]] = $tup
#else:
#set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1]
#end if
#end for
#for $chrom, $tup in $window_positions_by_chroms_odict.items():
${chrom} ${tup[0]} ${tup[1]}
#end for
#end if]]></configfile>
</configfiles>
<inputs>
    <!-- How the cell type and epigenetic factor names of each input are obtained. -->
    <conditional name="cell_type_epigenetic_factor_cond">
        <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
            <option value="extract" selected="true">extracting them from the selected input file names</option>
            <option value="manual">manually setting them for each selected input</option>
        </param>
        <when value="extract">
            <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
                <validator type="empty_field"/>
                <validator type="unspecified_build"/>
            </param>
            <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
                <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
                <option value="cell_last">Epigenetic factor name - Cell type name</option>
            </param>
        </when>
        <when value="manual">
            <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
                <param name="cell_type_name" type="text" value="" label="Cell type name">
                    <validator type="empty_field"/>
                </param>
                <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
                    <validator type="empty_field"/>
                </param>
                <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
                    <validator type="empty_field"/>
                    <validator type="unspecified_build"/>
                </param>
            </repeat>
        </when>
    </conditional>
    <!-- Where the genomic windows come from: a fixed window size or a bed file. -->
    <conditional name="specify_chrom_window_cond">
        <param name="specify_chrom_window" type="select" label="Specify window positions using a bed file?">
            <option value="no" selected="true">No</option>
            <option value="yes">Yes</option>
        </param>
        <when value="no">
            <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
            <conditional name="restrict_chromosomes_cond">
                <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <repeat name="chrom_repeat" title="Chromosomes" min="1">
                        <!-- An empty field would put an empty entry into the chromosome list passed to the script. -->
                        <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field">
                            <validator type="empty_field"/>
                        </param>
                    </repeat>
                </when>
            </conditional>
        </when>
        <when value="yes">
            <param name="chrom_bed_input" type="data" format="bed" label="Bed file specifying the window positions"/>
        </when>
    </conditional>
    <!-- NOTE(review): bychr and standardize_datasets are not referenced by the command template in this file; confirm how they are meant to reach the script. -->
    <param argument="--bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
    <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
        <option value="6" selected="true">mean</option>
        <option value="8">max</option>
    </param>
    <param name="exclude_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
    <param argument="--standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
</inputs>
<outputs>
    <!-- Single output dataset; the command also writes into its files_path directory. -->
    <data format="ideaspre" name="output"/>
</outputs>
<tests>
    <!-- NOTE(review): this test case declares no parameters, outputs or assertions. -->
    <test>
    </test>
</tests>
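<help>
**What it does**

Runs the IDEAS preprocessor on the selected BAM and BigWig files and collects the result in a single dataset of format ideaspre. Every input is labelled with a cell type name and an epigenetic factor name, and its signal is calculated for each genomic window.

-----

**Required options**

* **Set cell type and epigenetic factor names by** - either extract both names from the selected input file names, which must then contain a '-' character separating the two names, or enter them manually for each selected input.
* **Specify window positions using a bed file?** - select **No** to enter a window size in base pairs (200 by default), optionally restricting processing to the chromosomes you list, or select **Yes** to choose a bed file that specifies the window positions.
* **Calculate the signal in each genomic window using** - choose between mean and max.

</help>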
<citations>
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citation type="doi">10.1093/nar/gkw278</citation>
</citations>
</tool>