annotate ideas_preprocessor.xml @ 0:f060a0fbd4fe draft

Uploaded
author greg
date Mon, 22 Jan 2018 14:35:19 -0500
parents
children 248b5f72fc02
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
1 <tool id="ideas_preprocessor" name="IDEAS preprocessor" version="1.0.0">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
2 <description></description>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
3 <requirements>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="2.5.4">deeptools</requirement>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="1.10.4">r-data.table</requirement>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="1.4.4">r-optparse</requirement>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="1.6">samtools</requirement>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
8 <requirement type="package" version="357">ucsc-bigwigaverageoverbed</requirement>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
9 </requirements>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
11 #set tmp_dir = "tmp"
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
12 #set prep_input_config = "prep_input_config.txt"
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
13 #set prep_output_config = "prep_output_config.txt"
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
14 #set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
15 #set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt"
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
16 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
17 ## Create the config file and prepare the data
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
18 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
19 #set output_dir = $output.files_path
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
20 #set tmp_dir = "tmp"
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
21 mkdir -p '$output_dir' &&
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
22 cp '$gen_prep_input_config' $prep_input_config &&
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
23 sort $prep_input_config -o $prep_input_config &&
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
24 Rscript '$__tool_directory__/ideas_preprocessor.R'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
25 --prep_input_config '$prep_input_config'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
26 #if str($specify_genomic_window) == "yes":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
27 --bed_input '$specify_genomic_window_cond.bed_input'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
28 #else:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
29 --chrom_len_file '$chromInfo'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
30 --window_size $specify_genomic_window_cond.window_size
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
31 #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
32 #if str($restrict_chromosomes) == "yes":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
33 #set chroms = []
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
34 #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
35 #for $i in $chrom_repeat:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
36 #silent $chroms.append(str($i.chrom))
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
37 #end for
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
38 --restrict_to_chroms '#echo ",".join($chroms)#'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
39 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
40 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
41 --reads_per_bp $reads_per_bp
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
42 #if str($exclude_input) not in ["None", ""]:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
43 --exclude_input '$exclude_input'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
44 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
45 --output '$output'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
46 --output_files_path '$output.files_path'
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
47 &> ideas_preprocessor_log.txt;
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
48 if [[ $? -ne 0 ]]; then
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
49 cp ideas_preprocessor_log.txt '$output';
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
50 exit 1;
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
51 fi
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
52 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
53 ## Coerce the prepMat config output to the
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
54 ## format expected by IDEAS.
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
55 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
56 && cut -d' ' $prep_input_config -f1,2 > file1.txt
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
57 && ls $tmp_dir/*.bed.gz > file2.txt
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
58 && paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
59 #if str($specify_genomic_window) == "yes":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
60 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
61 ## Using a genomic window bed file, so categorize
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
62 ## the window positions by chromosome to enable
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
63 ## the IDEAS -inv option.
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
64 ##############################################
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
65 && cp '$gen_windows_positions_by_chroms_config' $windows_positions_by_chroms_config
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
66 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
67 ]]></command>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
68 <configfiles>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
69 <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
70 #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
71 #for $i in $cell_type_epigenetic_factor_cond.input:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
72 #set file_name_with_ext = $i.name
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
73 #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
74 #set file_name_with_ext = $file_name_with_ext.split('/')[-1]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
75 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
76 #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
77 #set file_name = $file_name_with_ext.split(".")[0]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
78 #if str($input_name_positions) == "cell_first":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
79 #set cell_type_name = $file_name.split("-")[0]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
80 #set epigenetic_factor_name = $file_name.split("-")[1]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
81 #else:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
82 #set cell_type_name = $file_name.split("-")[1]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
83 #set epigenetic_factor_name = $file_name.split("-")[0]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
84 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
85 ${cell_type_name} ${epigenetic_factor_name} ${i} ${i.filename} ${i.ext}
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
86 #end for
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
87 #else:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
88 #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
89 ${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} ${input_items.input.filename} ${input_items.input.ext}
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
90 #end for
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
91 #end if]]></configfile>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
92 <configfile name="gen_windows_positions_by_chroms_config"><![CDATA[#if str($specify_genomic_window_cond.specify_genomic_window) == "yes":
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
93 #import collections
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
94 #set window_positions_by_chroms_odict = $collections.OrderedDict()
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
95 #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')):
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
96 #set $line = $line.strip()
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
97 #if not $line or $line.startswith('#'):
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
98 #continue
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
99 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
100 #set items = $line.split('\t')
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
101 #if $items[0] in $window_positions_by_chroms_odict:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
102 #set tup = $window_positions_by_chroms_odict[$items[0]]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
103 #set $tup[1] += 1
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
104 #set $window_positions_by_chroms_odict[$items[0]] = $tup
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
105 #else:
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
106 #set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1]
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
107 #end if
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
108 #end for
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
109 #for $chrom, $tup in $window_positions_by_chroms_odict.items():
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
110 ${chrom} ${tup[0]} ${tup[1]}
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
111 #end for
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
112 #end if]]></configfile>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
113 </configfiles>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
114 <inputs>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
115 <conditional name="cell_type_epigenetic_factor_cond">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
116 <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
117 <option value="extract" selected="true">extracting them from the selected input file names</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
118 <option value="manual">manually setting them for each selected input</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
119 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
120 <when value="extract">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
121 <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
122 <validator type="empty_field"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
123 <validator type="unspecified_build"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
124 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
125 <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
126 <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
127 <option value="cell_last">Epigenetic factor name - Cell type name</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
128 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
129 </when>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
130 <when value="manual">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
131 <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
132 <param name="cell_type_name" type="text" value="" label="Cell type name">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
133 <validator type="empty_field"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
134 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
135 <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
136 <validator type="empty_field"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
137 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
138 <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
139 <validator type="empty_field"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
140 <validator type="unspecified_build"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
141 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
142 </repeat>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
143 </when>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
144 </conditional>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
145 <conditional name="specify_genomic_window_cond">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
146 <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
147 <option value="no" selected="true">No</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
148 <option value="yes">Yes</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
149 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
150 <when value="no">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
151 <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
152 <conditional name="restrict_chromosomes_cond">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
153 <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
154 <option value="no" selected="true">No</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
155 <option value="yes">Yes</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
156 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
157 <when value="no"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
158 <when value="yes">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
159 <repeat name="chrom_repeat" title="Chromosomes" min="1">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
160 <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
161 </repeat>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
162 </when>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
163 </conditional>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
164 </when>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
165 <when value="yes">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
166 <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
167 </when>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
168 </conditional>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
169 <param argument="--bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
170 <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
171 <option value="6" selected="true">mean</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
172 <option value="8">max</option>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
173 </param>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
174 <param name="exclude_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
175 <param argument="--standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
176 </inputs>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
177 <outputs>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
178 <data name="output" format="html"/>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
179 </outputs>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
180 <tests>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
181 <test>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
182 </test>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
183 </tests>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
184 <help>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
185 **What it does**
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
186
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
187 -----
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
188
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
189 **Required options**
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
190
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
191 </help>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
192 <citations>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
193 <citation type="doi">10.1093/nar/gkw278</citation>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
194 </citations>
f060a0fbd4fe Uploaded
greg
parents:
diff changeset
195 </tool>