Mercurial > repos > greg > ideas_preprocessor
changeset 0:f060a0fbd4fe draft
Uploaded
| author | greg | 
|---|---|
| date | Mon, 22 Jan 2018 14:35:19 -0500 | 
| parents | |
| children | 91c5dbb14a13 | 
| files | ideas_preprocessor.xml | 
| diffstat | 1 files changed, 195 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ideas_preprocessor.xml Mon Jan 22 14:35:19 2018 -0500
@@ -0,0 +1,195 @@
+<tool id="ideas_preprocessor" name="IDEAS preprocessor" version="1.0.0">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="2.5.4">deeptools</requirement>
+        <requirement type="package" version="1.10.4">r-data.table</requirement>
+        <requirement type="package" version="1.4.4">r-optparse</requirement>
+        <requirement type="package" version="1.6">samtools</requirement>
+        <requirement type="package" version="357">ucsc-bigwigaverageoverbed</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+## Names of the working directory and files used by the shell commands below.
+#set tmp_dir = "tmp"
+#set prep_input_config = "prep_input_config.txt"
+#set prep_output_config = "prep_output_config.txt"
+#set specify_genomic_window = $specify_genomic_window_cond.specify_genomic_window
+#set windows_positions_by_chroms_config = "windows_positions_by_chroms_config.txt"
+## ---- Create the config file and prepare the data ----
+mkdir -p '$output.files_path' &&
+cp '$gen_prep_input_config' $prep_input_config &&
+sort $prep_input_config -o $prep_input_config &&
+Rscript '$__tool_directory__/ideas_preprocessor.R'
+--prep_input_config '$prep_input_config'
+#if str($specify_genomic_window) == "yes":
+    --bed_input '$specify_genomic_window_cond.bed_input'
+#else:
+    --chrom_len_file '$chromInfo'
+    --window_size $specify_genomic_window_cond.window_size
+    #set restrict_chromosomes = $specify_genomic_window_cond.restrict_chromosomes_cond.restrict_chromosomes
+    #if str($restrict_chromosomes) == "yes":
+        ## Collect one chromosome name per repeat entry and pass them as a comma-separated list.
+        #set chroms = []
+        #set chrom_repeat = $specify_genomic_window_cond.restrict_chromosomes_cond.chrom_repeat
+        #for $i in $chrom_repeat:
+            #silent $chroms.append(str($i.chrom))
+        #end for
+        #set chroms_str = ",".join($chroms)
+        --restrict_to_chroms '$chroms_str'
+    #end if
+#end if
+--reads_per_bp $reads_per_bp
+## exclude_input is optional, so only pass it when at least one dataset was selected.
+#if str($exclude_input) not in ["None", ""]:
+    --exclude_input '$exclude_input'
+#end if
+--output '$output'
+--output_files_path '$output.files_path'
+&> ideas_preprocessor_log.txt;
+## On failure, copy the captured log into the output dataset and stop with a non-zero exit code.
+if [[ $? -ne 0 ]]; then
+    cp ideas_preprocessor_log.txt '$output';
+    exit 1;
+fi
+## ---- Coerce the prepMat config output to the
+## format expected by IDEAS. ----
+&& cut -d' ' $prep_input_config -f1,2 > file1.txt
+&& ls $tmp_dir/*.bed.gz > file2.txt
+&& paste <(cat file1.txt) <(cat file2.txt) -d' ' > $prep_output_config
+#if str($specify_genomic_window) == "yes":
+    ## ---- Using a genomic window bed file, so categorize
+    ## the window positions by chromosome to enable
+    ## the IDEAS -inv option. ----
+    && cp '$gen_windows_positions_by_chroms_config' $windows_positions_by_chroms_config
+#end if
+    ]]></command>
+    <configfiles>
+        <!-- One line per input: cell type name, epigenetic factor name, then the dataset's string form, its filename attribute and its ext attribute. -->
+        <configfile name="gen_prep_input_config"><![CDATA[#if str($cell_type_epigenetic_factor_cond.cell_type_epigenetic_factor) == "extract":
+    #set input_name_positions = $cell_type_epigenetic_factor_cond.input_name_positions
+    #for $i in $cell_type_epigenetic_factor_cond.input:
+        #set file_name_with_ext = $i.name
+        #if str($file_name_with_ext).find("http") >= 0 or str($file_name_with_ext).find("ftp") >= 0:
+            #set file_name_with_ext = $file_name_with_ext.split('/')[-1]
+        #end if
+        #assert str($file_name_with_ext).find("-") >= 0, "The selected input '%s' is invalid because it does not include the '-' character which is required when setting cell type and epigenetic factor names by extracting them from the input file names." % $file_name_with_ext
+        #set file_name = $file_name_with_ext.split(".")[0]
+        #if str($input_name_positions) == "cell_first":
+            #set cell_type_name = $file_name.split("-")[0]
+            #set epigenetic_factor_name = $file_name.split("-")[1]
+        #else:
+            #set cell_type_name = $file_name.split("-")[1]
+            #set epigenetic_factor_name = $file_name.split("-")[0]
+        #end if
+${cell_type_name} ${epigenetic_factor_name} ${i} ${i.filename} ${i.ext}
+    #end for
+#else:
+    #for $input_items in $cell_type_epigenetic_factor_cond.input_repeat:
+${input_items.cell_type_name} ${input_items.epigenetic_factor_name} ${input_items.input} ${input_items.input.filename} ${input_items.input.ext}
+    #end for
+#end if]]></configfile>
+        <!-- Writes 'chrom start end' per chromosome of the window bed file. NOTE(review): the counter comes from enumerate() over every line, including the blank and '#' lines that are skipped, so such lines shift the positions; confirm the bed files never contain them. -->
+        <configfile name="gen_windows_positions_by_chroms_config"><![CDATA[#if str($specify_genomic_window_cond.specify_genomic_window) == "yes":
+    #import collections
+    #set window_positions_by_chroms_odict = $collections.OrderedDict()
+    #for count, line in enumerate(open($specify_genomic_window_cond.bed_input.file_name, 'r')):
+        #set $line = $line.strip()
+        #if not $line or $line.startswith('#'):
+            #continue
+        #end if
+        #set items = $line.split('\t')
+        #if $items[0] in $window_positions_by_chroms_odict:
+            #set tup = $window_positions_by_chroms_odict[$items[0]]
+            #set $tup[1] += 1
+            #set $window_positions_by_chroms_odict[$items[0]] = $tup
+        #else:
+            #set $window_positions_by_chroms_odict[$items[0]] = [$count, $count+1]
+        #end if
+    #end for
+    #for $chrom, $tup in $window_positions_by_chroms_odict.items():
+${chrom} ${tup[0]} ${tup[1]}
+    #end for
+#end if]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="cell_type_epigenetic_factor_cond">
+            <param name="cell_type_epigenetic_factor" type="select" label="Set cell type and epigenetic factor names by">
+                <option value="extract" selected="true">extracting them from the selected input file names</option>
+                <option value="manual">manually setting them for each selected input</option>
+            </param>
+            <when value="extract">
+                <param name="input" type="data" format="bigwig,bam" multiple="True" label="BAM or BigWig files">
+                    <validator type="empty_field"/>
+                    <validator type="unspecified_build"/>
+                </param>
+                <param name="input_name_positions" type="select" display="radio" label="Selected input file name pattern is" help="A '-' character must separate cell type and epigenetic factor names within the selected input file names">
+                    <option value="cell_first" selected="true">Cell type name - Epigenetic factor name</option>
+                    <option value="cell_last">Epigenetic factor name - Cell type name</option>
+                </param>
+            </when>
+            <when value="manual">
+                <repeat name="input_repeat" title="Cell type, Epigenetic factor and Input" min="1">
+                    <param name="cell_type_name" type="text" value="" label="Cell type name">
+                        <validator type="empty_field"/>
+                    </param>
+                    <param name="epigenetic_factor_name" type="text" value="" label="Epigenetic factor name">
+                        <validator type="empty_field"/>
+                    </param>
+                    <param name="input" type="data" format="bigwig,bam" label="BAM or BigWig file">
+                        <validator type="empty_field"/>
+                        <validator type="unspecified_build"/>
+                    </param>
+                </repeat>
+            </when>
+        </conditional>
+        <conditional name="specify_genomic_window_cond">
+            <param name="specify_genomic_window" type="select" label="Select Bed file that defines genomic windows on which to process the data">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param name="window_size" type="integer" value="200" label="Window size in base pairs"/>
+                <conditional name="restrict_chromosomes_cond">
+                    <param name="restrict_chromosomes" type="select" label="Restrict processing to specified chromosomes">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no"/>
+                    <when value="yes">
+                        <repeat name="chrom_repeat" title="Chromosomes" min="1">
+                            <param name="chrom" type="text" value="" label="Chromosome" help="One chromosome (e.g., chr1, chr2, chrX) per text field"/>
+                        </repeat>
+                    </when>
+                </conditional>
+            </when>
+            <when value="yes">
+                <param name="bed_input" type="data" format="bed" label="Bed file specifying the genomic windows"/>
+            </when>
+        </conditional>
+        <!-- NOTE(review): the bychr and standardize_datasets params below are not referenced by the command template; confirm whether they should be passed to the R script. -->
+        <param argument="--bychr" type="boolean" truevalue="true" falsevalue="" checked="False" label="Output chromosomes in separate files"/>
+        <param name="reads_per_bp" type="select" display="radio" label="Calculate the signal in each genomic window using">
+            <option value="6" selected="true">mean</option>
+            <option value="8">max</option>
+        </param>
+        <param name="exclude_input" type="data" format="bed" optional="True" multiple="True" label="Select file(s) containing regions to exclude"/>
+        <param argument="--standardize_datasets" type="boolean" truevalue="true" falsevalue="" checked="False" label="Standardize all datasets"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="html"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+-----
+
+**Required options**
+
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkw278</citation>
+    </citations>
+</tool>
