view macs21_wrapper.xml @ 7:0c6b14f3fefc draft

Attempt to fix installation of tool dependencies for UCSC/bigwig generation, and trap for undefined genome build.
author pjbriggs
date Tue, 21 Apr 2015 08:29:16 -0400
parents 344dd37d1704
children 78c15c0a96ae
line wrap: on
line source

<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-2">
  <requirements>
    <!-- External packages this tool declares as dependencies: the Python
         interpreter and numpy, MACS2 itself, R, and a UCSC tools package.
         NOTE(review): ucsc_tools_for_macs21 is presumably what supplies the
         bedGraph to bigWig conversion used for the bigWig output; confirm
         against that package's dependency definition. -->
    <requirement type="package" version="2.7">python</requirement>
    <requirement type="package" version="1.9">numpy</requirement>
    <requirement type="package" version="2.1.0.20140616">macs2</requirement>
    <requirement type="package" version="3.1.2">R</requirement>
    <requirement type="package" version="1.0">ucsc_tools_for_macs21</requirement>
  </requirements>
  <description>Model-based Analysis of ChIP-Seq</description>
  <command interpreter="python">
    macs21_wrapper.py
    ##
    ## Cheetah template that assembles the argument list for macs21_wrapper.py.
    ## Lines starting with '##' are Cheetah comments and are not emitted.
    ## NOTE(review): keep the script name as the first token of the template;
    ## presumably Galaxy resolves it against the tool directory because of
    ## the interpreter attribute - confirm before moving it.
    ##
    ## Positional arguments: the MACS2 sub-command, then the treatment file.
    ##
    ## Major command
    $major_command.major_command_selector
    ##
    ## ChIP-seq input
    $major_command.input_chipseq_file1
    ##
    ## ChIP-seq control (optional data parameter; renders as 'None' if unset)
    #if str($major_command.input_control_file1) != 'None'
       -c $major_command.input_control_file1
    #end if
    ##
    ## Call peaks
    #if str($major_command.major_command_selector) == 'callpeak'
       --format=$major_command.input_chipseq_file1.extension
       --name="$experiment_name"
       --bw=$major_command.bw
       ##
       ## Genome size: the empty select value means "user-defined"
       #if str($major_command.genome_size.gsize) == ''
          --gsize=$major_command.genome_size.user_defined_gsize
       #else
          --gsize=$major_command.genome_size.gsize
       #end if
       ##
       ## Broad peaks
       #if str($major_command.broad_options.broad_regions) == 'broad'
          --broad --broad-cutoff=$major_command.broad_options.broad_cutoff
       #end if
       ##
       ## (no)model options
       #if str($major_command.nomodel_type.nomodel_type_selector) == 'nomodel'
          --nomodel --extsize=$major_command.nomodel_type.extsize
       #end if
       ##
       ## pq value select options
       #if str($major_command.pq_options.pq_options_selector) == 'qvalue'
          --qvalue=$major_command.pq_options.qvalue
       #else
          --pvalue=$major_command.pq_options.pvalue
       #end if
       ##
       ## Bedgraph options
       ## The 'bdg' boolean is declared with truevalue="-B" and falsevalue="",
       ## so its string form is '-B' or empty (never 'True'); test against
       ## the declared truevalue, exactly as the matching when-block does.
       #if str($major_command.bdg_options.bdg) == '-B'
          -B $major_command.bdg_options.spmr
       #end if
       ##
       ## Advanced options
       #if str($major_command.advanced_options.advanced_options_selector) == 'on'
          --mfold $major_command.advanced_options.mfoldlo $major_command.advanced_options.mfoldhi
          $major_command.advanced_options.nolambda
          $major_command.advanced_options.call_summits
          ## Empty keep_dup select value means "manually specified maximum"
          #if str($major_command.advanced_options.keep_duplicates.keep_dup) == ''
             --keep-dup $major_command.advanced_options.keep_duplicates.maximum_tags
          #else
             --keep-dup $major_command.advanced_options.keep_duplicates.keep_dup
          #end if
       #else
          ## Defaults if advanced options not set
          --mfold 10 30 --keep-dup 1
       #end if
       ##
       ## Output files
       --output-summits=$output_summits_bed_file
       --output-extra-files=$output_extra_files
       --output-extra-files-path=$output_extra_files.files_path
       ##
       ## Narrow/broad peak outputs
       #if str($major_command.broad_options.broad_regions) == 'broad'
          --output-broadpeaks=$output_broadpeaks_file
          --output-gappedpeaks=$output_gappedpeaks_file
       #else
          --output-narrowpeaks=$output_narrowpeaks_file
       #end if
       ##
       ## Bedgraph outputs (same '-B' truevalue test as the -B option above,
       ## so the output arguments are passed whenever -B is passed)
       #if str($major_command.bdg_options.bdg) == '-B'
          --output-pileup=$output_treat_pileup_file
          --output-lambda-bedgraph=$output_lambda_bedgraph_file
          ## make_bigwig is declared with truevalue="True"
          #if str($major_command.bdg_options.make_bigwig) == 'True'
             --output-bigwig=$output_bigwig_file
             --length=$GALAXY_DATA_INDEX_DIR/shared/ucsc/chrom/${major_command.input_chipseq_file1.dbkey}.len
          #end if
       #end if
       ##
       ## XLS/interval output
       #if str($major_command.xls_to_interval) == 'True'
          --output-xls-to-interval=$output_xls_to_interval_peaks_file
       #else
          --output-peaks=$output_peaks_file
       #end if
    #end if
    ##
    ## Compare .bdg files
    #if str($major_command.major_command_selector) == 'bdgcmp'
       -m $major_command.bdgcmp_options.bdgcmp_options_selector
       -p $major_command.pseudocount
       --output-bdgcmp $output_bdgcmp_file
    #end if
  </command>
  <inputs>
    <!--experiment name used as base for output file names -->
    <param name="experiment_name" type="text" value="MACS2.1.0 in Galaxy" size="50"
           label="Experiment Name"/>
    <!--select a major MACS2 command-->
    <conditional name="major_command">
      <param name="major_command_selector" type="select" label="Select action to be performed">
        <option value="callpeak">Peak Calling</option>
        <option value="bdgcmp">Compare .bdg Files</option>
      </param>
      <!--callpeak option of macs2-->
      <when value="callpeak">
        <!--choose 'broad' or 'narrow' regions-->
        <conditional name="broad_options">
          <param name="broad_regions" type="select" label="Type of region to call"
                 help="Broad regions are formed by linking nearby enriched regions">
            <option value="" selected="true">Narrow regions</option>
            <option value="broad">Broad regions</option>
          </param>
          <when value="">
            <!-- Narrow regions: no extra parameters -->
          </when>
          <when value="broad">
            <param name="broad_cutoff" type="float"
                   label="Cutoff for broad regions"
                   value="0.1" help="default: 0.1 (--broad-cutoff)"/>
          </when>
        </conditional>
        <param name="input_chipseq_file1" type="data" format="bed,sam,bam"
               label="ChIP-seq read file" />
        <param name="input_control_file1" type="data" format="bed,sam,bam" optional="True"
               label="ChIP-seq control read file" />
        <!-- Effective genome size: preset organism codes, or the empty
             value which reveals a free-form numeric field -->
        <conditional name="genome_size">
          <param name="gsize" type="select" label="Effective genome size"
                 help="Either pre-defined (for common organisms), or user-defined (--gsize)">
            <option value="hs" selected="true">Human (2.7e9)</option>
            <option value="mm">Mouse (1.87e9)</option>
            <option value="ce">C. elegans (9e7)</option>
            <option value="dm">Fruitfly (1.2e8)</option>
            <option value="">User-defined</option>
          </param>
          <!-- Preset organisms need no extra parameters -->
          <when value="hs" />
          <when value="mm" />
          <when value="ce" />
          <when value="dm" />
          <when value="">
            <!-- User-defined effective genome size -->
            <param name="user_defined_gsize" type="float" value=""
                   label="Enter effective genome size (number of bases)"
                   help="e.g. '1.0e+9' or '1000000000'" />
          </when>
        </conditional>
        <param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>
        <param name="xls_to_interval" label="Include XLS file from MACS"
               type="boolean" truevalue="True" falsevalue="False" checked="True"
               help="MACS2 XLS file will be output to the history in 'interval' format (suitable for subsequent analysis in Galaxy). Note that start positions are 1-based."/>

        <!-- bedGraph/bigWig options. The test parameter renders as '-B'
             when checked and as the empty string otherwise; the command
             template and the when-blocks below depend on those values. -->
        <conditional name="bdg_options">
          <param name="bdg"
                 label="Save treatment and control lambda pileups in bedGraph"
                 type="boolean" truevalue="-B" falsevalue="" checked="False" />
          <when value="-B">
            <param name="spmr"
                   type="boolean" truevalue="--SPMR" falsevalue="" checked="False"
                   label="Save signal per million reads for fragment pileup profiles"
                   help="(--SPMR)" />
            <param name="make_bigwig" type="boolean" checked="True"
                   truevalue="True" falsevalue=""
                   label="Also generate bigWig file from bedGraph"
                   help="bigWig file can be used in subsequent analyses e.g. CEAS" />
          </when>
          <when value="">
            <!-- Display nothing -->
          </when>
        </conditional>

        <conditional name="pq_options">
          <param name="pq_options_selector" type="select"
                 label="Select p-value or q-value" help="default uses q-value">
            <option value="qvalue">q-value</option>
            <option value="pvalue">p-value</option>
          </param>
          <when value="pvalue">
            <param name="pvalue" type="float"
                   label="p-value cutoff for binding region detection"
                   value="1e-2" help="default: 1e-2 (--pvalue)"/>
          </when>
          <when value="qvalue">
            <param name="qvalue" type="float"
                   label="q-value cutoff for binding region detection"
                   value="0.01" help="default: 0.01 (--qvalue)"/>
          </when>
        </conditional>
        <conditional name="advanced_options">
          <param name="advanced_options_selector" type="select"
                 label="Display advanced options">
            <option value="off">Hide</option>
            <option value="on">Display</option>
          </param>
          <when value="on">
            <param name="mfoldlo" type="integer"
                   label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (lower-limit)"
                   value="10" help="(--mfold)"/>
            <param name="mfoldhi" type="integer"
                   label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (upper-limit)"
                   value="30" help="(--mfold)"/>
            <param name="nolambda"
                   label="Use fixed background lambda as local lambda for every binding region"
                   type="boolean" truevalue="--nolambda" falsevalue="" checked="False"
                   help="(--nolambda)"/>
            <param name="call_summits"
                   label="Detect subpeaks within binding region"
                   type="boolean" truevalue="--call-summits" falsevalue="" checked="False"
                   help="(--call-summits)"/>
            <!-- Duplicate handling: 'auto', 'all', or the empty value which
                 reveals a numeric maximum -->
            <conditional name="keep_duplicates">
              <param name="keep_dup" type="select"
                     label="Use of duplicate reads">
                <option value="auto">Automatically calculate maximum number of duplicates to keep (auto)</option>
                <option value="all">Use all duplicates (all)</option>
                <option value="" selected="true">Manually specify maximum number of duplicates</option>
              </param>
              <when value="auto" />
              <when value="all" />
              <when value="">
                <param name="maximum_tags" type="integer" value="1"
                       label="Maximum number of duplicated tags to keep at each location"/>
              </when>
            </conditional>
          </when>
          <when value="off">
            <!--display nothing-->
          </when>
        </conditional>
        <conditional name="nomodel_type">
          <param name="nomodel_type_selector" type="select" label="Build Model">
            <option value="nomodel">Do not build the shifting model (--nomodel enabled)</option>
            <option value="create_model" selected="true">Build the shifting model (--nomodel disabled)</option>
          </param>
          <when value="nomodel">
            <param name="extsize" type="integer" label="Arbitrary extension size in bp" value="200" help="Used as fragment size to extend each read towards 3' end (--extsize)"/>
          </when>
          <when value="create_model">
            <!-- Model is built: no extra parameters -->
          </when>
        </conditional>
      </when>

      <!--bdgcmp option of macs2-->
      <!-- NOTE(review): these inputs accept bed/sam/bam and are labelled as
           read files, and the control is optional, although this branch is
           described as comparing .bdg files. Confirm against
           macs21_wrapper.py whether bedGraph inputs and a mandatory control
           are what is actually required. -->
      <when value="bdgcmp">
        <param name="input_chipseq_file1" type="data" format="bed,sam,bam"
               label="ChIP-seq read file" />
        <param name="input_control_file1" type="data" format="bed,sam,bam" optional="True"
               label="ChIP-seq control read file" />
        <param name="pseudocount" type="float" label="Set pseudocount" value="0.00001"
               help="default: 0.00001 (-p)"/>
        <!-- Comparison method; kept as a conditional so the parameter path
             used by the command template stays the same. None of the
             methods has extra parameters. -->
        <conditional name="bdgcmp_options">
          <param name="bdgcmp_options_selector" type="select"
                 label="Select action to be performed">
            <option value="ppois">ppois</option>
            <option value="qpois">qpois</option>
            <option value="subtract">subtract</option>
            <option value="logFE">logFE</option>
            <option value="FE">FE</option>
            <option value="logLR">logLR</option>
          </param>
          <when value="ppois" />
          <when value="qpois" />
          <when value="subtract" />
          <when value="logFE" />
          <when value="FE" />
          <when value="logLR" />
        </conditional>
      </when>
    </conditional>
  </inputs>

  <outputs>
    <!-- Each filter is a Python expression over the tool parameters; an
         output is only created when all of its filters hold. The
         major_command_selector guard is always listed first so that keys
         which exist only in callpeak mode (xls_to_interval, bdg_options,
         broad_options) are looked up only after the mode has been
         checked. -->
    <!--callpeaks output-->
    <data name="output_extra_files" format="html"
          label="${tool.name}: callpeak on ${on_string} (html report)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
    </data>
    <data name="output_summits_bed_file" format="bed"
          label="${tool.name}: callpeak on ${on_string} (summits: bed)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
    </data>
    <!-- Raw MACS2 XLS; only when the interval conversion is switched off -->
    <data name="output_peaks_file" format="xls"
          label="${tool.name}: callpeak on ${on_string} (peaks: xls)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['xls_to_interval'] is False</filter>
    </data>
    <data name="output_narrowpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: narrowPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == ''</filter>
    </data>
    <data name="output_broadpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: broadPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
    </data>
    <data name="output_gappedpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: gappedPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
    </data>
    <data name="output_xls_to_interval_peaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: interval)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['xls_to_interval'] is True</filter>
    </data>
    <!-- bedGraph/bigWig outputs; only when the bedGraph option is checked -->
    <data name="output_treat_pileup_file" format="bedgraph"
          label="${tool.name}: callpeak on ${on_string} (treat pileup: bedGraph)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
    </data>
    <data name="output_lambda_bedgraph_file" format="bedgraph"
          label="${tool.name}: callpeak on ${on_string} (control lambda: bedGraph)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
    </data>
    <data name="output_bigwig_file" format="bigwig"
          label="${tool.name}: callpeak on ${on_string} (treat pileup: bigWig)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
      <filter>major_command['bdg_options']['make_bigwig'] is True</filter>
    </data>
    <!--bdgcmp output-->
    <!-- Uses the same 'bedgraph' datatype as the other bedGraph outputs of
         this tool rather than the extension 'bdg' -->
    <data name="output_bdgcmp_file" format="bedgraph"
          label="${tool.name}: bdgcmp on ${on_string} (bdg)">
      <filter>major_command['major_command_selector'] == 'bdgcmp'</filter>
    </data>
  </outputs>
  <tests>
	<!--none yet for macs2-->
  </tests>
  <help>

.. class:: warningmark

**This is a modified version of the standard Galaxy toolshed "MACS2" tool,
which has been customised for users at the University of Manchester to work
with MACS 2.1.0.**

It is based on the 16:14f378e35191 revision of the tool at

 *  http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2 

------

**What it does**

With the improvement of sequencing techniques, chromatin immunoprecipitation
followed by high-throughput sequencing (ChIP-Seq) is becoming a popular way to
study genome-wide protein-DNA interactions. To address the lack of powerful
ChIP-Seq analysis methods, we present a novel algorithm, named Model-based
Analysis of ChIP-Seq (MACS), for identifying transcription factor binding
sites. MACS captures the influence of genome complexity to evaluate the
significance of enriched ChIP regions, and MACS improves the spatial resolution
of binding sites by combining the information of both sequencing tag position
and orientation. MACS can easily be used for ChIP-Seq data alone, or with a
control sample to increase specificity.

View the original MACS2 documentation:
https://github.com/taoliu/MACS/blob/master/README.rst

------

**Usage**

**Peak Calling**: Main MACS2 function, which calls peaks from alignment results.

**Compare .bdg files**: Remove noise by comparing two signal tracks in bedGraph format.


------

**Citation**

For the underlying tool, please cite Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137.

Integration of MACS2 with Galaxy performed by Ziru Zhou ( ziruzhou@gmail.com ). Please send your comments/questions to modENCODE DCC at help@modencode.org.
  </help>
</tool>