view macs21_wrapper.xml @ 8:78c15c0a96ae draft

Uploaded new version with minor fixes and updates to help text.
author pjbriggs
date Tue, 21 Apr 2015 10:33:52 -0400
parents 344dd37d1704
children 7aecd0908b3c
line wrap: on
line source

<tool id="macs2_1_peakcalling" name="MACS2.1.0" version="2.1.0-2">
  <requirements>
    <!-- Packages this tool declares as dependencies, each pinned to a specific version -->
    <requirement type="package" version="2.7">python</requirement>
    <requirement type="package" version="1.9">numpy</requirement>
    <requirement type="package" version="2.1.0.20140616">macs2</requirement>
    <requirement type="package" version="3.1.2">R</requirement>
    <!-- NOTE(review): presumably supplies the UCSC utility used to make the bigWig output; confirm -->
    <requirement type="package" version="1.0">ucsc_tools_for_macs21</requirement>
  </requirements>
  <description>Model-based Analysis of ChIP-Seq</description>
  <command interpreter="python">
    macs21_wrapper.py
    ##
    ## Cheetah template assembling the arguments for macs21_wrapper.py:
    ## the major command and the ChIP-seq input come first, followed by
    ## the options belonging to the selected major command.
    ##
    ## Major command
    $major_command.major_command_selector
    ##
    ## ChIP-seq input
    $major_command.input_chipseq_file1
    ##
    ## ChIP-seq control (only passed when a control dataset was supplied)
    #if str($major_command.input_control_file1) != 'None'
       -c $major_command.input_control_file1
    #end if
    ##
    ## Call peaks
    #if str($major_command.major_command_selector) == 'callpeak'
       --format=$major_command.input_chipseq_file1.extension
       --name="$experiment_name"
       --bw=$major_command.bw
       ##
       ## Genome size (an empty selector value means 'user-defined')
       #if str($major_command.genome_size.gsize) == ''
          --gsize=$major_command.genome_size.user_defined_gsize
       #else
          --gsize=$major_command.genome_size.gsize
       #end if
       ##
       ## Broad peaks
       #if str($major_command.broad_options.broad_regions) == 'broad'
          --broad --broad-cutoff=$major_command.broad_options.broad_cutoff
       #end if
       ##
       ## (no)model options
       #if str($major_command.nomodel_type.nomodel_type_selector) == 'nomodel'
          --nomodel --extsize=$major_command.nomodel_type.extsize
       #end if
       ##
       ## pq value select options
       #if str($major_command.pq_options.pq_options_selector) == 'qvalue'
          --qvalue=$major_command.pq_options.qvalue
       #else
          --pvalue=$major_command.pq_options.pvalue
       #end if
       ##
       ## Bedgraph options
       ## The 'bdg' boolean is declared with truevalue '-B' (and an empty
       ## falsevalue), so its string form is compared against '-B' here.
       #if str($major_command.bdg_options.bdg) == '-B'
          -B $major_command.bdg_options.spmr
       #end if
       ##
       ## Advanced options
       #if str($major_command.advanced_options.advanced_options_selector) == 'on'
          --mfold $major_command.advanced_options.mfoldlo $major_command.advanced_options.mfoldhi
          $major_command.advanced_options.nolambda
          $major_command.advanced_options.call_summits
          ## An empty selector value means the maximum is given manually
          #if str($major_command.advanced_options.keep_duplicates.keep_dup) == ''
             --keep-dup $major_command.advanced_options.keep_duplicates.maximum_tags
          #else
             --keep-dup $major_command.advanced_options.keep_duplicates.keep_dup
          #end if
       #else
          ## Defaults if advanced options not set
          --mfold 10 30 --keep-dup 1
       #end if
       ##
       ## Output files
       --output-summits=$output_summits_bed_file
       --output-extra-files=$output_extra_files
       --output-extra-files-path=$output_extra_files.files_path
       ##
       ## Narrow/broad peak outputs
       #if str($major_command.broad_options.broad_regions) == 'broad'
          --output-broadpeaks=$output_broadpeaks_file
          --output-gappedpeaks=$output_gappedpeaks_file
       #else
          --output-narrowpeaks=$output_narrowpeaks_file
       #end if
       ##
       ## Bedgraph outputs
       ## Same test as for the '-B' option above, so the output file
       ## arguments are passed whenever bedGraph generation is requested.
       #if str($major_command.bdg_options.bdg) == '-B'
          --output-pileup=$output_treat_pileup_file
          --output-lambda-bedgraph=$output_lambda_bedgraph_file
          #if str($major_command.bdg_options.make_bigwig) == 'True'
             --output-bigwig=$output_bigwig_file
             --length=$GALAXY_DATA_INDEX_DIR/shared/ucsc/chrom/${major_command.input_chipseq_file1.dbkey}.len
          #end if
       #end if
       ##
       ## XLS/interval output
       #if str($major_command.xls_to_interval) == 'True'
          --output-xls-to-interval=$output_xls_to_interval_peaks_file
       #else
          --output-peaks=$output_peaks_file
       #end if
    #end if
    ##
    ## Compare .bdg files
    #if str($major_command.major_command_selector) == 'bdgcmp'
       -m $major_command.bdgcmp_options.bdgcmp_options_selector
       -p $major_command.pseudocount
       --output-bdgcmp $output_bdgcmp_file
    #end if
  </command>
  <inputs>
    <!-- Experiment name, used as the base for output file names -->
    <param name="experiment_name" type="text" value="MACS2.1.0 in Galaxy" size="50"
           label="Experiment Name"/>
    <!-- Select a major MACS2 command; each 'when' holds that command's parameters -->
    <conditional name="major_command">
      <param name="major_command_selector" type="select" label="Select action to be performed">
        <option value="callpeak">Peak Calling</option>
        <option value="bdgcmp">Compare .bdg Files</option>
      </param>
      <!-- callpeak option of macs2 -->
      <when value="callpeak">
        <!-- Choose 'broad' or 'narrow' regions (empty value = narrow) -->
        <conditional name="broad_options">
          <param name="broad_regions" type="select" label="Type of region to call"
                 help="Broad regions are formed by linking nearby enriched regions">
            <option value="" selected="true">Narrow regions</option>
            <option value="broad">Broad regions</option>
          </param>
          <when value="broad">
            <param name="broad_cutoff" type="float"
                   label="Cutoff for broad regions"
                   value="0.1" help="default: 0.1 (--broad-cutoff)"/>
          </when>
        </conditional>
        <param name="input_chipseq_file1" type="data" format="bed,sam,bam"
               label="ChIP-seq read file" />
        <param name="input_control_file1" type="data" format="bed,sam,bam" optional="True"
               label="ChIP-seq control read file" />
        <!-- Effective genome size: pre-defined organism or user-defined (empty value) -->
        <conditional name="genome_size">
          <param name="gsize" type="select" label="Effective genome size"
                 help="Either pre-defined (for common organisms), or user-defined (--gsize)">
            <option value="hs" selected="true">Human (2.7e9)</option>
            <option value="mm">Mouse (1.87e9)</option>
            <option value="ce">C. elegans (9e7)</option>
            <option value="dm">Fruitfly (1.2e8)</option>
            <option value="">User-defined</option>
          </param>
          <when value="">
            <!-- User-defined effective genome size -->
            <param name="user_defined_gsize" type="float" value=""
                   label="Enter effective genome size (number of bases)"
                   help="e.g. '1.0e+9' or '1000000000'" />
          </when>
        </conditional>
        <param name="bw" type="integer" label="Band width" value="300" help="(--bw)"/>
        <param name="xls_to_interval" label="Include XLS file from MACS"
               type="boolean" truevalue="True" falsevalue="False" checked="True"
               help="MACS2 XLS file will be output to the history in 'interval' format (suitable for subsequent analysis in Galaxy). Note that start positions are 1-based."/>

        <!-- bedGraph/bigWig generation; the command template tests 'bdg' against its truevalue -->
        <conditional name="bdg_options">
          <param name="bdg"
                 label="Save treatment and control lambda pileups in bedGraph"
                 type="boolean" truevalue="-B" falsevalue="" checked="False" />
          <when value="-B">
            <param name="spmr"
                   type="boolean" truevalue="--SPMR" falsevalue="" checked="False"
                   label="Save signal per million reads for fragment pileup profiles"
                   help="(--SPMR)" />
            <param name="make_bigwig" type="boolean" checked="True"
                   truevalue="True" falsevalue=""
                   label="Also generate bigWig file from bedGraph"
                   help="bigWig file can be used in subsequent analyses e.g. CEAS" />
          </when>
          <when value="">
            <!-- Display nothing -->
          </when>
        </conditional>

        <!-- Significance cutoff: either a q-value or a p-value -->
        <conditional name="pq_options">
          <param name="pq_options_selector" type="select"
                 label="Select p-value or q-value" help="default uses q-value">
            <option value="qvalue">q-value</option>
            <option value="pvalue">p-value</option>
          </param>
          <when value="pvalue">
            <param name="pvalue" type="float"
                   label="p-value cutoff for binding region detection"
                   value="1e-2" help="default: 1e-2 (--pvalue)"/>
          </when>
          <when value="qvalue">
            <param name="qvalue" type="float"
                   label="q-value cutoff for binding region detection"
                   value="0.01" help="default: 0.01 (--qvalue)"/>
          </when>
        </conditional>
        <!-- Advanced options; when hidden the command template applies fixed defaults -->
        <conditional name="advanced_options">
          <param name="advanced_options_selector" type="select"
                 label="Display advanced options">
            <option value="off">Hide</option>
            <option value="on">Display</option>
          </param>
          <when value="on">
            <param name="mfoldlo" type="integer"
                   label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (lower-limit)"
                   value="10" help="(--mfold)"/>
            <param name="mfoldhi" type="integer"
                   label="Select the regions with MFOLD high-confidence enrichment ratio against background to build model (upper-limit)"
                   value="30" help="(--mfold)"/>
            <param name="nolambda"
                   label="Use fixed background lambda as local lambda for every binding region"
                   type="boolean" truevalue="--nolambda" falsevalue="" checked="False"
                   help="(--nolambda)"/>
            <param name="call_summits"
                   label="Detect subpeaks within binding region"
                   type="boolean" truevalue="--call-summits" falsevalue="" checked="False"
                   help="(--call-summits)"/>
            <!-- Duplicate handling: 'auto', 'all', or a manual maximum (empty value) -->
            <conditional name="keep_duplicates">
              <param name="keep_dup" type="select"
                     label="Use of duplicate reads">
                <option value="auto">Automatically calculate maximum number of duplicates to keep (auto)</option>
                <option value="all">Use all duplicates (all)</option>
                <option value="" selected="true">Manually specify maximum number of duplicates</option>
              </param>
              <when value="">
                <param name="maximum_tags" type="integer" value="1"
                       label="Maximum number of duplicated tags to keep at each location"/>
              </when>
            </conditional>
          </when>
          <when value="off">
            <!-- Display nothing -->
          </when>
        </conditional>
        <!-- Shifting model: build it, or skip it and use a fixed extension size -->
        <conditional name="nomodel_type">
          <param name="nomodel_type_selector" type="select" label="Build Model">
            <option value="nomodel">Do not build the shifting model (--nomodel enabled)</option>
            <option value="create_model" selected="true">Build the shifting model (--nomodel disabled)</option>
          </param>
          <when value="nomodel">
            <param name="extsize" type="integer" label="Arbitrary extension size in bp" value="200" help="Used as fragment size to extend each read towards 3' end (--extsize)"/>
          </when>
        </conditional>
      </when>

      <!-- bdgcmp option of macs2 -->
      <when value="bdgcmp">
        <!-- bdgcmp compares two signal tracks in bedGraph format, so both
             inputs are bedGraph datasets and the control track is required -->
        <param name="input_chipseq_file1" type="data" format="bedgraph"
               label="ChIP-seq treatment bedGraph file" />
        <param name="input_control_file1" type="data" format="bedgraph"
               label="ChIP-seq control bedGraph file" />
        <param name="pseudocount" type="float" label="Set pseudocount" value="0.00001"
               help="default: 0.00001 (-p)"/>
        <!-- Comparison method passed to the wrapper with -m -->
        <conditional name="bdgcmp_options">
          <param name="bdgcmp_options_selector" type="select"
                 label="Select action to be performed">
            <option value="ppois">ppois</option>
            <option value="qpois">qpois</option>
            <option value="subtract">subtract</option>
            <option value="logFE">logFE</option>
            <option value="FE">FE</option>
            <option value="logLR">logLR</option>
          </param>
        </conditional>
      </when>
    </conditional>
  </inputs>

  <outputs>
    <!-- callpeak outputs.
         Each output lists the major command filter first, so filters that
         read callpeak-only parameters are not the first ones tested when
         bdgcmp is selected. -->
    <data name="output_extra_files" format="html"
          label="${tool.name}: callpeak on ${on_string} (html report)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
    </data>
    <data name="output_summits_bed_file" format="bed"
          label="${tool.name}: callpeak on ${on_string} (summits: bed)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
    </data>
    <!-- Raw XLS peaks: only when conversion to interval format is switched off -->
    <data name="output_peaks_file" format="xls"
          label="${tool.name}: callpeak on ${on_string} (peaks: xls)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['xls_to_interval'] is False</filter>
    </data>
    <!-- Narrow regions produce narrowPeak; broad regions produce broadPeak and gappedPeak -->
    <data name="output_narrowpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: narrowPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == ''</filter>
    </data>
    <data name="output_broadpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: broadPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
    </data>
    <data name="output_gappedpeaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: gappedPeak)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['broad_options']['broad_regions'] == 'broad'</filter>
    </data>
    <!-- XLS peaks converted to interval format -->
    <data name="output_xls_to_interval_peaks_file" format="interval"
          label="${tool.name}: callpeak on ${on_string} (peaks: interval)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['xls_to_interval'] is True</filter>
    </data>
    <!-- bedGraph pileups, only when bedGraph generation is requested -->
    <data name="output_treat_pileup_file" format="bedgraph"
          label="${tool.name}: callpeak on ${on_string} (treat pileup: bedGraph)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
    </data>
    <data name="output_lambda_bedgraph_file" format="bedgraph"
          label="${tool.name}: callpeak on ${on_string} (control lambda: bedGraph)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
    </data>
    <!-- bigWig additionally requires the 'make_bigwig' option -->
    <data name="output_bigwig_file" format="bigwig"
          label="${tool.name}: callpeak on ${on_string} (treat pileup: bigWig)">
      <filter>major_command['major_command_selector'] == 'callpeak'</filter>
      <filter>major_command['bdg_options']['bdg'] is True</filter>
      <filter>major_command['bdg_options']['make_bigwig'] is True</filter>
    </data>
    <!-- bdgcmp output: uses the same 'bedgraph' format as the callpeak pileup outputs -->
    <data name="output_bdgcmp_file" format="bedgraph"
          label="${tool.name}: bdgcmp on ${on_string} (bdg)">
      <filter>major_command['major_command_selector'] == 'bdgcmp'</filter>
    </data>
  </outputs>
  <tests>
	<!-- No functional tests have been defined yet for this MACS2 tool -->
  </tests>
  <help>
**What it does**

MACS (Model-based Analysis of ChIP-seq) provides algorithms for identifying
transcription factor binding sites. The program can be used either for ChIP-Seq
data alone, or with control sample data to improve specificity.

View the MACS2 documentation at:
https://github.com/taoliu/MACS/blob/master/README.rst

------

**Usage**

The tool interfaces with two main functions in MACS:

 * **callpeak** (the main function) calls peaks from alignment results
 * **bdgcmp** deducts noise by comparing two signal tracks in bedGraph format.

------

**Credits**

This Galaxy tool was based on the MACS2 tool hosted in the Galaxy toolshed at

 * http://toolshed.g2.bx.psu.edu/view/modencode-dcc/macs2

(specifically the 16:14f378e35191 revision of the tool) which is credited to Ziru
Zhou. This version is a reimplemented version developed within the Bioinformatics
Core Facility at the University of Manchester, which uses more up-to-date Galaxy
syntax and adds some extra features.

The tool runs Tao Liu's MACS2 software:

 * https://github.com/taoliu/MACS

The reference for MACS is:

 * Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C,
   Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS).
   Genome Biol. 2008;9(9):R137.

Please kindly acknowledge both this Galaxy tool and the MACS2 package if you
use it.
  </help>
  <citations>
    <!--
    See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
    Can be either DOI or Bibtex
    Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex
    -->
    <!-- NOTE(review): DOI appears to match the MACS reference given in the help
         text (Genome Biol. 2008;9(9):R137); confirm -->
    <citation type="doi">10.1186/gb-2008-9-9-r137</citation>
  </citations>
</tool>