diff rnachipintegrator_wrapper.xml @ 0:0abe6bac47a6 draft

planemo upload for repository https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/rnachipintegrator commit 97d556dae96db5457590a3a257392b6e4093a912-dirty
author pjbriggs
date Wed, 24 Feb 2016 09:25:18 -0500
parents
children d37526cd12aa
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rnachipintegrator_wrapper.xml	Wed Feb 24 09:25:18 2016 -0500
@@ -0,0 +1,264 @@
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0">
+  <description>Integrated analysis of 'gene' and 'peak' data</description>
+  <macros>
+    <import>rnachipintegrator_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="version_command" />
+  <command interpreter="bash"><![CDATA[
+  rnachipintegrator_wrapper.sh
+  #if $peaks_in.metadata.chromCol
+    --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
+  #end if
+  #if str( $cutoff ) != ""
+    --cutoff=$cutoff
+  #else
+    --cutoff=0
+  #end if
+  #if str( $number ) != ""
+    --number=$number
+  #end if
+  --promoter_region=$promoter_start,$promoter_end
+  --edge=$edge
+  $diff_expressed_only
+  --xlsx_file "$xlsx_out"
+  --output_files "$peaks_per_feature_out" "$features_per_peak_out"
+  #if $output.compact_format
+    --compact
+  #else
+    #if $output.summary
+      --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
+    #end if
+    ${output.pad_output}
+  #end if
+  "$features_in" "$peaks_in"
+  ]]></command>
+  <inputs>
+    <param format="tabular" name="features_in" type="data"
+	   label="Genes/genomic features" />
+    <param format="tabular" name="peaks_in" type="data"
+	   label="Peaks/regions" />
+    <expand macro="analysis_options" />
+    <param name="diff_expressed_only" type="boolean"
+	   truevalue="--only-DE" falsevalue="" checked="false"
+	   label="Only consider genes which are flagged as differentially
+		  expressed"
+           help="NB input feature data must include differential expression
+		 flags (--only-DE)" />
+    <expand macro="output_options" />
+  </inputs>
+  <outputs>
+    <!-- Always produce XLSX output -->
+    <data format="xlsx" name="xlsx_out"
+	  label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" />
+    <data format="tabular" name="peaks_per_feature_out"
+	  label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" />
+    <data format="tabular" name="features_per_peak_out"
+	  label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" />
+    <data format="tabular" name="peaks_per_feature_summary"
+	  label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" >
+      <filter>output['compact_format'] is False</filter>
+      <filter>output['summary'] is True</filter>
+    </data>
+    <data format="tabular" name="features_per_peak_summary"
+	  label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" >
+      <filter>output['compact_format'] is False</filter>
+      <filter>output['summary'] is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <!--
+	RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt
+    -->
+    <test>
+      <param name="features_in" value="features.txt" ftype="tabular" />
+      <param name="peaks_in" value="summits.txt" ftype="tabular" />
+      <param name="cutoff" value="130000" />
+      <param name="promoter_start" value="-10000" />
+      <param name="promoter_end" value="2500" />
+      <output name="xlsx_out" file="summits.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="summits_per_feature.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="features_per_summit.out" />
+    </test>
+    <!--
+	RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt
+    -->
+    <test>
+      <param name="features_in" value="features.txt" ftype="tabular" />
+      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
+      <param name="cutoff" value="130000" />
+      <param name="promoter_start" value="-10000" />
+      <param name="promoter_end" value="2500" />
+      <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="peaks_per_feature1.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="features_per_peak1.out" />
+    </test>
+    <!--
+	RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt
+    -->
+    <test>
+      <param name="features_in" value="features.txt" ftype="tabular" />
+      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
+      <param name="cutoff" value="130000" />
+      <param name="compact_format" value="false" />
+      <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="peaks_per_feature2.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="features_per_peak2.out" />
+    </test>
+    <!--
+	RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt
+    -->
+    <test>
+      <param name="features_in" value="features.txt" ftype="tabular" />
+      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
+      <param name="cutoff" value="130000" />
+      <param name="diff_expressed_only" value="true" />
+      <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="peaks_per_feature3.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="features_per_peak3.out" />
+    </test>
+    <!--
+	RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt
+    -->
+    <test>
+      <param name="features_in" value="features.txt" ftype="tabular" />
+      <param name="peaks_in" value="peaks.txt" ftype="tabular" />
+      <param name="cutoff" value="130000" />
+      <param name="compact_format" value="false" />
+      <param name="summary" value="true" />
+      <param name="pad_output" value="true" />
+      <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="peaks_per_feature4.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="features_per_peak4.out" />
+      <output name="peaks_per_feature_summary" ftype="tabular"
+	      file="peaks_per_feature4.summary" />
+      <output name="features_per_peak_summary" ftype="tabular"
+	      file="features_per_peak4.summary" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+Performs integrated analyses of genes (or other genomic feature data)
+gainst a set of peaks (e.g. ChIP data), identifying the nearest peaks to
+each feature and vice versa.
+
+The program was originally written specifically for ChIP-Seq and RNA-Seq
+data but works equally well for ChIP-chip and microarray expression data,
+and can also be used to integrate any set of genomic features (e.g.
+canonical genes, CpG islands) with expression data.
+
+RnaChipIntegrator can be obtained from
+https://pypi.python.org/pypi/RnaChipIntegrator/
+
+-------------
+
+.. class:: infomark
+
+**Input**
+
+The gene data must be in a tabular file with the following columns
+of data for each gene or genomic feature (one gene per line):
+
+====== ========== ======================================================================
+Column Name       Description
+====== ========== ======================================================================
+     1 ID         Name used to identify the gene in the output
+     2 chr        Chromosome name
+     3 start      Start position of the gene
+     4 end        End position of the gene
+     5 strand     Must be either '+' or '-'
+     6 diff_expr  Optional: indicates gene is differentially expressed (1) or not (0)
+====== ========== ======================================================================
+
+The peak data must be in a tabular file with at least 3 columns of data
+for each peak (one peak per line):
+
+====== ========== =================================
+Column Name       Description
+====== ========== =================================
+     1 chr        Chromosome name
+     2 start      Start position of the peak 
+     3 end        End position of the peak
+====== ========== =================================
+
+If peak data is in ``bed`` format then the tool will automatically
+assign the correct columns, otherwise the first three columns of data
+will be used.
+
+-------------
+
+.. class:: infomark
+
+**Outputs**
+
+The key outputs from the tool are two lists compromising the nearest
+peaks for each gene, and the nearest gene for each peak (one dataset
+for each list).
+
+There are two formats for reporting: "compact" and "full":
+
+ * **Compact output** reports all the hits for each peak or gene on
+   a single line of output;
+ * **Full output** reports each peak/gene pair on a separate line
+   (i.e. a multi-line output format).
+
+In "full" output mode, additional options are available:
+
+ * The output files can be "padded" with extra (empty) lines to ensure
+   that there are always the same number of lines for each peak or
+   gene, if fewer than the requested number of hits are found.
+ * "Summary" datasets can also be requested, which include just the
+   nearest peak reported for each gene (and vice versa).
+
+In either mode these data will also be output in a single MS Excel file,
+which contains one sheet per result set.
+
+.. class:: warning
+
+Using "compact" output with the number of hits limited to more than 4
+peak/gene pairs (or with no limit at all) can result in a large number
+of columns in the output files, which in some versions of Galaxy will
+not be properly displayed. However the data files themselves should be
+okay.
+
+-------------
+
+.. class:: informark
+
+**More information**
+
+It is recommended that you refer to the ``RnaChipIntegrator``
+documentation for information on the contents of each output file:
+
+* http://rnachipintegrator.readthedocs.org/en/latest/
+
+-------------
+
+.. class:: infomark
+
+**Credits**
+
+This Galaxy tool has been developed within the Bioinformatics Core Facility at the
+University of Manchester. It runs the RnaChipIntegrator package which has also been
+developed by this group, and is documented at
+http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
+
+Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
+  </help>
+  <expand macro="citations" />
+</tool>