diff rnachipintegrator_canonical_genes.xml @ 0:0abe6bac47a6 draft

planemo upload for repository https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/rnachipintegrator commit 97d556dae96db5457590a3a257392b6e4093a912-dirty
author pjbriggs
date Wed, 24 Feb 2016 09:25:18 -0500
parents
children 4b65b5d0d08d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rnachipintegrator_canonical_genes.xml	Wed Feb 24 09:25:18 2016 -0500
@@ -0,0 +1,202 @@
+<tool id="rnachipintegrator_canonical_genes" name="Analyse canonical genes against 'peak' data" version="@VERSION@.0">
+  <description>using RnaChipIntegrator</description>
+  <macros>
+    <import>rnachipintegrator_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="version_command" />
+  <!-- Cheetah template for the rnachipintegrator_wrapper.sh call: peak columns from dataset metadata, cutoff (0 when blank), optional hit limit, promoter region, edge, output paths, format flags, then the gene file path and peaks dataset --> <command interpreter="bash"><![CDATA[
+  rnachipintegrator_wrapper.sh
+  #if $peaks_in.metadata.chromCol
+    --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol}
+  #end if
+  #if str( $cutoff ) != ""
+    --cutoff=$cutoff
+  #else
+    --cutoff=0
+  #end if
+  #if str( $number ) != ""
+    --number=$number
+  #end if
+  --promoter_region=$promoter_start,$promoter_end
+  --edge=$edge
+  --xlsx_file "$xlsx_out"
+  --output_files "$peaks_per_feature_out" "$features_per_peak_out"
+  #if $output.compact_format
+    --compact
+  #else
+    #if $output.summary
+      --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary"
+    #end if
+    ${output.pad_output}
+  #end if
+  "${canonical_genes.fields.path}" "$peaks_in"
+  ]]></command>
+  <inputs> <!-- Tabular peaks dataset, a gene set picked from the rnachipintegrator_canonical_genes data table, then the analysis and output options expanded from macros -->
+    <param format="tabular" name="peaks_in" type="data" label="Peaks" />
+    <param name="canonical_genes" type="select" label="Canonical genes to analyse peaks against">
+      <options from_data_table="rnachipintegrator_canonical_genes">
+      </options>
+    </param>
+    <expand macro="analysis_options" />
+    <expand macro="output_options" />
+  </inputs>
+  <outputs>
+    <!-- The Excel (xlsx) workbook and the two tabular hit lists carry no filter; each summary dataset has two filters (compact_format is False, summary is True) -->
+    <data format="xlsx" name="xlsx_out"
+	  label="All RnaChipIntegrator analyses: ${canonical_genes.fields.name} vs ${peaks_in.name} (Excel spreadsheet)" />
+    <data format="tabular" name="peaks_per_feature_out"
+	  label="Nearest peaks to each gene: ${canonical_genes.fields.name} vs ${peaks_in.name}" />
+    <data format="tabular" name="features_per_peak_out"
+	  label="Nearest genes to each peak: ${canonical_genes.fields.name} vs ${peaks_in.name}" />
+    <data format="tabular" name="peaks_per_feature_summary"
+	  label="Nearest peaks to each gene (summary): ${canonical_genes.fields.name} vs ${peaks_in.name}" >
+      <filter>output['compact_format'] is False</filter>
+      <filter>output['summary'] is True</filter>
+    </data>
+    <data format="tabular" name="features_per_peak_summary"
+	  label="Nearest genes to each peak (summary): ${canonical_genes.fields.name} vs ${peaks_in.name}" >
+      <filter>output['compact_format'] is False</filter>
+      <filter>output['summary'] is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <!-- Test 1: mm9_summits.txt, cutoff 50000, all other options unset. NOTE(review): no "summary" param is set despite "+summary" below; confirm.
+	RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +summary mm9_canonical_genes.tsv mm9_summits.txt
+    ("+" presumably stands in for a double hyphen, which XML comments cannot contain) -->
+    <test>
+      <param name="peaks_in" value="mm9_summits.txt" ftype="tabular" />
+      <param name="canonical_genes" value="mm9_test" />
+      <param name="cutoff" value="50000" />
+      <output name="xlsx_out" file="mm9_summits.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="mm9_summits_per_feature.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="mm9_features_per_summit.out" />
+    </test>
+    <!-- Test 2: mm9_peaks.txt, cutoff 50000, all other options unset. NOTE(review): "+compact" below presumably relies on a macro default; confirm.
+	RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +compact mm9_canonical_genes.tsv mm9_peaks.txt
+    -->
+    <test>
+      <param name="peaks_in" value="mm9_peaks.txt" ftype="tabular" />
+      <param name="canonical_genes" value="mm9_test" />
+      <param name="cutoff" value="50000" />
+      <output name="xlsx_out" file="mm9_peaks1.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="mm9_peaks_per_feature1.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="mm9_features_per_peak1.out" />
+    </test>
+    <!-- Test 3: mm9_peaks.txt, cutoff 50000, compact format off with summaries and padding on; checks all five outputs.
+	RnaChipIntegrator +name=mm9 +cutoff=50000 +xlsx +summary +pad mm9_canonical_genes.tsv mm9_peaks.txt
+    -->
+    <test>
+      <param name="peaks_in" value="mm9_peaks.txt" ftype="tabular" />
+      <param name="canonical_genes" value="mm9_test" />
+      <param name="cutoff" value="50000" />
+      <param name="compact_format" value="false" />
+      <param name="summary" value="true" />
+      <param name="pad_output" value="true" />
+      <output name="xlsx_out" file="mm9_peaks3.xlsx" compare="sim_size" />
+      <output name="peaks_per_feature_out" ftype="tabular"
+	      file="mm9_peaks_per_feature3.out" />
+      <output name="features_per_peak_out" ftype="tabular"
+	      file="mm9_features_per_peak3.out" />
+      <output name="peaks_per_feature_summary" ftype="tabular"
+	      file="mm9_peaks_per_feature3.summary" />
+      <output name="features_per_peak_summary" ftype="tabular"
+	      file="mm9_features_per_peak3.summary" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+Performs integrated analyses of a set of peaks (e.g. ChIP data) against a
+list of "canonical genes" for a specific organism and genome build,
+identifying the nearest peaks to each canonical gene (and vice versa).
+
+RnaChipIntegrator can be obtained from
+http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
+
+-------------
+
+.. class:: infomark
+
+**Input**
+
+The peak data must be in a tabular file with at least 3 columns of data
+for each peak (one peak per line):
+
+====== ========== =================================
+Column Name       Description
+====== ========== =================================
+     1 chr        Chromosome name
+     2 start      Start position of the peak
+     3 end        End position of the peak
+====== ========== =================================
+
+-------------
+
+.. class:: infomark
+
+**Outputs**
+
+The key outputs from the tool are two lists comprising the nearest
+peaks for each gene, and the nearest gene for each peak (one
+dataset for each list).
+
+There are two formats for reporting: "compact" and "full":
+
+ * **Compact output** reports all the hits for each peak or gene on
+   a single line of output;
+ * **Full output** reports each peak/gene pair on a separate line
+   (i.e. a multi-line output format).
+
+In "full" output mode, additional options are available:
+
+ * The output files can be "padded" with extra (empty) lines to ensure
+   that there are always the same number of lines for each peak or
+   gene, if fewer than the requested number of hits are found.
+ * "Summary" datasets can also be requested, which include just the
+   nearest peak reported for each gene (and vice versa).
+
+In either mode these data will also be output in a single MS Excel file,
+which contains one sheet per result set.
+
+.. class:: warning
+
+Using "compact" output with the number of hits limited to more than 4
+peak/gene pairs (or with no limit at all) can result in a large number
+of columns in the output files, which in some versions of Galaxy will
+not be properly displayed. However the data files themselves should be
+okay.
+
+-------------
+
+.. class:: infomark
+
+**More information**
+
+It is recommended that you refer to the ``RnaChipIntegrator``
+documentation for information on the contents of each output file:
+
+* http://rnachipintegrator.readthedocs.org/en/latest/
+
+-------------
+
+.. class:: infomark
+
+**Credits**
+
+This Galaxy tool has been developed within the Bioinformatics Core Facility at the
+University of Manchester. It runs the RnaChipIntegrator package which has also been
+developed by this group, and is documented at
+https://pypi.python.org/pypi/RnaChipIntegrator/
+
+Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
+  </help>
+  <expand macro="citations" />
+</tool>