Mercurial > repos > pjbriggs > rnachipintegrator
diff rnachipintegrator_wrapper.xml @ 0:0abe6bac47a6 draft
planemo upload for repository https://github.com/fls-bioinformatics-core/galaxy-tools/tree/master/rnachipintegrator commit 97d556dae96db5457590a3a257392b6e4093a912-dirty
author | pjbriggs |
---|---|
date | Wed, 24 Feb 2016 09:25:18 -0500 |
parents | |
children | d37526cd12aa |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rnachipintegrator_wrapper.xml Wed Feb 24 09:25:18 2016 -0500 @@ -0,0 +1,264 @@ +<?xml version="1.0" encoding="utf-8"?> +<tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@.0"> + <description>Integrated analysis of 'gene' and 'peak' data</description> + <macros> + <import>rnachipintegrator_macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_command" /> + <command interpreter="bash"><![CDATA[ + rnachipintegrator_wrapper.sh + #if $peaks_in.metadata.chromCol + --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol} + #end if + #if str( $cutoff ) != "" + --cutoff=$cutoff + #else + --cutoff=0 + #end if + #if str( $number ) != "" + --number=$number + #end if + --promoter_region=$promoter_start,$promoter_end + --edge=$edge + $diff_expressed_only + --xlsx_file "$xlsx_out" + --output_files "$peaks_per_feature_out" "$features_per_peak_out" + #if $output.compact_format + --compact + #else + #if $output.summary + --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary" + #end if + ${output.pad_output} + #end if + "$features_in" "$peaks_in" + ]]></command> + <inputs> + <param format="tabular" name="features_in" type="data" + label="Genes/genomic features" /> + <param format="tabular" name="peaks_in" type="data" + label="Peaks/regions" /> + <expand macro="analysis_options" /> + <param name="diff_expressed_only" type="boolean" + truevalue="--only-DE" falsevalue="" checked="false" + label="Only consider genes which are flagged as differentially + expressed" + help="NB input feature data must include differential expression + flags (--only-DE)" /> + <expand macro="output_options" /> + </inputs> + <outputs> + <!-- Always produce XLSX output --> + <data format="xlsx" name="xlsx_out" + label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" /> + <data 
format="tabular" name="peaks_per_feature_out" + label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" /> + <data format="tabular" name="features_per_peak_out" + label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" /> + <data format="tabular" name="peaks_per_feature_summary" + label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" > + <filter>output['compact_format'] is False</filter> + <filter>output['summary'] is True</filter> + </data> + <data format="tabular" name="features_per_peak_summary" + label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" > + <filter>output['compact_format'] is False</filter> + <filter>output['summary'] is True</filter> + </data> + </outputs> + <tests> + <!-- + RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt + --> + <test> + <param name="features_in" value="features.txt" ftype="tabular" /> + <param name="peaks_in" value="summits.txt" ftype="tabular" /> + <param name="cutoff" value="130000" /> + <param name="promoter_start" value="-10000" /> + <param name="promoter_end" value="2500" /> + <output name="xlsx_out" file="summits.xlsx" compare="sim_size" /> + <output name="peaks_per_feature_out" ftype="tabular" + file="summits_per_feature.out" /> + <output name="features_per_peak_out" ftype="tabular" + file="features_per_summit.out" /> + </test> + <!-- + RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt + --> + <test> + <param name="features_in" value="features.txt" ftype="tabular" /> + <param name="peaks_in" value="peaks.txt" ftype="tabular" /> + <param name="cutoff" value="130000" /> + <param name="promoter_start" value="-10000" /> + <param name="promoter_end" value="2500" /> + <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" /> + <output name="peaks_per_feature_out" ftype="tabular" + 
file="peaks_per_feature1.out" /> + <output name="features_per_peak_out" ftype="tabular" + file="features_per_peak1.out" /> + </test> + <!-- + RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt + --> + <test> + <param name="features_in" value="features.txt" ftype="tabular" /> + <param name="peaks_in" value="peaks.txt" ftype="tabular" /> + <param name="cutoff" value="130000" /> + <param name="compact_format" value="false" /> + <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" /> + <output name="peaks_per_feature_out" ftype="tabular" + file="peaks_per_feature2.out" /> + <output name="features_per_peak_out" ftype="tabular" + file="features_per_peak2.out" /> + </test> + <!-- + RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt + --> + <test> + <param name="features_in" value="features.txt" ftype="tabular" /> + <param name="peaks_in" value="peaks.txt" ftype="tabular" /> + <param name="cutoff" value="130000" /> + <param name="diff_expressed_only" value="true" /> + <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" /> + <output name="peaks_per_feature_out" ftype="tabular" + file="peaks_per_feature3.out" /> + <output name="features_per_peak_out" ftype="tabular" + file="features_per_peak3.out" /> + </test> + <!-- + RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt + --> + <test> + <param name="features_in" value="features.txt" ftype="tabular" /> + <param name="peaks_in" value="peaks.txt" ftype="tabular" /> + <param name="cutoff" value="130000" /> + <param name="compact_format" value="false" /> + <param name="summary" value="true" /> + <param name="pad_output" value="true" /> + <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" /> + <output name="peaks_per_feature_out" ftype="tabular" + file="peaks_per_feature4.out" /> + <output name="features_per_peak_out" ftype="tabular" + file="features_per_peak4.out" /> + <output 
name="peaks_per_feature_summary" ftype="tabular" + file="peaks_per_feature4.summary" /> + <output name="features_per_peak_summary" ftype="tabular" + file="features_per_peak4.summary" /> + </test> + </tests> + <help> + +.. class:: infomark + +**What it does** + +Performs integrated analyses of genes (or other genomic feature data) +against a set of peaks (e.g. ChIP data), identifying the nearest peaks to +each feature and vice versa. + +The program was originally written specifically for ChIP-Seq and RNA-Seq +data but works equally well for ChIP-chip and microarray expression data, +and can also be used to integrate any set of genomic features (e.g. +canonical genes, CpG islands) with expression data. + +RnaChipIntegrator can be obtained from +https://pypi.python.org/pypi/RnaChipIntegrator/ + +------------- + +.. class:: infomark + +**Input** + +The gene data must be in a tabular file with the following columns +of data for each gene or genomic feature (one gene per line): + +====== ========== ====================================================================== +Column Name Description +====== ========== ====================================================================== + 1 ID Name used to identify the gene in the output + 2 chr Chromosome name + 3 start Start position of the gene + 4 end End position of the gene + 5 strand Must be either '+' or '-' + 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0) +====== ========== ====================================================================== + +The peak data must be in a tabular file with at least 3 columns of data +for each peak (one peak per line): + +====== ========== ================================= +Column Name Description +====== ========== ================================= + 1 chr Chromosome name + 2 start Start position of the peak + 3 end End position of the peak +====== ========== ================================= + +If peak data is in ``bed`` format then the tool will 
automatically +assign the correct columns, otherwise the first three columns of data +will be used. + +------------- + +.. class:: infomark + +**Outputs** + +The key outputs from the tool are two lists comprising the nearest +peaks for each gene, and the nearest gene for each peak (one dataset +for each list). + +There are two formats for reporting: "compact" and "full": + + * **Compact output** reports all the hits for each peak or gene on + a single line of output; + * **Full output** reports each peak/gene pair on a separate line + (i.e. a multi-line output format). + +In "full" output mode, additional options are available: + + * The output files can be "padded" with extra (empty) lines to ensure + that there are always the same number of lines for each peak or + gene, if fewer than the requested number of hits are found. + * "Summary" datasets can also be requested, which include just the + nearest peak reported for each gene (and vice versa). + +In either mode these data will also be output in a single MS Excel file, +which contains one sheet per result set. + +.. class:: warning + +Using "compact" output with the number of hits limited to more than 4 +peak/gene pairs (or with no limit at all) can result in a large number +of columns in the output files, which in some versions of Galaxy will +not be properly displayed. However the data files themselves should be +okay. + +------------- + +.. class:: infomark + +**More information** + +It is recommended that you refer to the ``RnaChipIntegrator`` +documentation for information on the contents of each output file: + +* http://rnachipintegrator.readthedocs.org/en/latest/ + +------------- + +.. class:: infomark + +**Credits** + +This Galaxy tool has been developed within the Bioinformatics Core Facility at the +University of Manchester. 
It runs the RnaChipIntegrator package which has also been +developed by this group, and is documented at +http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ + +Please kindly acknowledge the Bioinformatics Core Facility if you use this tool. + </help> + <expand macro="citations" /> +</tool>