diff scHicQualityControl.xml @ 0:aff673b3928c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 2a80f777c0221752232882c0d43b55f2b1dcd223"
author iuc
date Thu, 23 Jan 2020 20:56:34 +0000
parents
children e06c31798a26
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scHicQualityControl.xml	Thu Jan 23 20:56:34 2020 +0000
@@ -0,0 +1,116 @@
+<tool id="schicexplorer_schicqualitycontrol" name="@BINARY@" version="@WRAPPER_VERSION@.0">
+    <description>quality control for single-cell Hi-C interaction matrices</description>
+    <macros>
+        <token name="@BINARY@">scHicQualityControl</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        @BINARY@
+
+        --matrix '$matrix_mcooler'
+        #if $chromosomes:
+            #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ])
+            --chromosomes $chromosome
+        #end if
+        --minimumReadCoverage $minimumReadCoverage
+        --minimumDensity $minimumDensity
+        --maximumRegionToConsider $maximumRegionToConsider
+        #if $dpi:
+            --dpi $dpi
+        #end if 
+        --outFileNameDensity plot_density.$image_file_format
+        --outFileNameReadCoverage plot_read_coverage.$image_file_format
+        --outFileNameQCReport report.txt
+        --outputMcool filtered_matrices.mcool
+
+        --threads @THREADS@
+
+        && mv plot_density.$image_file_format plot_density
+        && mv plot_read_coverage.$image_file_format plot_read_coverage
+
+
+    ]]></command>
+    <inputs>
+        <expand macro="matrix_mcooler_macro"/>
+        <param name="minimumReadCoverage" type="integer" value="1000000"  label="Minimum read coverage" help='Remove all samples with a lower read coverage as this value.' />   
+        <param name="minimumDensity" type="float" value="0.001"  label="Minimum density" help='Remove all samples with a lower density as this value.' />   
+        <param name="maximumRegionToConsider" type="integer" value="30000000"  label="Maximum region to consider" help='To compute the density, consider only this genomic distance around the diagonal.' />   
+        <param name='chromosomes' type='text' label='List of chromosomes to consider' help='Please separate the chromosomes by space'/>
+        <param name='dpi' type='integer' label='DPI for image' help='Change the default resolution of the plot.' optional='true'/>
+        <param name="image_file_format" type="select" label="Image output format">
+            <option value="png" selected="True">png</option>
+            <option value="svg">svg</option>
+            <option value="pdf">pdf</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name='output_plot_density' from_work_dir='plot_density' format='png' label='Density'>
+            <change_format>
+                <when input="image_file_format" value="svg" format="svg" />
+                <when input="image_file_format" value="pdf" format="pdf" />
+            </change_format>
+        </data>
+        <data name='output_plot_read_coverage' from_work_dir='plot_read_coverage' format='png' label='Read coverage'>
+            <change_format>
+                <when input="image_file_format" value="svg" format="svg" />
+                <when input="image_file_format" value="pdf" format="pdf" />
+            </change_format>
+        </data>
+        <data name="report" from_work_dir="report.txt" format="txt" label="${tool.name} on ${on_string}: QC report"/>
+        <data name="outFileName" from_work_dir="filtered_matrices.mcool" format="mcool" label="${tool.name} on ${on_string}: Filtered matrices"/>
+        
+    </outputs>
+    <tests>
+        <test>
+            <param name='matrix_mcooler' value='test_matrix.mcool' />
+            <param name='minimumReadCoverage' value='100000' />
+            <param name='minimumDensity' value='0.001' />
+            <param name='maximumRegionToConsider' value='30000000' />
+            <param name="image_file_format" value="png" />
+            <param name="dpi" value="300" />
+            <output name="output_plot_density" file="scHicQualityControl/density.png" ftype="png" compare="sim_size" delta="35000"/>        
+            <output name="output_plot_read_coverage" file="scHicQualityControl/coverage.png" ftype="png" compare="sim_size" delta="35000"/>        
+            <output name="report" file="scHicQualityControl/qc_report.txt" ftype="txt" compare="sim_size" delta="35000"/>        
+            <output name="outFileName" ftype="mcool">
+                <assert_contents>
+                    <has_h5_keys keys='Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/end, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/start, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/end, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/start, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/bins, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/bins/end, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/bins/start, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/chroms, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/indexes, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/pixels, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_CGTCTAAT_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/end, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/start, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/end, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/start, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/end, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/start, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/end, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/start, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes/chrom_offset, 
+                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/count, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/chrom, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/end, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/start, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms/length, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms/name, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes/bin1_offset, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes/chrom_offset, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/bin1_id, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/bin2_id, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/count, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/bins, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/bins/chrom, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/bins/end, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/bins/start, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/chroms, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/chroms/length, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/chroms/name, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/indexes, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/indexes/bin1_offset, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/indexes/chrom_offset, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/pixels, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/pixels/bin1_id, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/pixels/bin2_id, Diploid_1_TAAGGCGA_CGTCTAAT_R1fastqgz/pixels/count, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/chrom, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/end, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/start, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms/length, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms/name, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes/bin1_offset, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes/chrom_offset, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/bin1_id, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/bin2_id, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/count, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/chrom, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/end, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/start, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms/length, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms/name, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes/bin1_offset, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes/chrom_offset, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/bin1_id, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/bin2_id, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/count, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/chrom, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/end, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/start, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms/length, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms/name, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes/bin1_offset, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes/chrom_offset, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/bin1_id, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/bin2_id, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/count'/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Quality control
+===============
+
+scHicQualityControl removes scHi-C interaction matrices with a too low read coverage or density. It creates four output files:
+
+1. Read coverage plot
+
+.. image:: $PATH_TO_IMAGES/read_coverage.png
+
+
+2. Density plot
+
+.. image:: $PATH_TO_IMAGES/density.png
+
+3. Quality report
+
+.. code-block::
+
+    scHi-C sample contained 3882 cells:
+    Number of removed matrices containing bad chromosomes 0
+    Number of removed matrices due to low read coverage (< 100000): 1374
+    Number of removed matrices due to too many zero bins (< 0.02 density, within 30000000 relative genomic distance): 610
+    2508 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore.
+
+4. The scHi-C mcool matrix with the filtered matrices.
+For more information about scHiCExplorer please consider our documentation on readthedocs.io_
+
+.. _readthedocs.io: http://schicexplorer.readthedocs.io/
+]]></help>
+    <expand macro="citations" />
+
+</tool>