changeset 6:0cf41189f086 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopore_qc commit 0d8d1ec70b450f96a29a98e4dec9688b18170d32
author jdv
date Sun, 12 Aug 2018 13:21:30 -0400
parents f2081dc93880
children 30d073976339
files nanopore_qc.R nanopore_qc.xml test-data/output.html.small.q10 test-data/output.html.small.q6 yaml_to_html.pl
diffstat 5 files changed, 90 insertions(+), 408 deletions(-) [+]
line wrap: on
line diff
--- a/nanopore_qc.R	Mon Mar 12 19:55:54 2018 -0400
+++ b/nanopore_qc.R	Sun Aug 12 13:21:30 2018 -0400
@@ -56,11 +56,20 @@
                      help="The cutoff value for the mean Q score of a read (default 7). Used to create separate plots for reads above and below this threshold"
                      )
 
+parser <- add_option(parser, 
+                     opt_str = c("-d", "--discard_failed"), 
+                     type="logical", 
+                     default=FALSE,
+                     dest = 'filt.failed',
+                     help="Discard reads that failed Albacore filtering"
+                     )
+
 opt = parse_args(parser)
 
-input.file = opt$input.file
-output.dir = opt$output.dir
-q = opt$q
+input.file  = opt$input.file
+output.dir  = opt$output.dir
+filt.failed = opt$filt.failed
+q           = opt$q
 
 # this is how we label the reads at least as good as q
 q_title = paste("Q>=", q, sep="")
@@ -123,6 +132,7 @@
     # by default the lowest value is -Inf, i.e. includes all reads. The 
     # other value in min.q is set by the user at the command line
     d = read_tsv(filepath, col_types = cols_only(channel = 'i', 
+                                                passes_filtering = 'c',
                                                 num_events_template = 'i', 
                                                 sequence_length_template = 'i', 
                                                 mean_qscore_template = 'n',
@@ -146,6 +156,10 @@
 
     # ignore 0-length reads
     d <- d[d$sequence_length_template > 0,]
+    # ignore reads failing filtering
+    if (filt.failed) {
+        d <- d[d$passes_filtering == 'True',]
+    }
         
     d$events_per_base = d$num_events_template/d$sequence_length_template
 
@@ -173,7 +187,6 @@
     d = d[keep]
 
     d$start_bin = cut(d$start_time, 9,labels=c(1:9))
-    write.table(d,"foo.tsv",sep="\t",quote=F)
         
     return(d)
 }
--- a/nanopore_qc.xml	Mon Mar 12 19:55:54 2018 -0400
+++ b/nanopore_qc.xml	Sun Aug 12 13:21:30 2018 -0400
@@ -1,20 +1,20 @@
-<tool id="nanopore_qc" name="NanoporeQC" version="0.001">
+<tool id="nanopore_qc" name="NanoporeQC" version="0.002">
 
     <description>Quality report for nanopore data</description>
 
     <requirements>
-        <requirement type="package" version=">=3.3.1">r-base</requirement>
-        <requirement type="package" version=">=2.2.1">r-ggplot2</requirement>
-        <requirement type="package" version=">=1.8.4">r-plyr</requirement>
-        <requirement type="package" version=">=1.4.2">r-reshape2</requirement>
-        <requirement type="package" version=">=0.2.2">r-readr</requirement>
-        <requirement type="package" version=">=2.1.14">r-yaml</requirement>
-        <requirement type="package" version=">=0.4.1">r-scales</requirement>
-        <requirement type="package" version=">=1.4.1">r-futile.logger</requirement>
-        <requirement type="package" version=">=1.10.4">r-data.table</requirement>
-        <requirement type="package" version=">=1.3.2">r-optparse</requirement>
-        <requirement type="package" version=">=1.8">r-mgcv</requirement>
-        <requirement type="package" version=">=0.66">perl-yaml-libyaml</requirement>
+        <requirement type="package" version="3.3.2">r-base</requirement>
+        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="0.2.2">r-readr</requirement>
+        <requirement type="package" version="2.1.13">r-yaml</requirement>
+        <requirement type="package" version="0.4.1">r-scales</requirement>
+        <requirement type="package" version="1.4.1">r-futile.logger</requirement>
+        <requirement type="package" version="1.10.4">r-data.table</requirement>
+        <requirement type="package" version="1.3.2">r-optparse</requirement>
+        <requirement type="package" version="1.8_16">r-mgcv</requirement>
+        <requirement type="package" version="0.66">perl-yaml-libyaml</requirement>
     </requirements>
 
     <stdio>
@@ -39,12 +39,14 @@
             -i '$input'
             -o '${html_file.files_path}'
             -q '$q_cutoff'
+            $discard_failed
 
         &&
 
         perl '${__tool_directory__}/yaml_to_html.pl'
 
             '${html_file.files_path}/summary.yaml'
+            '${html_file.files_path}'
             '$html_file'
 
     ]]>
@@ -53,21 +55,52 @@
     <inputs>
         <param name="input" type="data" format="tabular" label="Basecall summary file from Albacore"/>
         <param name="q_cutoff" type="float" value="10" min="0" label="Quality cutoff for QC calculations" />
+        <param name="discard_failed" type="boolean" checked="false" truevalue="--discard_failed T" falsevalue="" label="Ignore reads failing quality filter" />
     </inputs>
     <outputs>
         <data format="html" name="html_file" label="NanoporeQC on ${on_string}" />
     </outputs>
     <tests>
+        <!-- binary image blobs can differ, which is why we use line matching
+        instead of file matching -->
+
+        <!-- quality cutoff 10 -->
         <test>
             <param name="input" value="sequencing_summary.txt.small" ftype="tabular" />
             <param name="q_cutoff" value="10" />
-            <output name="html_file" file="output.html.small.q10" compare="diff" />
+            <output name="html_file">
+                <assert_contents>
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;9990&lt;/td&gt;" />
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;7952&lt;/td&gt;" />
+                </assert_contents>
+            </output>
         </test>
+
+        <!-- quality cutoff 6 -->
         <test>
             <param name="input" value="sequencing_summary.txt.small" ftype="tabular" />
             <param name="q_cutoff" value="6" />
-            <output name="html_file" file="output.html.small.q6" compare="diff" />
+            <output name="html_file">
+                <assert_contents>
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;9990&lt;/td&gt;" />
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;9254&lt;/td&gt;" />
+                </assert_contents>
+            </output>
         </test>
+
+        <!-- quality cutoff 6, discarding reads that failed Albacore filtering -->
+        <test>
+            <param name="input" value="sequencing_summary.txt.small" ftype="tabular" />
+            <param name="q_cutoff" value="6" />
+            <param name="discard_failed" value="true" />
+            <output name="html_file">
+                <assert_contents>
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;9107&lt;/td&gt;" />
+                    <has_text_matching expression="&lt;td&gt;Total Reads&lt;/td&gt;\s*&lt;td&gt;9107&lt;/td&gt;" />
+                </assert_contents>
+            </output>
+        </test>
+
     </tests>
     <help>
 <![CDATA[
--- a/test-data/output.html.small.q10	Mon Mar 12 19:55:54 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html>
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-    <title></title>
-    <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
-    <style>
-        h2 {
-            padding: 0.3em;
-            background-color: #000000;
-            color: #ffffff;
-            margin: 1em 0 2em 0;
-        }
-        h3 {
-            padding: 0em 0.2em 0em 0.2em;
-            color: #555555;
-            border: solid 1px black;
-            border-width: 0px 0px 1px 0px;
-            margin: 2em 0 0.4em 0;
-        }
-        tr {
-            margin: 0;
-        }
-        tr:nth-child(even) {
-            background-color: #bbbbbb;
-        }
-        tr:nth-child(odd) {
-            background-color: #eeeeee;
-        }
-        caption {
-            text-align: left;
-            font-weight: bold;
-            background-color: #550000;
-            color: #ffffff;
-            padding: 0.1em 0.2em;
-        }
-        table {
-            margin: 1em;
-            padding: 0.3em;
-        }
-        td {
-            margin: 0;
-            padding: 0 0.4em;
-        }
-        tr td:nth-child(1) {
-            color: #550000;
-        }
-        figure {
-            display: table;
-            margin: 2em 0;
-        }
-        figcaption {
-            display: table-caption;
-            caption-side: top;
-            font-size: 1.1em;
-            text-decoration: none;
-            text-align: center;
-            font-weight: bold;
-            background-color: #550000;
-            color: #ffffff;
-            padding: 0.1em 0.2em;
-            margin: 2em 0 0.7em 0;
-        }
-            
-    </style>
-</head>
-<body>
-
-    <h2>NanoporeQC Report</h2>
-    <h3>Summary statistics</h3>
-    <table>
-        <caption>All reads</caption>
-        <tr>
-            <td>Total Yield (Gb)</td>
-            <td>0.0092773</td>
-        </tr>
-        <tr>
-            <td>Total Reads</td>
-            <td>9990</td>
-        </tr>
-        <tr>
-            <td>Mean Length</td>
-            <td>928.7</td>
-        </tr>
-        <tr>
-            <td>Median Length</td>
-            <td>941.0</td>
-        </tr>
-        <tr>
-            <td>Max Length</td>
-            <td>25740.0</td>
-        </tr>
-        <tr>
-            <td>Mean Q</td>
-            <td>11.7</td>
-        </tr>
-        <tr>
-            <td>Median Q</td>
-            <td>12.7</td>
-        </tr>
-    </table>
-    <table>
-        <caption>Q>=10</caption>
-        <tr>
-            <td>Total Yield (Gb)</td>
-            <td>0.0079507</td>
-        </tr>
-        <tr>
-            <td>Total Reads</td>
-            <td>7952</td>
-        </tr>
-        <tr>
-            <td>Mean Length</td>
-            <td>999.8</td>
-        </tr>
-        <tr>
-            <td>Median Length</td>
-            <td>949.0</td>
-        </tr>
-        <tr>
-            <td>Max Length</td>
-            <td>6545.0</td>
-        </tr>
-        <tr>
-            <td>Mean Q</td>
-            <td>12.9</td>
-        </tr>
-        <tr>
-            <td>Median Q</td>
-            <td>13.1</td>
-        </tr>
-    </table>
-    <h3>QC plots</h3>
-    <p>(Click on plot for hi-resolution version)</p>
-    <a href="length_histogram.png">
-        <figure>
-            <img src="length_histogram.screen.png" alt="length_histogram" />
-            <figcaption>Read length distribution</figcaption>
-        </figure>
-    </a>
-    <a href="q_histogram.png">
-        <figure>
-            <img src="q_histogram.screen.png" alt="q_histogram" />
-            <figcaption>Mean quality score distribution</figcaption>
-        </figure>
-    </a>
-    <a href="reads_per_hour.png">
-        <figure>
-            <img src="reads_per_hour.screen.png" alt="reads_per_hour" />
-            <figcaption>Yield over time</figcaption>
-        </figure>
-    </a>
-    <a href="cumulative_yield.png">
-        <figure>
-            <img src="cumulative_yield.screen.png" alt="cumulative_yield" />
-            <figcaption>Cumulative yield over time</figcaption>
-        </figure>
-    </a>
-    <a href="yield_summary.png">
-        <figure>
-            <img src="yield_summary.screen.png" alt="yield_summary" />
-            <figcaption>Yield by read length cutoff</figcaption>
-        </figure>
-    </a>
-    <a href="flowcell_overview.png">
-        <figure>
-            <img src="flowcell_overview.screen.png" alt="flowcell_overview" />
-            <figcaption>Median read quality per channel</figcaption>
-        </figure>
-    </a>
-    <a href="length_by_hour.png">
-        <figure>
-            <img src="length_by_hour.screen.png" alt="length_by_hour" />
-            <figcaption>Read length over time</figcaption>
-        </figure>
-    </a>
-    <a href="q_by_hour.png">
-        <figure>
-            <img src="q_by_hour.screen.png" alt="q_by_hour" />
-            <figcaption>Read quality over time</figcaption>
-        </figure>
-    </a>
-    <a href="length_vs_q.png">
-        <figure>
-            <img src="length_vs_q.screen.png" alt="length_vs_q" />
-            <figcaption>Read length vs. quality</figcaption>
-        </figure>
-    </a>
-
-</body>
-</html>
--- a/test-data/output.html.small.q6	Mon Mar 12 19:55:54 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html>
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-    <title></title>
-    <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
-    <style>
-        h2 {
-            padding: 0.3em;
-            background-color: #000000;
-            color: #ffffff;
-            margin: 1em 0 2em 0;
-        }
-        h3 {
-            padding: 0em 0.2em 0em 0.2em;
-            color: #555555;
-            border: solid 1px black;
-            border-width: 0px 0px 1px 0px;
-            margin: 2em 0 0.4em 0;
-        }
-        tr {
-            margin: 0;
-        }
-        tr:nth-child(even) {
-            background-color: #bbbbbb;
-        }
-        tr:nth-child(odd) {
-            background-color: #eeeeee;
-        }
-        caption {
-            text-align: left;
-            font-weight: bold;
-            background-color: #550000;
-            color: #ffffff;
-            padding: 0.1em 0.2em;
-        }
-        table {
-            margin: 1em;
-            padding: 0.3em;
-        }
-        td {
-            margin: 0;
-            padding: 0 0.4em;
-        }
-        tr td:nth-child(1) {
-            color: #550000;
-        }
-        figure {
-            display: table;
-            margin: 2em 0;
-        }
-        figcaption {
-            display: table-caption;
-            caption-side: top;
-            font-size: 1.1em;
-            text-decoration: none;
-            text-align: center;
-            font-weight: bold;
-            background-color: #550000;
-            color: #ffffff;
-            padding: 0.1em 0.2em;
-            margin: 2em 0 0.7em 0;
-        }
-            
-    </style>
-</head>
-<body>
-
-    <h2>NanoporeQC Report</h2>
-    <h3>Summary statistics</h3>
-    <table>
-        <caption>All reads</caption>
-        <tr>
-            <td>Total Yield (Gb)</td>
-            <td>0.0092773</td>
-        </tr>
-        <tr>
-            <td>Total Reads</td>
-            <td>9990</td>
-        </tr>
-        <tr>
-            <td>Mean Length</td>
-            <td>928.7</td>
-        </tr>
-        <tr>
-            <td>Median Length</td>
-            <td>941.0</td>
-        </tr>
-        <tr>
-            <td>Max Length</td>
-            <td>25740.0</td>
-        </tr>
-        <tr>
-            <td>Mean Q</td>
-            <td>11.7</td>
-        </tr>
-        <tr>
-            <td>Median Q</td>
-            <td>12.7</td>
-        </tr>
-    </table>
-    <table>
-        <caption>Q>=6</caption>
-        <tr>
-            <td>Total Yield (Gb)</td>
-            <td>0.0090947</td>
-        </tr>
-        <tr>
-            <td>Total Reads</td>
-            <td>9254</td>
-        </tr>
-        <tr>
-            <td>Mean Length</td>
-            <td>982.8</td>
-        </tr>
-        <tr>
-            <td>Median Length</td>
-            <td>945.0</td>
-        </tr>
-        <tr>
-            <td>Max Length</td>
-            <td>6545.0</td>
-        </tr>
-        <tr>
-            <td>Mean Q</td>
-            <td>12.3</td>
-        </tr>
-        <tr>
-            <td>Median Q</td>
-            <td>12.9</td>
-        </tr>
-    </table>
-    <h3>QC plots</h3>
-    <p>(Click on plot for hi-resolution version)</p>
-    <a href="length_histogram.png">
-        <figure>
-            <img src="length_histogram.screen.png" alt="length_histogram" />
-            <figcaption>Read length distribution</figcaption>
-        </figure>
-    </a>
-    <a href="q_histogram.png">
-        <figure>
-            <img src="q_histogram.screen.png" alt="q_histogram" />
-            <figcaption>Mean quality score distribution</figcaption>
-        </figure>
-    </a>
-    <a href="reads_per_hour.png">
-        <figure>
-            <img src="reads_per_hour.screen.png" alt="reads_per_hour" />
-            <figcaption>Yield over time</figcaption>
-        </figure>
-    </a>
-    <a href="cumulative_yield.png">
-        <figure>
-            <img src="cumulative_yield.screen.png" alt="cumulative_yield" />
-            <figcaption>Cumulative yield over time</figcaption>
-        </figure>
-    </a>
-    <a href="yield_summary.png">
-        <figure>
-            <img src="yield_summary.screen.png" alt="yield_summary" />
-            <figcaption>Yield by read length cutoff</figcaption>
-        </figure>
-    </a>
-    <a href="flowcell_overview.png">
-        <figure>
-            <img src="flowcell_overview.screen.png" alt="flowcell_overview" />
-            <figcaption>Median read quality per channel</figcaption>
-        </figure>
-    </a>
-    <a href="length_by_hour.png">
-        <figure>
-            <img src="length_by_hour.screen.png" alt="length_by_hour" />
-            <figcaption>Read length over time</figcaption>
-        </figure>
-    </a>
-    <a href="q_by_hour.png">
-        <figure>
-            <img src="q_by_hour.screen.png" alt="q_by_hour" />
-            <figcaption>Read quality over time</figcaption>
-        </figure>
-    </a>
-    <a href="length_vs_q.png">
-        <figure>
-            <img src="length_vs_q.screen.png" alt="length_vs_q" />
-            <figcaption>Read length vs. quality</figcaption>
-        </figure>
-    </a>
-
-</body>
-</html>
--- a/yaml_to_html.pl	Mon Mar 12 19:55:54 2018 -0400
+++ b/yaml_to_html.pl	Sun Aug 12 13:21:30 2018 -0400
@@ -5,12 +5,13 @@
 use 5.012;
 
 use YAML::XS qw/LoadFile/;
+use MIME::Base64;
 use autodie;
 
-my ($fn_in, $fn_out) = @ARGV;
+my ($fn_yaml, $dir_in, $fn_out) = @ARGV;
 
 die "Can't find or read input file: $!\n"
-    if (! -r $fn_in);
+    if (! -r $fn_yaml);
 
 # set output filehandle based on arguments
 my $fh = \*STDOUT;
@@ -18,9 +19,9 @@
     open $fh, '>', $fn_out;
 }
 
-my $yaml = LoadFile($ARGV[0]);
+my $yaml = LoadFile($fn_yaml);
 
-convert($yaml);
+convert($yaml, $dir_in);
 
 sub convert {
 
@@ -99,16 +100,26 @@
 
 
     say {$fh} "    <h3>QC plots</h3>";
-    say {$fh} "    <p>(Click on plot for hi-resolution version)</p>";
+    say {$fh} "    <p>(Click on plot for high-resolution version, or in Chrome \"Open link in new tab\")</p>";
 
     for my $base (@order) {
 
         my $caption = $figs{$base} // die "No caption found for $base";
+     
+        # Base64-encode images
+        my $fn_img_full   = "$dir_in/$base.png";
+        my $fn_img_screen = "$dir_in/$base.screen.png";
+        die "Failed to find or read $fn_img_full"
+            if (! -r $fn_img_full);
+        die "Failed to find or read $fn_img_screen"
+            if (! -r $fn_img_screen);
+        my $img_full   = encode($fn_img_full);
+        my $img_screen = encode($fn_img_screen);
 
         print {$fh} <<"CONTENT"
-    <a href="$base.png">
+    <a href="data:image/png;base64,$img_full">
         <figure>
-            <img src="$base.screen.png" alt="$base" />
+            <img src="data:image/png;base64,$img_screen" alt="$base" />
             <figcaption>$caption</figcaption>
         </figure>
     </a>
@@ -120,7 +131,14 @@
 
 }
 
+sub encode {
 
+    # Slurp $fn as raw bytes; '' EOL keeps the Base64 unwrapped for data: URIs.
+    my ($fn) = @_;
+    open my $in, '<:raw', $fn;    # autodie raises on failure
+    local $/ = undef;             # slurp mode: read the whole file at once
+    return encode_base64( scalar <$in>, '' );
+}
 
 sub header {