Mercurial > repos > jdv > nanopore_qc
changeset 6:0cf41189f086 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopore_qc commit 0d8d1ec70b450f96a29a98e4dec9688b18170d32
| field | value |
|---|---|
| author | jdv |
| date | Sun, 12 Aug 2018 13:21:30 -0400 |
| parents | f2081dc93880 |
| children | 30d073976339 |
| files | nanopore_qc.R nanopore_qc.xml test-data/output.html.small.q10 test-data/output.html.small.q6 yaml_to_html.pl |
| diffstat | 5 files changed, 90 insertions(+), 408 deletions(-) |
line wrap: on
line diff
--- a/nanopore_qc.R Mon Mar 12 19:55:54 2018 -0400 +++ b/nanopore_qc.R Sun Aug 12 13:21:30 2018 -0400 @@ -56,11 +56,20 @@ help="The cutoff value for the mean Q score of a read (default 7). Used to create separate plots for reads above and below this threshold" ) +parser <- add_option(parser, + opt_str = c("-d", "--discard_failed"), + type="logical", + default=FALSE, + dest = 'filt.failed', + help="Discard reads that failed Albacore filtering" + ) + opt = parse_args(parser) -input.file = opt$input.file -output.dir = opt$output.dir -q = opt$q +input.file = opt$input.file +output.dir = opt$output.dir +filt.failed = opt$filt.failed +q = opt$q # this is how we label the reads at least as good as q q_title = paste("Q>=", q, sep="") @@ -123,6 +132,7 @@ # by default the lowest value is -Inf, i.e. includes all reads. The # other value in min.q is set by the user at the command line d = read_tsv(filepath, col_types = cols_only(channel = 'i', + passes_filtering = 'c', num_events_template = 'i', sequence_length_template = 'i', mean_qscore_template = 'n', @@ -146,6 +156,10 @@ # ignore 0-length reads d <- d[d$sequence_length_template > 0,] + # ignore reads failing filtering + if (filt.failed) { + d <- d[d$passes_filtering == 'True',] + } d$events_per_base = d$num_events_template/d$sequence_length_template @@ -173,7 +187,6 @@ d = d[keep] d$start_bin = cut(d$start_time, 9,labels=c(1:9)) - write.table(d,"foo.tsv",sep="\t",quote=F) return(d) }
--- a/nanopore_qc.xml Mon Mar 12 19:55:54 2018 -0400 +++ b/nanopore_qc.xml Sun Aug 12 13:21:30 2018 -0400 @@ -1,20 +1,20 @@ -<tool id="nanopore_qc" name="NanoporeQC" version="0.001"> +<tool id="nanopore_qc" name="NanoporeQC" version="0.002"> <description>Quality report for nanopore data</description> <requirements> - <requirement type="package" version=">=3.3.1">r-base</requirement> - <requirement type="package" version=">=2.2.1">r-ggplot2</requirement> - <requirement type="package" version=">=1.8.4">r-plyr</requirement> - <requirement type="package" version=">=1.4.2">r-reshape2</requirement> - <requirement type="package" version=">=0.2.2">r-readr</requirement> - <requirement type="package" version=">=2.1.14">r-yaml</requirement> - <requirement type="package" version=">=0.4.1">r-scales</requirement> - <requirement type="package" version=">=1.4.1">r-futile.logger</requirement> - <requirement type="package" version=">=1.10.4">r-data.table</requirement> - <requirement type="package" version=">=1.3.2">r-optparse</requirement> - <requirement type="package" version=">=1.8">r-mgcv</requirement> - <requirement type="package" version=">=0.66">perl-yaml-libyaml</requirement> + <requirement type="package" version="3.3.2">r-base</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> + <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="1.4.2">r-reshape2</requirement> + <requirement type="package" version="0.2.2">r-readr</requirement> + <requirement type="package" version="2.1.13">r-yaml</requirement> + <requirement type="package" version="0.4.1">r-scales</requirement> + <requirement type="package" version="1.4.1">r-futile.logger</requirement> + <requirement type="package" version="1.10.4">r-data.table</requirement> + <requirement type="package" version="1.3.2">r-optparse</requirement> + <requirement type="package" version="1.8_16">r-mgcv</requirement> + <requirement type="package" 
version="0.66">perl-yaml-libyaml</requirement> </requirements> <stdio> @@ -39,12 +39,14 @@ -i '$input' -o '${html_file.files_path}' -q '$q_cutoff' + $discard_failed && perl '${__tool_directory__}/yaml_to_html.pl' '${html_file.files_path}/summary.yaml' + '${html_file.files_path}' '$html_file' ]]> @@ -53,21 +55,52 @@ <inputs> <param name="input" type="data" format="tabular" label="Basecall summary file from Albacore"/> <param name="q_cutoff" type="float" value="10" min="0" label="Quality cutoff for QC calculations" /> + <param name="discard_failed" type="boolean" checked="false" truevalue="--discard_failed T" falsevalue="" label="Ignore reads failing quality filter" /> </inputs> <outputs> <data format="html" name="html_file" label="NanoporeQC on ${on_string}" /> </outputs> <tests> + <!-- binary image blobs can differ, which is why we use line matching + instead of file matching --> + + <!-- quality cutoff 10 --> <test> <param name="input" value="sequencing_summary.txt.small" ftype="tabular" /> <param name="q_cutoff" value="10" /> - <output name="html_file" file="output.html.small.q10" compare="diff" /> + <output name="html_file"> + <assert_contents> + <has_text_matching expression="<td>Total Reads</td>\s*<td>9990</td>" /> + <has_text_matching expression="<td>Total Reads</td>\s*<td>7952</td>" /> + </assert_contents> + </output> </test> + + <!-- quality cutoff 6 --> <test> <param name="input" value="sequencing_summary.txt.small" ftype="tabular" /> <param name="q_cutoff" value="6" /> - <output name="html_file" file="output.html.small.q6" compare="diff" /> + <output name="html_file"> + <assert_contents> + <has_text_matching expression="<td>Total Reads</td>\s*<td>9990</td>" /> + <has_text_matching expression="<td>Total Reads</td>\s*<td>9254</td>" /> + </assert_contents> + </output> </test> + + <!-- quality cutoff 6, discard filtered --> + <test> + <param name="input" value="sequencing_summary.txt.small" ftype="tabular" /> + <param name="q_cutoff" value="6" /> + <param 
name="discard_failed" value="true" /> + <output name="html_file"> + <assert_contents> + <has_text_matching expression="<td>Total Reads</td>\s*<td>9107</td>" /> + <has_text_matching expression="<td>Total Reads</td>\s*<td>9107</td>" /> + </assert_contents> + </output> + </test> + </tests> <help> <![CDATA[
--- a/test-data/output.html.small.q10 Mon Mar 12 19:55:54 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,191 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> - <title></title> - <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" /> - <style> - h2 { - padding: 0.3em; - background-color: #000000; - color: #ffffff; - margin: 1em 0 2em 0; - } - h3 { - padding: 0em 0.2em 0em 0.2em; - color: #555555; - border: solid 1px black; - border-width: 0px 0px 1px 0px; - margin: 2em 0 0.4em 0; - } - tr { - margin: 0; - } - tr:nth-child(even) { - background-color: #bbbbbb; - } - tr:nth-child(odd) { - background-color: #eeeeee; - } - caption { - text-align: left; - font-weight: bold; - background-color: #550000; - color: #ffffff; - padding: 0.1em 0.2em; - } - table { - margin: 1em; - padding: 0.3em; - } - td { - margin: 0; - padding: 0 0.4em; - } - tr td:nth-child(1) { - color: #550000; - } - figure { - display: table; - margin: 2em 0; - } - figcaption { - display: table-caption; - caption-side: top; - font-size: 1.1em; - text-decoration: none; - text-align: center; - font-weight: bold; - background-color: #550000; - color: #ffffff; - padding: 0.1em 0.2em; - margin: 2em 0 0.7em 0; - } - - </style> -</head> -<body> - - <h2>NanoporeQC Report</h2> - <h3>Summary statistics</h3> - <table> - <caption>All reads</caption> - <tr> - <td>Total Yield (Gb)</td> - <td>0.0092773</td> - </tr> - <tr> - <td>Total Reads</td> - <td>9990</td> - </tr> - <tr> - <td>Mean Length</td> - <td>928.7</td> - </tr> - <tr> - <td>Median Length</td> - <td>941.0</td> - </tr> - <tr> - <td>Max Length</td> - <td>25740.0</td> - </tr> - <tr> - <td>Mean Q</td> - <td>11.7</td> - </tr> - <tr> - <td>Median Q</td> - <td>12.7</td> - </tr> - </table> - <table> - <caption>Q>=10</caption> - <tr> - <td>Total Yield (Gb)</td> - <td>0.0079507</td> - </tr> - <tr> - <td>Total Reads</td> - <td>7952</td> - 
</tr> - <tr> - <td>Mean Length</td> - <td>999.8</td> - </tr> - <tr> - <td>Median Length</td> - <td>949.0</td> - </tr> - <tr> - <td>Max Length</td> - <td>6545.0</td> - </tr> - <tr> - <td>Mean Q</td> - <td>12.9</td> - </tr> - <tr> - <td>Median Q</td> - <td>13.1</td> - </tr> - </table> - <h3>QC plots</h3> - <p>(Click on plot for hi-resolution version)</p> - <a href="length_histogram.png"> - <figure> - <img src="length_histogram.screen.png" alt="length_histogram" /> - <figcaption>Read length distribution</figcaption> - </figure> - </a> - <a href="q_histogram.png"> - <figure> - <img src="q_histogram.screen.png" alt="q_histogram" /> - <figcaption>Mean quality score distribution</figcaption> - </figure> - </a> - <a href="reads_per_hour.png"> - <figure> - <img src="reads_per_hour.screen.png" alt="reads_per_hour" /> - <figcaption>Yield over time</figcaption> - </figure> - </a> - <a href="cumulative_yield.png"> - <figure> - <img src="cumulative_yield.screen.png" alt="cumulative_yield" /> - <figcaption>Cumulative yield over time</figcaption> - </figure> - </a> - <a href="yield_summary.png"> - <figure> - <img src="yield_summary.screen.png" alt="yield_summary" /> - <figcaption>Yield by read length cutoff</figcaption> - </figure> - </a> - <a href="flowcell_overview.png"> - <figure> - <img src="flowcell_overview.screen.png" alt="flowcell_overview" /> - <figcaption>Median read quality per channel</figcaption> - </figure> - </a> - <a href="length_by_hour.png"> - <figure> - <img src="length_by_hour.screen.png" alt="length_by_hour" /> - <figcaption>Read length over time</figcaption> - </figure> - </a> - <a href="q_by_hour.png"> - <figure> - <img src="q_by_hour.screen.png" alt="q_by_hour" /> - <figcaption>Read quality over time</figcaption> - </figure> - </a> - <a href="length_vs_q.png"> - <figure> - <img src="length_vs_q.screen.png" alt="length_vs_q" /> - <figcaption>Read length vs. quality</figcaption> - </figure> - </a> - -</body> -</html>
--- a/test-data/output.html.small.q6 Mon Mar 12 19:55:54 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,191 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> -<head> - <title></title> - <meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" /> - <style> - h2 { - padding: 0.3em; - background-color: #000000; - color: #ffffff; - margin: 1em 0 2em 0; - } - h3 { - padding: 0em 0.2em 0em 0.2em; - color: #555555; - border: solid 1px black; - border-width: 0px 0px 1px 0px; - margin: 2em 0 0.4em 0; - } - tr { - margin: 0; - } - tr:nth-child(even) { - background-color: #bbbbbb; - } - tr:nth-child(odd) { - background-color: #eeeeee; - } - caption { - text-align: left; - font-weight: bold; - background-color: #550000; - color: #ffffff; - padding: 0.1em 0.2em; - } - table { - margin: 1em; - padding: 0.3em; - } - td { - margin: 0; - padding: 0 0.4em; - } - tr td:nth-child(1) { - color: #550000; - } - figure { - display: table; - margin: 2em 0; - } - figcaption { - display: table-caption; - caption-side: top; - font-size: 1.1em; - text-decoration: none; - text-align: center; - font-weight: bold; - background-color: #550000; - color: #ffffff; - padding: 0.1em 0.2em; - margin: 2em 0 0.7em 0; - } - - </style> -</head> -<body> - - <h2>NanoporeQC Report</h2> - <h3>Summary statistics</h3> - <table> - <caption>All reads</caption> - <tr> - <td>Total Yield (Gb)</td> - <td>0.0092773</td> - </tr> - <tr> - <td>Total Reads</td> - <td>9990</td> - </tr> - <tr> - <td>Mean Length</td> - <td>928.7</td> - </tr> - <tr> - <td>Median Length</td> - <td>941.0</td> - </tr> - <tr> - <td>Max Length</td> - <td>25740.0</td> - </tr> - <tr> - <td>Mean Q</td> - <td>11.7</td> - </tr> - <tr> - <td>Median Q</td> - <td>12.7</td> - </tr> - </table> - <table> - <caption>Q>=6</caption> - <tr> - <td>Total Yield (Gb)</td> - <td>0.0090947</td> - </tr> - <tr> - <td>Total Reads</td> - <td>9254</td> - 
</tr> - <tr> - <td>Mean Length</td> - <td>982.8</td> - </tr> - <tr> - <td>Median Length</td> - <td>945.0</td> - </tr> - <tr> - <td>Max Length</td> - <td>6545.0</td> - </tr> - <tr> - <td>Mean Q</td> - <td>12.3</td> - </tr> - <tr> - <td>Median Q</td> - <td>12.9</td> - </tr> - </table> - <h3>QC plots</h3> - <p>(Click on plot for hi-resolution version)</p> - <a href="length_histogram.png"> - <figure> - <img src="length_histogram.screen.png" alt="length_histogram" /> - <figcaption>Read length distribution</figcaption> - </figure> - </a> - <a href="q_histogram.png"> - <figure> - <img src="q_histogram.screen.png" alt="q_histogram" /> - <figcaption>Mean quality score distribution</figcaption> - </figure> - </a> - <a href="reads_per_hour.png"> - <figure> - <img src="reads_per_hour.screen.png" alt="reads_per_hour" /> - <figcaption>Yield over time</figcaption> - </figure> - </a> - <a href="cumulative_yield.png"> - <figure> - <img src="cumulative_yield.screen.png" alt="cumulative_yield" /> - <figcaption>Cumulative yield over time</figcaption> - </figure> - </a> - <a href="yield_summary.png"> - <figure> - <img src="yield_summary.screen.png" alt="yield_summary" /> - <figcaption>Yield by read length cutoff</figcaption> - </figure> - </a> - <a href="flowcell_overview.png"> - <figure> - <img src="flowcell_overview.screen.png" alt="flowcell_overview" /> - <figcaption>Median read quality per channel</figcaption> - </figure> - </a> - <a href="length_by_hour.png"> - <figure> - <img src="length_by_hour.screen.png" alt="length_by_hour" /> - <figcaption>Read length over time</figcaption> - </figure> - </a> - <a href="q_by_hour.png"> - <figure> - <img src="q_by_hour.screen.png" alt="q_by_hour" /> - <figcaption>Read quality over time</figcaption> - </figure> - </a> - <a href="length_vs_q.png"> - <figure> - <img src="length_vs_q.screen.png" alt="length_vs_q" /> - <figcaption>Read length vs. quality</figcaption> - </figure> - </a> - -</body> -</html>
--- a/yaml_to_html.pl Mon Mar 12 19:55:54 2018 -0400 +++ b/yaml_to_html.pl Sun Aug 12 13:21:30 2018 -0400 @@ -5,12 +5,13 @@ use 5.012; use YAML::XS qw/LoadFile/; +use MIME::Base64; use autodie; -my ($fn_in, $fn_out) = @ARGV; +my ($fn_yaml, $dir_in, $fn_out) = @ARGV; die "Can't find or read input file: $!\n" - if (! -r $fn_in); + if (! -r $fn_yaml); # set output filehandle based on arguments my $fh = \*STDOUT; @@ -18,9 +19,9 @@ open $fh, '>', $fn_out; } -my $yaml = LoadFile($ARGV[0]); +my $yaml = LoadFile($fn_yaml); -convert($yaml); +convert($yaml, $dir_in); sub convert { @@ -99,16 +100,26 @@ say {$fh} " <h3>QC plots</h3>"; - say {$fh} " <p>(Click on plot for hi-resolution version)</p>"; + say {$fh} " <p>(Click on plot for high-resolution version, or in Chrome \"Open link in new tab\")</p>"; for my $base (@order) { my $caption = $figs{$base} // die "No caption found for $base"; + + # Base64-encode images + my $fn_img_full = "$dir_in/$base.png"; + my $fn_img_screen = "$dir_in/$base.screen.png"; + die "Failed to find or read $fn_img_full" + if (! -r $fn_img_full); + die "Failed to find or read $fn_img_screen" + if (! -r $fn_img_screen); + my $img_full = encode($fn_img_full); + my $img_screen = encode($fn_img_screen); print {$fh} <<"CONTENT" - <a href="$base.png"> + <a href="data:image/png;base64,$img_full"> <figure> - <img src="$base.screen.png" alt="$base" /> + <img src="data:image/png;base64,$img_screen" alt="$base" /> <figcaption>$caption</figcaption> </figure> </a> @@ -120,7 +131,14 @@ } +sub encode { + my ($fn) = @_; + open my $in, '<:raw', $fn; + local($/) = undef; + return encode_base64(<$in>); + +} sub header {