Mercurial > repos > jdv > scrappie
changeset 0:30c2b84b4117 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/scrappie commit bf5788ad5a3293446a50a3246b44ba09174c9b71
author | jdv |
---|---|
date | Wed, 30 Aug 2017 02:55:35 -0400 |
parents | |
children | 52ba9fde200f |
files | scrappie_raw.py scrappie_raw.xml test-data/test_data.fast5.tar.gz test-data/test_data.fasta |
diffstat | 4 files changed, 150 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python3
# File: scrappie_raw.py (added by this changeset; shown here de-diffed).
"""Galaxy helper that runs ``scrappie raw`` on a tarball of FAST5 reads.

Usage::

    scrappie_raw.py <fast5_tarball> <output_fasta> <threads>

The tarball is unpacked into ``in_dir`` (relative to the current working
directory), every ``*.fast5`` file found anywhere below it is copied to the
top level of ``in_dir``, and ``scrappie raw`` is run on that directory with
its standard output redirected to ``<output_fasta>``.
"""

import glob
import os
import shutil
import subprocess
import sys
import tarfile

# NOTE(review): h5py and numpy are not referenced anywhere in this script.
# They are kept so the import-time requirements stay unchanged; confirm
# whether they can be dropped.
import h5py
import numpy as np

# Working directory that receives the unpacked reads and is handed to scrappie.
IN_DIR = "in_dir"


def main():
    """Unpack the reads, run ``scrappie raw`` and return its exit status.

    Command-line arguments (``sys.argv``):
        1: path to the tar archive of FAST5 reads
        2: path of the FASTA file to write
        3: number of threads, passed verbatim to ``--threads``

    Returns:
        The exit status of the ``scrappie`` process, so the caller can
        propagate a basecaller failure instead of always exiting with 0.
    """
    if len(sys.argv) < 4:
        sys.exit("usage: scrappie_raw.py <fast5_tarball> <output_fasta> <threads>")

    tar_file = sys.argv[1]
    out_file = sys.argv[2]
    threads = sys.argv[3]

    extract_fast5(tar_file)

    with open(out_file, "w") as outfile:
        return subprocess.call(
            [
                "scrappie",
                "raw",
                "--threads", threads,
                "--outformat", "fasta",
                IN_DIR,
            ],
            stdout=outfile,
        )


def _safe_extractall(tar, dest):
    """Extract *tar* into *dest*, refusing members that escape *dest*.

    Uses the standard ``data`` extraction filter when the running Python
    provides it; otherwise falls back to checking every member name.
    """
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=dest, filter="data")
        return

    # Fallback for interpreters without extraction filters: reject absolute
    # paths and ``..`` components that resolve outside the destination.
    # NOTE(review): this does not vet symlink/hardlink targets inside the
    # archive; the ``data`` filter above does.
    root = os.path.realpath(dest)
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if os.path.commonpath([root, target]) != root:
            raise ValueError("Unsafe path in archive: " + member.name)
    tar.extractall(path=dest)


def extract_fast5(fn):
    """Unpack archive *fn* into ``in_dir`` and flatten its FAST5 files.

    Every ``*.fast5`` file found at any depth below ``in_dir`` is copied to
    the top level of ``in_dir``. Files that are already at the top level are
    left in place (copying them onto themselves would raise
    ``shutil.SameFileError``).

    Args:
        fn: path to a tar archive (any compression ``tarfile`` auto-detects).

    Raises:
        ValueError: if no ``*.fast5`` file is found after extraction.
        OSError, tarfile.TarError: on I/O or archive errors. Every exception
            is reported on stderr and then re-raised unchanged.
    """
    try:
        in_dir = IN_DIR
        os.makedirs(in_dir, exist_ok=True)

        # The context manager guarantees the archive handle is closed.
        with tarfile.open(fn, mode='r') as tar:
            _safe_extractall(tar, in_dir)

        files = glob.glob(
            os.path.join(in_dir, "**", "*.fast5"),
            recursive=True
        )
        if not files:
            raise ValueError('No FAST5 files found')

        top_level = os.path.abspath(in_dir)
        for f in files:
            # "**" also matches zero directories, so reads that already sit
            # directly in in_dir show up here; skip them.
            if os.path.abspath(os.path.dirname(f)) == top_level:
                continue
            shutil.copy(f, in_dir)

    except OSError as e:
        # strerror is None for some OSError subclasses; fall back to str(e).
        print("Unexpected error:", e.strerror or e, file=sys.stderr)
        raise

    except Exception:
        print("Unexpected error:", sys.exc_info()[0], file=sys.stderr)
        raise


if __name__ == "__main__":
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scrappie_raw.xml Wed Aug 30 02:55:35 2017 -0400 @@ -0,0 +1,81 @@ +<tool id="scrappie_raw" name="Scrappie (raw)" version="1.0.0"> + + <description>ONT development basecaller</description> + + <!-- ***************************************************************** --> + + <!-- + <requirements> + <requirement type="package" version="1.0.0">scrappie</requirement> + </requirements> + --> + + <!-- ***************************************************************** --> + + <version_command>scrappie version | perl -wnE'print "$1\n" for /scrappie (.+)/g'</version_command> + + <!-- ***************************************************************** --> + + <command detect_errors="aggressive"> + <![CDATA[ + + python3 $__tool_directory__/scrappie_raw.py $input $output \${GALAXY_SLOTS:-1} + + ]]> + </command> + + <!-- ***************************************************************** --> + + <inputs> + + <param name="input" type="data" format="fast5_archive" label="Input reads" /> + + </inputs> + + <!-- ***************************************************************** --> + + <outputs> + + <data name="output" format="fasta" label="${tool.name} on ${on_string} (called.fasta)" /> + + </outputs> + + <!-- ***************************************************************** --> + + <tests> + <!-- multithreaded output is non-deterministic, so simply compare file + sizes --> + <test> + <param name="input" value="test_data.fast5.tar.gz" ftype="fast5_archive" /> + <output name="output" file="test_data.fasta" compare="sim_size" delta="0" /> + </test> + </tests> + + <!-- ***************************************************************** --> + + <help> + <![CDATA[ + +**Description** + +Scrappie provides recurrent neural network basecalling for Oxford Nanopore +MinION data. It is a technology demonstrator for the Oxford Nanopore +Research Algorithms group. It is designed for improved calling of +homopolymers over nanonet. 
Scrappie is provided unsupported by Oxford +Nanopore Technologies. + +The Galaxy wrapper has modified scrappie to take a gzip tarball of FAST5 reads +as input, such as can be produced by `poretools combine`, and always outputs a +single FASTA file. + +This is the raw basecaller. + + ]]> + </help> + + <!-- ***************************************************************** --> + + <citations> + </citations> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_data.fasta Wed Aug 30 02:55:35 2017 -0400 @@ -0,0 +1,16 @@ +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12068_ch_399_strand { "normalised_score" : 0.190867, "nblock" : 3004, "sequence_length" : 301, "blocks_per_base" : 9.980066 } +GCCTGGATCGTGGCGGTGGGCGGCGGAGGCGGAGGCGGCGTGGGCGAGGCGGTGGGCGCCGGCGTGGCGGGCGGTGGGGCCGGGGCGGTGGCGCGGTGGGCGGGCGGGAGTGGGAGGAGGCGGGTGGGAGGCGGCGGAAGCGGGCGGGGCGGGAGGGGTGGAGGCGTGAGGCGGTGATGGGGGGCGCGGTGCAAGTAGGCCGAGGCCGAGAAATGTGGGGAGCATCAGGTGGGCGGGAGGTGGCGGGTGAGGCGTCATGAAAGCCAGGCTCGTGACTCGGGAAGTGATAGCTCAGGAAGCA +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12091_ch_399_strand { "normalised_score" : 0.131128, "nblock" : 5237, "sequence_length" : 348, "blocks_per_base" : 15.048851 } +TGTGGGTGGGTGGGCGGGCGGTGGTGGCGGTGGGCGGGCGGGCGGGCGGGCGGTGGGGCGGGCGGTGTGGGGGTGGCGGGGCGGGTGGCGTGGGCGGGCGGTGGGGCGGGCGTGGCGTGGGCGGGGTGGGTGGGAGGCGGGAGGCGTGGGCGGGCGGGAGGCGGGTGGCGGGGGTGGGTGGGAGCGGGTGCGGGGGCGGGGAGGTGGGAGGCGGCGCGGGGCGGGAGGCGGGCGGTGGGGAGGCGGGGCGGGTGGGGGCGGGTGGGGAAGGGAGGCGGGCGGGGCGGGATGGAAGGGAGTGGGCAGGGGCCGGGGAGCAGGGGAAGGCAGGGGCAGTGATGGGGGCAG +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12153_ch_399_strand { "normalised_score" : 0.156256, "nblock" : 3279, "sequence_length" : 303, "blocks_per_base" : 10.821782 } +CTGGTGTGTAGATGGATGTCCAGTGGGCGGGTGACGGGGTGGTGCCCTGGTGGGCGTGGTGATGATGGGTGTGGGTGGTATTCTGATGGAATGAGTGTGGCAGTGGGTGGGTTAGTGGCGGTGTAACGTCGGCGTGTGGCAGGGAATGTTGGTGGTCTGATGACAGGTGGGATGAGTCAATGAGTGGTGTTAGTTGTGAGTTGTGATGATGATGATTTGATAGATGATAATAATGGAGATAGATCAAGTCAATGGGATGACATGGTGTAAGTGTGTTCGGTGTTTGACCAGTTGGGTGATTAT +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12175_ch_399_strand { "normalised_score" : 0.158499, "nblock" : 6272, "sequence_length" : 563, "blocks_per_base" : 11.140320 } 
+ACGTGTGGGTCAGATGGGCGGTGGTGAGGCGGGCCGGTGGAGATGGGTGTGGCAGTCGAGGCGTGGTCCTGTCGTGCACGTCGGGCGCCGGTGGCGGAGGCCGGGGGTGGAGGCGGTGGGCTCGTGGGGTGGCTGAGGGGCGGGGCCGGTGGCTGGGGGTGGCGGGCGGTGGGCGGGCGGGCGGGTGGAGGCGGCGTGGTGGGGGCGGTGGGGCGGGCTGGGTGGGCGGGGGTGGGTGGGGCTGGGTGGGTGGGGAGGCGGGCGGTGGGAAGCGGGGTGGGCGGGTGGAAGGGCGGTGGGAGGCGGGAGGCGGGTGAAGGGTGGGAGGCGGAGGTGGGGCGGGGCGGGGTGGCGGGCGTGGGGGGCGGGGTGGGGTGGCGGGCTCACGGTGGCGGGGGCGGAGTGAGGTGGGAGGCGGTGGGGTGGCGGTGGCGGCGGGAGTGGAGTCCGTGCGTGGTGGGGCGGTGTGATCAGTGTAGGTGAGAGCGGGTGCGGGGGGCGTGGGAGGGGTGGCTGGGGTGGGGAGGCGGAGGGCGGTGTGGGGTGACGTGGGCGTGGGCATG +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12081_ch_399_strand { "normalised_score" : 0.134486, "nblock" : 8572, "sequence_length" : 409, "blocks_per_base" : 20.958435 } +TGGGTGCGTGTGCGGGCGGGTGGTGGAGTGTGGTTGTGGGTGTGGTGGTGACTCGGGCTGTGCTGGTGGCTTGTGGACTCGGGTGGTGGCGGTGGGGCGGAGGTTGGGAATGGGGCGTGGGGGGTGGTGCGTGGGAGGCGGGTGGGGGAGGCGGGTGGAGGGGCGGCGGCGGGAGGCGGAAGGGCGCGGCGGGCGGGTGGGCGGTGGGCGGAAGGGTGGGTGGGGAGGCGGGTGGGCGGGCGGGCGGAGGCGGGGGCGGGCGGGAAGGGCGGGGCGGAGTGGGAAGGGCGGGGCGGAAGGTGGGTGGGACGGGGCGGGGGCGTGGGAGCGGGTGGGCGGGTGGGGCGGGCGGGTGGTGGGAGGCGGGCGGCGGGGCGGGGCGCGCGCGGGTGGGGTGCGGGGTGGGAGC +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12113_ch_399_strand { "normalised_score" : 0.154456, "nblock" : 5194, "sequence_length" : 488, "blocks_per_base" : 10.643442 } +TATCGTTGGTGGGTGGGCGGTAGATGGTGGGTTGGTGGTGGGTGGGTGTGTTGGTGGTGGATGGTGTTGTGGTGGTGGTGTGGGATGGGGTGCGTGGGTGGGTGGGGTTAATAGATGGGTTGAGTGGTGGGTGTTGTGTGGCGTGGTTGACCGTGAGAGTGGTGGCCGTGGGTTTGGTGGGCTGAGTGAGCAGGCCATGTGTTTTGTGGTGGTGTGGTGGGTAGGGGGGGCGCCGGTGTGATGGTGATGTTTGGCCGTCAGTGTGGGTGGAATGGTGGGCGGGCGGCTGAGTGGGTGCCGGGGTATGGGGTATGAAGTAAACAAAGCTATCTGTTTTGCGCGATGGCTGGCTGGAGTATTAGGGCCGAGTGGCATGGCATAGCTGCATGAGAACCTGGTAGGCGGCCGTCGGGGCGGTGCTCCGATGCTAGCAGATGGGTGGGCATGTGAGGCGTGCATGGGCGCGTGTCCAGGAAGTGTGTTTTGTG +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12085_ch_399_strand { 
"normalised_score" : 0.133404, "nblock" : 4509, "sequence_length" : 287, "blocks_per_base" : 15.710801 } +ATCTGGATTGGGTCAGTGGGTGGATGGAGCGGATGGTGTGGTTGGTGGGTGGTGATGGTGGTCGGGGTGGTGGAATGTCGTTGGTGGAGGTTGGGTTGGATGGTGGGCGGTGGGTGGGGCTGGGTGGTGGTGGTGGGTGCGTGGTGCGATGGGGCGTGTTGGGTGGGTGTGTGGGCGGGTGGGCAGTGGGTGGGTGGGTGGCGTGGGCGGTGGGCGGGCCGTGGGCGGGCGGGCGGGTGGGCGGGTGGGCGGGAGCGGGCGGGCGGGCGGGTGGGCGAGCGAGGAAG +>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12103_ch_399_strand { "normalised_score" : 0.195423, "nblock" : 4268, "sequence_length" : 443, "blocks_per_base" : 9.634312 } +ATGAATAGTGGGCGGGGGTTAAATGTCGGCTCTGTGGGTGGTGTGGGCAGATGGTGGTGGGAGCTGGGTGGGTTGGTGGGTGGGTGGCATGGTGGCTGTGGGTGTGGTGGAATGGGTGATCAATTAATAAGATGGGGTGGTGTGATATGGAGGGGTGGTGGGGTGGCGGTGACAATGTGGGGAGTGGTGGTGGGAACAGTAGATCAGGAGGGTGAGCACTGCGATGGGTGTTGGCCGTGGGTGATGGTGGGTGGGTGGCGGGGCATGGCGGGTGTTGGGTGGCGTGGGTTGGGCTGGGGCGTGGTGGGCGCGGGTGGGGTGGGCGCGGGTGGGGTGAAGCAGGAAGGGTGGGAGCGGGTGGGTGGAGTGGGGAAGGTGAAAAAGGGTTTGTGGGTGGCGGGGAGTGAGGGTGGAGGTGCGCGGGAAGAAAAGACGGGTGGGTC