annotate nanopolish_extract.py @ 6:36cc4ae4160e draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d47b131af7d3a9f8446732f91ec08503c99aab58
author jdv
date Mon, 04 Dec 2017 02:03:21 -0500
parents e4b6d4a53f2e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
1 #!/usr/bin/env python3
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
2
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
3 import sys, os
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
4 import glob
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
5 import subprocess
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
6 import shutil
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
7 import h5py
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
8 import numpy as np
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
9
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
10 def main():
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
11 tar_file = sys.argv[1]
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
12 out_file = sys.argv[2]
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
13 threads = sys.argv[3] # currently unused
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
14
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
15 extract_fast5(tar_file)
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
16
2
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
17 subprocess.call([
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
18 "nanopolish",
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
19 "extract",
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
20 "--recurse",
0
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
21 "--fastq",
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
22 "--output", out_file,
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
23 "in_dir" ])
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
24
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
25 def extract_fast5(fn):
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
26
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
27 try:
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
28 in_dir = "in_dir"
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
29 if not os.path.exists(in_dir):
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
30 os.makedirs(in_dir)
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
31
2
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
32 # python's tarfile interface does not sanitize file paths within
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
33 # tarballs, which can be a big security risk. GNU tar does sanitize by
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
34 # default, so it's easier/safer here just to call the system tar
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
35 subprocess.call([
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
36 "tar",
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
37 "-xf",
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
38 fn,
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
39 "-C",
e4b6d4a53f2e planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit d8cc434bd1704b2834f89b7d91370f356e3ac85a
jdv
parents: 0
diff changeset
40 "in_dir"])
0
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
41
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
42 files = glob.glob(
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
43 os.path.join(in_dir, "**", "*.fast5"),
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
44 recursive=True
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
45 )
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
46 if len(files) < 1:
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
47 raise ValueError('No FAST5 files found')
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
48
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
49 except OSError as e:
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
50 print("Unexpected error:", e.strerror)
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
51 raise
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
52
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
53 except:
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
54 print("Unexpected error:", sys.exc_info()[0])
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
55 raise
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
56
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
57 if __name__ == "__main__" :
2136c2725fc4 planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/nanopolish commit 0206b7bd377b39ad28592b0a02588f40575efd3e-dirty
jdv
parents:
diff changeset
58 main()