annotate test/test_dedup_hash.py @ 0:627dc826a68f draft default tip

planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author mvdbeek
date Wed, 23 Nov 2016 07:46:20 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
1 import hashlib
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
2 import inspect
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
3 import os
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
4 import subprocess
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
5 import sys
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
6 import tempfile
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
7
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
8
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
# Directory containing this test module, resolved from the running frame.
currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)
# One level above the test directory.
parent_dir = os.path.dirname(currentdir)
# Put <parent_dir>/dedup_hash/ first on sys.path before importing the module
# under test, so the import below searches that directory first.
sys.path.insert(0, os.path.join(parent_dir, "dedup_hash/"))
import dedup_hash  # noqa: E402  (must come after the sys.path tweak)
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
13
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
14
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
# Location of the fixture files, relative to the parent of the test directory.
TEST_DATA_DIR = os.path.join(parent_dir, "test-data/")

# Fixture file names (resolved against TEST_DATA_DIR by prepare_args).
UNCOMPRESSED_IN = ["r1.fastq", "r2.fastq"]
COMPRESSED_IN = ["r1.fastq.gz", "r2.fastq.gz"]
UNCOMPRESSED_OUT = ["r1_dedup.fastq", "r2_dedup.fastq"]
SINGLE_IN = ["r1.fastq"]
SINGLE_OUT = ["r1_dedup.fastq"]
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
21
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
22
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
23
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
def run(input):
    """Run one end-to-end check for the given list of fixture file names.

    Builds the keyword arguments with ``prepare_args``, executes the
    deduplication via ``run_dedup`` and finally verifies the result with
    ``compare_output``.

    NOTE: the parameter name shadows the ``input`` builtin; it is kept
    unchanged so existing callers are unaffected.
    """
    dedup_kwargs = prepare_args(input)
    run_dedup(dedup_kwargs)
    compare_output(dedup_kwargs)
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
28
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
29
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
def compare_output(args):
    """Compare the first produced output file with the reference output.

    The MD5 digest of ``args['outfiles'][0]`` is compared with the digest of
    ``r1_dedup.fastq`` in ``TEST_DATA_DIR``. On a mismatch, ``diff -Nru`` is
    run on the two files so the differences are shown.

    :param args: dict as returned by ``prepare_args``; only the first entry
        of ``args['outfiles']`` is inspected.
    :raises subprocess.CalledProcessError: if ``diff`` exits non-zero, which
        it does when the files differ.
    """
    # NOTE(review): only the first output file is verified; a second output
    # (paired input) is not compared against any reference here.
    ref_out1 = os.path.join(TEST_DATA_DIR, 'r1_dedup.fastq')
    produced = args['outfiles'][0]
    # Explicit comparison instead of `assert`: assert statements are removed
    # under `python -O`, which would make this check a silent no-op.
    if md5(produced) != md5(ref_out1):
        # Pass an argument list so paths containing spaces are not split.
        subprocess.check_call(['diff', '-Nru', produced, ref_out1])
    print('all good')
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
38
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
39
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
def prepare_args(test_files):
    """Build the keyword arguments for one deduplication run.

    :param test_files: fixture file names, resolved against ``TEST_DATA_DIR``.
    :returns: dict with ``infiles`` (full input paths), ``outfiles`` (one
        fresh temporary file path per input file) and ``write_gzip`` set to
        ``False``.
    """
    infiles = [os.path.join(TEST_DATA_DIR, test_file) for test_file in test_files]
    # Same number of output files as input files. Each temporary file is
    # closed right away (it is kept on disk because delete=False) so no open
    # handle is leaked while another writer uses the path.
    outfiles = []
    for _ in test_files:
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            outfiles.append(handle.name)
    return {
        'infiles': infiles,
        'outfiles': outfiles,
        'write_gzip': False,
    }
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
47
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
48
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
def run_dedup(kwargs):
    """Call the object returned by ``dedup_hash.get_unique_fastq_instance()``.

    :param kwargs: dict expanded into keyword arguments for that callable
        (as built by ``prepare_args``).
    """
    # presumably this performs the deduplication and writes to the
    # `outfiles` paths -- confirm against the dedup_hash module.
    dedup_hash.get_unique_fastq_instance()(**kwargs)
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
52
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
def md5(fname, chunk_size=4096):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and read in blocks so its content is
    never held in memory all at once.

    :param fname: path of the file to hash.
    :param chunk_size: number of bytes read per iteration (default 4096).
        It does not influence the resulting digest.
    :returns: the digest as a hex string.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
59
627dc826a68f planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
if __name__ == '__main__':
    # Exercise paired uncompressed, paired gzip-compressed and single-file
    # input, in that order.
    for fixture_names in (UNCOMPRESSED_IN, COMPRESSED_IN, SINGLE_IN):
        run(fixture_names)