Mercurial > repos > petrn > repeatexplorer
comparison test_repex_pipeline.py @ 0:f6ebec6e235e draft
Uploaded
| author | petrn |
|---|---|
| date | Thu, 19 Dec 2019 13:46:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f6ebec6e235e |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 ''' | |
| 3 Basic Tarean and RepeatExplorer tests | |
| 4 ''' | |
| 5 import subprocess | |
| 6 import tempfile | |
| 7 import unittest | |
| 8 import os | |
| 9 import shutil | |
| 10 | |
def check_for_missing_files(directory, file_list):
    '''Return the entries of file_list that do not exist under directory.

    Each entry is joined onto ``directory`` with os.path.join and tested
    with os.path.exists, so an entry that resolves to a directory counts
    as present.  The result keeps the order of ``file_list`` and holds
    the entries exactly as they were passed in (not the joined paths).
    An empty list means nothing is missing.
    '''
    return [
        name for name in file_list
        if not os.path.exists(os.path.join(directory, name))
    ]
| 21 | |
| 22 | |
class TestBasic(unittest.TestCase):
    '''End-to-end tests that run the seqclust executable.

    Every pipeline test launches ``EXECUTABLE`` with a fresh temporary
    output directory and then checks the exit status and the presence of
    a list of expected output files.  The executable path and all
    ``test_data/...`` inputs are relative, so the tests depend on the
    current working directory.  The meaning of the command line flags is
    defined by the executable, which is not part of this file.
    '''
    EXECUTABLE = "./seqclust"

    # Expected output files, relative to the run's output directory.
    # Checked by every pipeline run:
    FILE_LIST_BASIC = [
        "./seqclust/clustering/clusters/dir_CL0001/hitsort_part.csv",
        "./seqclust/clustering/clusters/dir_CL0001/reads.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/reads_selection.fasta",
        "./seqclust/clustering/clusters/dir_CL0001/dna_database_annotation.csv",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.GL",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_tmb.png",
        "./seqclust/clustering/clusters/dir_CL0001/graph_layout_directed.RData",
        "./logfile.txt", "./style1.css", "./documentation.html",
        "./tarean_report.html", "./cluster_report.html",
        "./summary_histogram.png", "./index.html", "./sequences.db",
        "./hitsort.db", "./TAREAN_consensus_rank_1.fasta",
        "./TAREAN_consensus_rank_2.fasta", "./TAREAN_consensus_rank_3.fasta",
        "./TAREAN_consensus_rank_4.fasta", "./seqclust/clustering/hitsort",
        "./seqclust/clustering/hitsort.cls"
    ]
    # Additionally checked by the RepeatExplorer (non ``-t``) runs:
    FILE_LIST_ASSEMBLY = [
        "./seqclust/small_clusters_assembly/small_clusters.aln",
        "./seqclust/small_clusters_assembly/small_clusters.ace",
        "./seqclust/small_clusters_assembly/small_clusters.fasta"
    ]
    # Additionally checked by the automatic filtering run:
    FILE_LIST_FILTERING = ["./seqclust/prerun/filter_sequences.fasta"]
    # Additionally checked by the comparative analysis run:
    FILE_LIST_COMPARATIVE = ["COMPARATIVE_ANALYSIS_COUNTS.csv"]
    # Additionally checked by the custom database run:
    FILE_LIST_CUSTOM_DATABASE = [
        "./seqclust/custom_databases/extra_database",
        "./seqclust/clustering/clusters/dir_CL0001/custom_db_extra_database_annotation.csv"
    ]

    # helper function
    def tarean_run(self, cmd_options, file_list):
        '''Run the executable once and verify exit status and outputs.

        cmd_options -- list of extra command line arguments; they are
                       appended after ``-l <logfile> -v <tmpdir>``.
        file_list   -- paths, relative to the output directory, that
                       must exist after the run.

        Fails the test when the exit status is non-zero or when any
        entry of ``file_list`` is missing.  The temporary directory and
        log file are removed only after both assertions pass; on a
        failure they stay at the printed locations.
        '''
        # output goes to tmp directory
        tmpdir = tempfile.mkdtemp()
        # Only the path of the log file is used here, so close our own
        # handle right away instead of leaking one descriptor per test.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            logfile_name = handle.name
        command = [self.EXECUTABLE, '-l', logfile_name, '-v', tmpdir] + cmd_options
        print("\n------------------------------------------------------")
        print("Temp files:")
        print(" tmpdir : ", tmpdir)
        print(" logfile : ", logfile_name)
        print("------------------------------------------------------")
        print(command)
        status = subprocess.call(command)
        missing_files = check_for_missing_files(directory=tmpdir,
                                                file_list=file_list)
        if status:
            # print log file
            print("Non zero exit status!")
            with open(logfile_name) as f:
                print(f.read())

        self.assertEqual(status, 0)
        self.assertEqual(
            len(missing_files),
            0,
            msg="\n missing files: \n" + "\n".join(missing_files))
        shutil.rmtree(tmpdir)
        os.remove(logfile_name)

    def test_help(self):
        '''Help option (-h) prints a usage message and exits with 0'''
        result = subprocess.run([self.EXECUTABLE, "-h"],
                                stdout=subprocess.PIPE)
        # Decode instead of matching against the repr of a list of bytes.
        output = result.stdout.decode(errors="replace")
        self.assertRegex(output, "usage")
        # argparse titles this section "optional arguments" before
        # Python 3.10 and "options" from 3.10 on, so accept either.
        # NOTE(review): assumes the executable builds its help text with
        # argparse - confirm.
        self.assertRegex(output, r"(?m)optional arguments|^options:")
        self.assertEqual(result.returncode, 0)

    # TAREAN runs (-t)
    def test_basic_no_merging_tarean(self):
        '''TAREAN run (-t) on the 10k paired reads, no -M option'''
        cmd_options = ['-t', '-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean(self):
        '''TAREAN run (-t) on the 10k paired reads with -M 0.2'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_basic_with_merging_tarean_dust_off(self):
        '''TAREAN run (-t) with -M 0.2 and -opt ILLUMINA_DUST_OFF'''
        cmd_options = ['-t', '-p', '-M', '0.2', '-s', '6000',
                       "-opt", "ILLUMINA_DUST_OFF",
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging_tarean(self):
        '''Using more data with tarean'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_long_with_merging2_tarean(self):
        '''Using more data with tarean 300k reads'''
        cmd_options = ['-t', '-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(cmd_options, file_list=self.FILE_LIST_BASIC)

    def test_short_comparative_re(self):
        '''comparative analysis, two species, small run'''
        cmd_options = ['-P', '3', '-p', '-m', '0.01',
                       'test_data/sequences_comparative.fasta']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_COMPARATIVE)

    # REPEATEXPLORER - full runs
    def test_basic_no_merging_re(self):
        '''RepeatExplorer run on the 10k paired reads, no -M option'''
        cmd_options = ['-p', '-s', '6000', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_no_merging_re_diamond(self):
        '''RepeatExplorer run on the 10k paired reads with -D DIAMOND'''
        cmd_options = ['-p', '-s', '6000', '-D', 'DIAMOND',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_basic_with_merging_re(self):
        '''RepeatExplorer run on the 10k paired reads with -M 0.2'''
        cmd_options = ['-p', '-M', '0.2', '-s', '6000',
                       'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re(self):
        '''RepeatExplorer run on the 25k paired reads with -M 0.1'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_re_diamond(self):
        '''RepeatExplorer run on the 25k paired reads using diamond'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01', '-D', 'DIAMOND',
                       'test_data/LAS_paired_25k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging2_re(self):
        '''RepeatExplorer run on the 300k paired reads with -M 0.1'''
        cmd_options = ['-p', '-M', '0.1', '-m', '0.01',
                       'test_data/LAS_paired_300k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_ASSEMBLY)

    def test_long_with_merging_and_filtering_re(self):
        '''RepeatExplorer run on more data, test of automatic filtering'''
        cmd_options = ['-A', '-p', '-M', '0.2', '-m', '0.01',
                       'test_data/ceu_200k.fasta']
        self.tarean_run(
            cmd_options,
            file_list=(self.FILE_LIST_BASIC + self.FILE_LIST_FILTERING +
                       self.FILE_LIST_ASSEMBLY))

    def test_custom_database_re(self):
        '''RepeatExplorer run with an extra custom database (-d)'''
        cmd_options = ['-p', '-d', 'test_data/extra_database',
                       'extra_database', 'test_data/LAS_paired_10k.fas']
        self.tarean_run(
            cmd_options,
            file_list=self.FILE_LIST_BASIC + self.FILE_LIST_CUSTOM_DATABASE)
| 193 | |
| 194 | |
# Names of TestBasic test methods, grouped by pipeline mode and run length.
SHORT_TASK_NAME_LIST_TAREAN = ['test_help', 'test_basic_no_merging_tarean',
                               'test_basic_with_merging_tarean',
                               'test_basic_with_merging_tarean_dust_off']
LONG_TASK_NAME_LIST_TAREAN = ['test_long_with_merging_tarean',
                              'test_long_with_merging2_tarean']
SHORT_TASK_NAME_LIST_RE = ['test_basic_no_merging_re',
                           'test_basic_with_merging_re',
                           'test_basic_no_merging_re_diamond']
LONG_TASK_NAME_LIST_RE = ['test_long_with_merging_re',
                          'test_long_with_merging2_re',
                          'test_long_with_merging_and_filtering_re',
                          'test_long_with_merging_re_diamond']

COMPARATIVE_LIST = ['test_short_comparative_re']
# Must be the name of an existing TestBasic method: TestBasic(name) raises
# ValueError for an unknown name.  The previous entry,
# 'test_short_custom_database', matched no method.
CUSTOM_DATABASE_LIST = ['test_custom_database_re']

# Test suites:
SHORT_TAREAN_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in SHORT_TASK_NAME_LIST_TAREAN])
LONG_TAREAN_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in LONG_TASK_NAME_LIST_TAREAN])
COMPARATIVE_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in COMPARATIVE_LIST])
CUSTOM_DB_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in CUSTOM_DATABASE_LIST])

SHORT_RE_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in SHORT_TASK_NAME_LIST_RE])
LONG_RE_SUITE = unittest.TestSuite(
    [TestBasic(i) for i in LONG_TASK_NAME_LIST_RE])

SHORT_SUITE = unittest.TestSuite([SHORT_RE_SUITE, SHORT_TAREAN_SUITE,
                                  COMPARATIVE_SUITE, CUSTOM_DB_SUITE])

LONG_LONG = unittest.TestSuite([LONG_RE_SUITE, LONG_TAREAN_SUITE])
# Alias named in parallel with SHORT_SUITE; LONG_LONG is kept so existing
# references keep working.
LONG_SUITE = LONG_LONG

# for single test testing
if __name__ == '__main__':
    unittest.main(verbosity=2)
