Mercurial > repos > p.lucas > taxid_genusexpand_taxid2acc
comparison TAXID_genusexpand_taxid2acc.py @ 0:e7dd595fb0dd draft
Uploaded 09 03 23 first upload of script
| author | p.lucas | 
|---|---|
| date | Thu, 09 Mar 2023 16:50:29 +0000 | 
| parents | |
| children | 81acd8138218 | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:e7dd595fb0dd | 
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 # -*- coding: utf-8 -*- | |
| 3 ### | |
| 4 # USE PYTHON3 | |
| 5 # From a file of taxid and accession numbers (tsv), deduce species taxids, get ref genome acc nr list (all chr). (it will allow to have complete genomes when aligning with host to remove host reads) | |
| 6 # provide 2 files: | |
| 7 # - file with all acc numbers that are included in taxid(s) provided by user (extended to genus level) | |
| 8 # - file with all acc numbers that are excluded in taxid(s) provided by user (extended to genus level) | |
| 9 ### | |
| 10 | |
| 11 ### Libraries to import: | |
| 12 import argparse, os, sys, csv, warnings, re, itertools, operator | |
| 13 from os import path | |
| 14 import ncbi_genome_download as ngd | |
| 15 # to find all lineage and in case of no complete genome, the deduction of closests complete genomes (same genus, order...) | |
| 16 from ete3 import NCBITaxa | |
| 17 | |
| 18 # to be able to report line number in error messages | |
| 19 import inspect | |
| 20 frame = inspect.currentframe() | |
| 21 | |
| 22 # debug | |
| 23 b_test_load_taxids = False # ok 2023 03 07 | |
| 24 b_test_add_host_chr_taxids_accnr_from_ori_list = False # ok 2023 03 07 | |
| 25 | |
| 26 prog_tag = '[' + os.path.basename(__file__) + ']' | |
| 27 | |
| 28 # boolean to know if we dowload ncbi taxonomy file in current env | |
| 29 b_load_ncbi_tax_f = False | |
| 30 | |
| 31 # list of interesting taxid (fathers) | |
| 32 taxidlist_f = '' | |
| 33 taxidlist = [] | |
| 34 accnrlist = [] | |
| 35 | |
| 36 # order = -4 | |
| 37 # family or clade = -3 | |
| 38 # subtribe or genus = -2 | |
| 39 curr_index_in_lineage = -2 | |
| 40 min_index_in_lineage = -4 | |
| 41 | |
| 42 # boolean to know if we download ncbi taxonomy file in current env | |
| 43 b_load_ncbi_tax_f = False | |
| 44 b_test_all = False | |
| 45 b_test = False | |
| 46 b_acc_in_f = False | |
| 47 b_acc_out_f = False | |
| 48 | |
| 49 b_verbose = False | |
| 50 | |
| 51 # rank = '' # rank level retained by user | |
| 52 # rank_num = index of rank retained by user | |
| 53 | |
| 54 # # set to check that provided rank exist to be sure to be able to use it | |
| 55 # ranks = { | |
| 56 # 'superkingdom' => 0, | |
| 57 # # 'clade', # no, several clade, name is too generic | |
| 58 # 'kingdom' => 1, | |
| 59 # 'phylum' => 2, | |
| 60 # 'subphylum' => 3, | |
| 61 # 'superclass' => 4, | |
| 62 # 'class' => 5, | |
| 63 # 'superorder' => 6, | |
| 64 # 'order' => 7, | |
| 65 # 'suborder' => 8, | |
| 66 # 'infraorder' => 9, | |
| 67 # 'parvorder' => 10, | |
| 68 # 'superfamily' => 11, | |
| 69 # 'family' => 12, | |
| 70 # 'subfamily' => 13, | |
| 71 # 'genus' => 14, | |
| 72 # 'species' => 15, | |
| 73 # 'subspecies' => 16 | |
| 74 # } | |
| 75 | |
| 76 parser = argparse.ArgumentParser() | |
| 77 parser.add_argument("-i", "--taxid_acc_in_f", dest='taxid_acc_in_f', | |
| 78 help="taxid acc_number list in tsv (tabular separated at each line)", | |
| 79 metavar="FILE") | |
| 80 parser.add_argument("-o", "--acc_out_f", dest='acc_out_f', | |
| 81 help="[optional if --taxid_acc_in_f provided] Output text file with accession numbers from krona_taxid_acc_f NOT found under taxid in ncbi taxonomy tree", | |
| 82 metavar="FILE") | |
| 83 # parser.add_argument("-r", "--rank", dest='rank', | |
| 84 # help="[Optional] default: genus, rank to retain for each acc number provided. We will retain all the acc number descendant from this 'rank' (genus) taxid list", | |
| 85 # action="store_const") | |
| 86 parser.add_argument("-n", "--ncbi_tax_f", dest='ncbi_tax_f', | |
| 87 help="[Optional] ncbi tabular file with taxonomy organized to represent a tree", | |
| 88 metavar="FILE") | |
| 89 parser.add_argument("-l", "--load_ncbi_tax_f", dest='b_load_ncbi_tax_f', | |
| 90 help="[Optional] load ncbi tabular file with taxonomy organized to represent a tree in current env at default location (~/.etetoolkit/taxa.sqlite). Only needed for first run", | |
| 91 action='store_true') | |
| 92 parser.add_argument("-z", "--test_all", dest='b_test_all', | |
| 93 help="[Optional] run all tests. Additionally, with --load_ncbi_tax_f, allow to download ncbi ete3 tax db the first time you use the script", | |
| 94 action='store_true') | |
| 95 parser.add_argument("-v", "--verbose", dest='b_verbose', | |
| 96 help="[Optional] To have details on records when running", | |
| 97 action='store_true') | |
| 98 parser.set_defaults(b_load_ncbi_tax_f=False) | |
| 99 parser.set_defaults(b_test_all=False) | |
| 100 parser.set_defaults(b_verbose=False) | |
| 101 | |
| 102 # get absolute path in case of files | |
| 103 args = parser.parse_args() | |
| 104 | |
| 105 # ------------------------------------------- | |
| 106 # check arguments | |
| 107 b_test_all = args.b_test_all | |
| 108 | |
| 109 if b_test_all: | |
| 110 b_test_load_taxids = False | |
| 111 b_test_add_host_chr_taxids_accnr_from_ori_list = True | |
| 112 b_test = True | |
| 113 b_acc_in_f = True | |
| 114 b_acc_out_f = True | |
| 115 else: | |
| 116 b_test = (b_test_load_taxids or | |
| 117 b_test_add_host_chr_taxids_accnr_from_ori_list) | |
| 118 | |
| 119 if ((not b_test)and | |
| 120 ((len(sys.argv) < 1) or (len(sys.argv) > 5))): | |
| 121 print("\n".join(["To use this scripts, run:", | |
| 122 "conda activate TAXID_genusexpand_taxid2acc", | |
| 123 "./TAXID_genusexpand_taxid2acc.py --test_all --load_ncbi_tax_f", | |
| 124 " ", | |
| 125 "Then you won't need --test_all --load_ncbi_tax_f options\n\n", | |
| 126 "Then, as an example:\n\n", | |
| 127 ' '.join(['./TAXID_genusexpand_taxid2acc.py', | |
| 128 '-i taxid_accnr_list.tsv', | |
| 129 '-o accnr_out_list.txt']),"\n\n" ])) | |
| 130 parser.print_help() | |
| 131 print(prog_tag + "[Error] we found "+str(len(sys.argv)) + | |
| 132 " arguments, exit line "+str(frame.f_lineno)) | |
| 133 sys.exit(0) | |
| 134 | |
| 135 # print('args:', args) | |
| 136 # if(not b_test): | |
| 137 if args.ncbi_tax_f is not None: | |
| 138 ncbi_tax_f = os.path.abspath(args.ncbi_tax_f) | |
| 139 else: | |
| 140 # ncbi_tax_f = "/nfs/data/db/ete3/taxa.sqlite" | |
| 141 ncbi_tax_f = os.path.expanduser("~/.etetoolkit/taxa.sqlite") | |
| 142 if args.taxid_acc_in_f is not None: | |
| 143 taxid_acc_in_f = os.path.abspath(args.taxid_acc_f) | |
| 144 b_acc_in_f = True | |
| 145 elif(not b_test): | |
| 146 sys.exit("[Error] You must provide taxid_acc_in_f") | |
| 147 if args.acc_out_f is not None: | |
| 148 acc_out_f = os.path.abspath(args.acc_out_f) | |
| 149 b_acc_out_f = True | |
| 150 elif(not b_test): | |
| 151 sys.exit("-acc_out_f <accnr_file>n must be provided\n") | |
| 152 # if args.rank is not None: | |
| 153 # rank = 'genus' | |
| 154 # else: | |
| 155 # rank = args.rank | |
| 156 | |
| 157 if args.b_verbose is not None: | |
| 158 b_verbose = int(args.b_verbose) | |
| 159 | |
| 160 if(not b_test): | |
| 161 if (not b_acc_in_f) and (not b_acc_out_f): | |
| 162 sys.exit(prog_tag + "[Error] You must provide either --acc_f <file> and -acc_out_f <file>") | |
| 163 | |
| 164 # # store index of the rank expected by user | |
| 165 # rank_num = ranks{ rank } | |
| 166 | |
| 167 # -------------------------------------------------------------------------- | |
| 168 # to sort uniq, for a list, only need to add list conversion | |
| 169 # -------------------------------------------------------------------------- | |
| 170 mapper= map # Python ≥ 3 | |
| 171 def sort_uniq(sequence): | |
| 172 return mapper( | |
| 173 operator.itemgetter(0), | |
| 174 itertools.groupby(sorted(sequence))) | |
| 175 # -------------------------------------------------------------------------- | |
| 176 | |
| 177 # -------------------------------------------------------------------------- | |
| 178 # load taxid acc list, return taxidlist | |
| 179 # -------------------------------------------------------------------------- | |
| 180 def load_taxids(taxid_acc_tabular_f): | |
| 181 | |
| 182 if not path.exists(taxid_acc_tabular_f): | |
| 183 sys.exit("Error " + taxid_acc_tabular_f + | |
| 184 " file does not exist, line "+ str(sys._getframe().f_lineno) ) | |
| 185 | |
| 186 cmd = "cut -f 1,2 "+taxid_acc_tabular_f+" | sort | uniq " | |
| 187 # cmd = "cut -f 1 "+taxid_acc_tabular_f+" | sort | uniq " | |
| 188 | |
| 189 for line in os.popen(cmd).readlines(): | |
| 190 k, v = line.rstrip().split() | |
| 191 taxidlist.append(k) | |
| 192 accnrlist.append(v) | |
| 193 | |
| 194 return taxidlist | |
| 195 # -------------------------------------------------------------------------- | |
| 196 | |
| 197 # test load_taxids function | |
| 198 # display taxidlist, then exit | |
| 199 if b_test_load_taxids: | |
| 200 taxid_acc_tabular_f = 'megablast_out_f_taxid_acc_host.tsv' | |
| 201 print("START b_test_load_taxids") | |
| 202 print("loading "+taxid_acc_tabular_f+" file") | |
| 203 taxidlist = load_taxids(taxid_acc_tabular_f) | |
| 204 for i in range(len(taxidlist)): | |
| 205 print(f"{taxidlist[i]}\t{accnrlist[i]}") | |
| 206 print("END b_test_load_taxids") | |
| 207 if not b_test_add_host_chr_taxids_accnr_from_ori_list: | |
| 208 sys.exit() | |
| 209 # -------------------------------------------------------------------------- | |
| 210 | |
| 211 # # -------------------------------------------------------------------------- | |
| 212 # # needs internet connexion, not possible | |
| 213 # # -------------------------------------------------------------------------- | |
| 214 # def get_leave_taxid_from_acc_nr(accnrlist): | |
| 215 | |
| 216 # # deduce a list of taxid from a list of accession numbers | |
| 217 # cmd = "cat megablast_out_f_acc_out_taxid.tsv | epost -db nuccore | esummary | xtract -pattern DocumentSummary -element TaxId | sort -u" | |
| 218 # for line in os.popen(cmd).readlines(): | |
| 219 # taxidlist.append(line.rstrip()) | |
| 220 | |
| 221 # return taxidlist | |
| 222 # # -------------------------------------------------------------------------- | |
| 223 | |
| 224 # -------------------------------------------------------------------------- | |
| 225 # function to retain the most recent acc nr for host complete genome found: | |
| 226 # - return acc nr of most recent complete genome | |
| 227 # - print accnr species and name retained | |
| 228 # - reinitiate tmp lists of accnrlisttmp speciestmp and nametmp | |
| 229 # -------------------------------------------------------------------------- | |
| 230 def retain_1accnr(accnrlisttmp, speciestmp, nametmp): | |
| 231 | |
| 232 max_accnr_version = 0 | |
| 233 curr_accnr_version = 0 | |
| 234 max_accnr_nr = 0 | |
| 235 curr_accnr_nr = 0 | |
| 236 kept_accnr_i = 0 | |
| 237 p = re.compile(".*?(\d+)\.(\d+)$") | |
| 238 | |
| 239 # print(f"{prog_tag} retain_1accnr({accnrlisttmp}, {speciestmp}, {nametmp}") | |
| 240 | |
| 241 for i in range(len(accnrlisttmp)): | |
| 242 m = p.match( accnrlisttmp[i] ) | |
| 243 if m: | |
| 244 # print('Match found: ', m.group(2)) | |
| 245 curr_accnr_version = int(m.group(2)) | |
| 246 accnr_nr = int(m.group(1)) | |
| 247 if curr_accnr_version > max_accnr_version: | |
| 248 max_accnr_version = curr_accnr_version | |
| 249 kept_accnr_i = i | |
| 250 # print(f"record kept_accnr_i:{kept_accnr_i}") | |
| 251 elif curr_accnr_nr > max_accnr_nr: | |
| 252 max_accnr_nr = curr_accnr_nr | |
| 253 kept_accnr_i = i | |
| 254 # print(f"record kept_accnr_i:{kept_accnr_i}") | |
| 255 | |
| 256 else: | |
| 257 sys.exit(f"{prog_tag} No version found for accnr:{accnrlisttmp[i]}") | |
| 258 | |
| 259 print(f"retained accnr:{accnrlisttmp[kept_accnr_i]}\tspecies:{speciestmp[kept_accnr_i]}\tname:{nametmp[kept_accnr_i]}") | |
| 260 kept_accn = accnrlisttmp[kept_accnr_i] | |
| 261 | |
| 262 return kept_accn | |
| 263 # -------------------------------------------------------------------------- | |
| 264 | |
| 265 # -------------------------------------------------------------------------- | |
| 266 # procedure to find complete genome closely related to current taxid | |
| 267 # goes upper in taxonomy if nothing found until order | |
| 268 # -------------------------------------------------------------------------- | |
| 269 def ngd_upper_lineage(curr_index_in_lineage, | |
| 270 lineage, | |
| 271 ncbi, | |
| 272 accnrlisttmp, # current working list | |
| 273 accnrlist, # final list, if something added (or min index reached), recursivity stop | |
| 274 speciestmp, | |
| 275 nametmp | |
| 276 ): | |
| 277 print(f"{prog_tag} ngd_upper_lineage with curr_index_in_lineage:{curr_index_in_lineage}") | |
| 278 | |
| 279 # deduce up rank, search complet genome/chr in | |
| 280 upper_taxid=str(lineage[curr_index_in_lineage]) # order when last is species | |
| 281 rank = ncbi.get_rank([lineage[curr_index_in_lineage]]) | |
| 282 name = ncbi.get_taxid_translator([lineage[curr_index_in_lineage]]) | |
| 283 print(f"{prog_tag} test with taxid:{upper_taxid} corresponding to rank:{rank}") | |
| 284 leaves_taxids = ncbi.get_descendant_taxa(upper_taxid, | |
| 285 intermediate_nodes=False, | |
| 286 collapse_subspecies=False, | |
| 287 return_tree=False | |
| 288 ) | |
| 289 # int conversion to strings | |
| 290 leaves_taxids = list(map(str, leaves_taxids)) | |
| 291 leaves_taxids_list = ','.join(leaves_taxids) | |
| 292 cmd = f"ncbi-genome-download -s genbank --taxids {leaves_taxids_list} --assembly-level complete,chromosome --dry-run vertebrate_other,vertebrate_mammalian,plant,invertebrate 2>&1" | |
| 293 # print(f"{prog_tag} cmd:{cmd}") | |
| 294 | |
| 295 # specific to retain_1accn to avoid lists are crashed by other ngd call | |
| 296 accnrlisttmp_r = [] | |
| 297 speciestmp_r = [] | |
| 298 nametmp_r = [] | |
| 299 for line in os.popen(cmd).readlines(): | |
| 300 | |
| 301 if not re.match("^Considering", line): | |
| 302 # print(f"line:{line.rstrip()}") | |
| 303 if re.match("^(?:ERROR|Error): No downloads", line): | |
| 304 print(f"{prog_tag} No chr/complete genome for taxid:{upper_taxid} rank:{rank} (expanding name:{name})") | |
| 305 if curr_index_in_lineage > min_index_in_lineage: # need to go on with upper lineage if not last accepted | |
| 306 curr_index_in_lineage = curr_index_in_lineage - 1 | |
| 307 print(f"{prog_tag} ngd_upper_lineage call {curr_index_in_lineage} line {str(sys._getframe().f_lineno)}") | |
| 308 return ngd_upper_lineage(curr_index_in_lineage, | |
| 309 lineage, | |
| 310 ncbi, | |
| 311 accnrlisttmp, # current working list | |
| 312 accnrlist, # final list, if something added (or min index reached), recursivity stop | |
| 313 speciestmp, | |
| 314 nametmp | |
| 315 ) | |
| 316 else: | |
| 317 acc_nr, species, name = line.rstrip().split("\t") | |
| 318 accnrlisttmp_r.append(acc_nr) | |
| 319 speciestmp_r.append(species) | |
| 320 nametmp_r.append(name) | |
| 321 if b_verbose: | |
| 322 print(f"{prog_tag} we found for {species} chr fasta for host genome with accnr {acc_nr} (name:{name})") | |
| 323 | |
| 324 # retain only the most recent complete genome for current treated taxid | |
| 325 return retain_1accnr(accnrlisttmp_r, speciestmp_r, nametmp_r) | |
| 326 | |
| 327 # else: | |
| 328 # print(f"line matching Considering:{line}") | |
| 329 # -------------------------------------------------------------------------- | |
| 330 | |
| 331 # -------------------------------------------------------------------------- | |
| 332 # read taxids, deduce complete genomes available in genblank, provides in output file | |
| 333 # the acc number in addition to those already listed | |
| 334 # -------------------------------------------------------------------------- | |
| 335 def add_host_chr_taxids_accnr_from_ori_list(taxidlist, | |
| 336 accnrlist, | |
| 337 acc_out_f): | |
| 338 | |
| 339 # store all accnr found for complete genome of current taxid (or from same family/order) | |
| 340 # the aim is to keep only the most recent/complete | |
| 341 accnrlisttmp = [] | |
| 342 speciestmp = [] | |
| 343 nametmp = [] | |
| 344 | |
| 345 # get host complete genome when found using ncbi_genome_download | |
| 346 taxids_list=','.join(taxidlist) | |
| 347 | |
| 348 # # ------------------------------------------ | |
| 349 # # ncbi-genome-download as a library | |
| 350 # ngd_out_f= os.getcwd()+'/accnr_sp_accnr.tsv' | |
| 351 # ngd.download(section='genbank', | |
| 352 # taxids=taxids_list, | |
| 353 # assembly_levels='chromosome', | |
| 354 # flat_output=True, | |
| 355 # output=ngd_out_f, | |
| 356 # groups='vertebrate_other,vertebrate_mammalian,plant,invertebrate', | |
| 357 # dry_run=True | |
| 358 # ) | |
| 359 | |
| 360 | |
| 361 # cmd = "cut -f 1,2 "+ngd_out_f | |
| 362 | |
| 363 # for line in os.popen(cmd).readlines(): | |
| 364 # acc_nr, species = line.rstrip().split() | |
| 365 # accnrlist.append(acc_nr) | |
| 366 | |
| 367 # if b_verbose: | |
| 368 # print(f"{prog_tag} we found for {species} chr fasta for host genome with accnr {acc_nr}") | |
| 369 # # ------------------------------------------ | |
| 370 | |
| 371 # ------------------------------------------ | |
| 372 # ncbi-genome-download as executable script | |
| 373 # ------------------------------------------ | |
| 374 | |
| 375 # load NCBITaxa | |
| 376 ncbi = NCBITaxa() # Install ete3 db in local user file (.ete_toolkit/ directory) | |
| 377 print(prog_tag + " Try to load ncbi tax db file:"+ncbi_tax_f) | |
| 378 ncbi = NCBITaxa(dbfile=ncbi_tax_f) | |
| 379 if (not os.path.isfile(ncbi_tax_f)) or b_load_ncbi_tax_f: | |
| 380 try: | |
| 381 ncbi.update_taxonomy_database() | |
| 382 except: | |
| 383 warnings.warn(prog_tag+"[SQLite Integrity error/warning] due to redundant IDs") | |
| 384 | |
| 385 for taxid_u in taxidlist: | |
| 386 cmd = f"ncbi-genome-download -s genbank --taxids {taxid_u} --assembly-level complete,chromosome --dry-run vertebrate_other,vertebrate_mammalian,plant,invertebrate 2>&1" | |
| 387 for line in os.popen(cmd).readlines(): | |
| 388 # ERROR: No downloads matched your filter. Please check your options. | |
| 389 if re.match("^(?:ERROR|Error): No downloads", line): | |
| 390 # get complete lineage: accept ONLY leave taxid? (species) | |
| 391 lineage = ncbi.get_lineage(int(taxid_u)) | |
| 392 name = ncbi.translate_to_names(lineage) | |
| 393 if b_verbose: | |
| 394 print(f"taxid:{taxid_u}\tlineage:{lineage}\tname:{name}") | |
| 395 | |
| 396 # same search but going upper in taxonomy, finding leaves taxid to find new closeley related complete genome | |
| 397 # print(f"{prog_tag} ngd_upper_lineage call {curr_index_in_lineage} line {str(sys._getframe().f_lineno)}") | |
| 398 new_acc_nr = ngd_upper_lineage(curr_index_in_lineage, | |
| 399 lineage, | |
| 400 ncbi, | |
| 401 accnrlisttmp, # current working list | |
| 402 accnrlist, # final list, if something added (or min index reached), recursivity stop | |
| 403 speciestmp, | |
| 404 nametmp | |
| 405 ) | |
| 406 accnrlist.append( new_acc_nr ) | |
| 407 | |
| 408 # initialize for next search | |
| 409 accnrlisttmp = [] | |
| 410 speciestmp = [] | |
| 411 nametmp = [] | |
| 412 | |
| 413 elif not re.match("^Considering", line): | |
| 414 # print(f"line:{line.rstrip()}") | |
| 415 acc_nr, species, name = line.rstrip().split("\t") | |
| 416 accnrlisttmp.append(acc_nr) | |
| 417 speciestmp.append(species) | |
| 418 nametmp.append(name) | |
| 419 if b_verbose: | |
| 420 print(f"{prog_tag} we found for {species} chr fasta for host genome with accnr {acc_nr} (name:{name})") | |
| 421 | |
| 422 # retain only the most recent complete genome for current treated taxid | |
| 423 if len(accnrlisttmp): | |
| 424 accnrlist.append( retain_1accnr(accnrlisttmp, speciestmp, nametmp) ) | |
| 425 | |
| 426 # remove redundant accnr | |
| 427 accnrlist = list(sort_uniq(accnrlist)) | |
| 428 with open(acc_out_f, "w") as record_file: | |
| 429 for accnr in accnrlist: | |
| 430 record_file.write("%s\n" % (accnr)) | |
| 431 # ------------------------------------------ | |
| 432 | |
| 433 print(f"{prog_tag} {acc_out_f} file created") | |
| 434 | |
| 435 # -------------------------------------------------------------------------- | |
| 436 # test | |
| 437 if b_test_add_host_chr_taxids_accnr_from_ori_list: | |
| 438 taxid_acc_tabular_f = 'megablast_out_f_taxid_acc_host.tsv' | |
| 439 taxid_acc_in_f = 'megablast_out_f_taxid_acc_host.tsv' | |
| 440 acc_out_f = 'megablast_out_f_taxid_acc_hostexpanded.tsv' | |
| 441 print(f"{prog_tag} START b_test_add_host_chr_taxids_accnr_from_ori_list") | |
| 442 print(f"{prog_tag} loading {taxid_acc_tabular_f} file") | |
| 443 taxidlist = load_taxids(taxid_acc_in_f) | |
| 444 print(f"{prog_tag} end loading") | |
| 445 | |
| 446 add_host_chr_taxids_accnr_from_ori_list(taxidlist, | |
| 447 accnrlist, | |
| 448 acc_out_f) | |
| 449 print(f"{prog_tag} END b_test_add_host_chr_taxids_accnr_from_ori_list") | |
| 450 sys.exit() | |
| 451 # -------------------------------------------------------------------------- | |
| 452 | |
| 453 # -------------------------------------------------------------------------- | |
| 454 # MAIN | |
| 455 # -------------------------------------------------------------------------- | |
| 456 ##### MAIN | |
| 457 def __main__(): | |
| 458 # load taxid_acc file | |
| 459 load_taxids(taxid_acc_tabular_f) | |
| 460 # check in ncbi taxonomy which acc number are in and out of given taxid | |
| 461 add_host_chr_taxids_accnr_from_ori_list(taxidlist, | |
| 462 accnrlist, | |
| 463 acc_out_f) | |
| 464 # -------------------------------------------------------------------------- | |
| 465 #### MAIN END | |
| 466 if __name__ == "__main__": __main__() | |
| 467 | 
