Mercurial > repos > pimarin > data_manager_fetch_plasmidfinder
comparison data_manager/plasmidfinder_fetch_database.py @ 5:60cfd33bc2fb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit 0a3992c5be846fc9f18b7ca18f0adcd78f5b9396-dirty
| author | pimarin |
|---|---|
| date | Mon, 24 Jul 2023 10:00:33 +0000 |
| parents | e05fd47bcca6 |
| children | f99089461adb |
comparison
equal
deleted
inserted
replaced
| 4:e05fd47bcca6 | 5:60cfd33bc2fb |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import json | 2 import json |
| 3 import os | 3 import os |
| 4 import tarfile | 4 import time |
| 5 from datetime import datetime | |
| 6 from pathlib import Path | 5 from pathlib import Path |
| 7 | 6 |
| 8 import requests | 7 |
| 8 import git | |
| 9 | 9 |
| 10 | 10 |
| 11 class GetPlasmidfinderDataManager: | 11 class GetPlasmidfinderDataManager: |
| 12 """ | 12 """ |
| 13 Create the json file with database information for galaxy data manager | 13 Create the json file with database information for galaxy data manager |
| 14 """ | 14 """ |
| 15 | 15 |
| 16 def __init__(self, | 16 def __init__(self, |
| 17 plasmidfinder_database="plasmidfinder_database", | 17 plasmidfinder_database="plasmidfinder_database", |
| 18 db_name="plasmidfinder-db", | 18 db_name="plasmidfinder_database", |
| 19 plasmidfinder_version="latest"): | 19 plasmidfinder_version="latest"): |
| 20 self.data_table_name = plasmidfinder_database | 20 self.data_table_name = plasmidfinder_database |
| 21 self._db_name = db_name | 21 self._db_name = db_name |
| 22 self._plasmidfinder_version = plasmidfinder_version | 22 self._plasmidfinder_version = plasmidfinder_version |
| 23 self._plasmidfinder_date_version = None | 23 self._plasmidfinder_date_version = None |
| 66 Build the data manager info for galaxy | 66 Build the data manager info for galaxy |
| 67 """ | 67 """ |
| 68 | 68 |
| 69 def __init__(self, | 69 def __init__(self, |
| 70 output_dir=Path.cwd(), | 70 output_dir=Path.cwd(), |
| 71 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/", | 71 plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/src/master", |
| 72 db_name="plasmidfinder-db", | 72 db_name="plasmidfinder_database", |
| 73 db_tmp="tmp_database", | 73 db_tmp="tmp_database", |
| 74 plasmidfinder_version="latest", | 74 plasmidfinder_version="latest", |
| 75 json_file_path=None, | 75 json_file_path=None, |
| 76 date_version=datetime.now().strftime("%Y-%m-%d")): | 76 date_version=None): |
| 77 | 77 |
| 78 super().__init__() | 78 super().__init__() |
| 79 self.json_file_path = json_file_path | 79 self.json_file_path = json_file_path |
| 80 self._output_dir = output_dir | 80 self._output_dir = output_dir |
| 81 self._plasmidfinder_url = plasmidfinder_url | 81 self._plasmidfinder_url = plasmidfinder_url |
| 84 self._db_name_tar = f'{db_name}.gz' | 84 self._db_name_tar = f'{db_name}.gz' |
| 85 self._plasmidfinder_version = plasmidfinder_version | 85 self._plasmidfinder_version = plasmidfinder_version |
| 86 self._plasmidfinder_date_version = date_version | 86 self._plasmidfinder_date_version = date_version |
| 87 self._commit_number = None | 87 self._commit_number = None |
| 88 | 88 |
| 89 def extract_db_commit(self, request_header, title_name="content-disposition"): | 89 def git_clone(self): |
| 90 """ | 90 git.Repo.clone_from(url=self._plasmidfinder_url, to_path=self._output_dir) |
| 91 Extract the commit id to add the information as identifier of the download | 91 self._plasmidfinder_repository = git.Repo(path=self._output_dir) |
| 92 @request_header: a request object obtained from requests.get() | |
| 93 @title_name: the tag to search in the header of the requests object | |
| 94 return: the value of the commit | |
| 95 """ | |
| 96 db_info = request_header.headers[title_name] | |
| 97 commit_number = db_info.split("-")[2].split(".")[0] | |
| 98 return commit_number | |
| 99 | 92 |
| 100 def untar_files(self, file_path: Path, extracted_path_output: Path): | 93 def get_commit_number(self): |
| 101 """ | 94 sha = self._plasmidfinder_repository.head.commit.hexsha |
| 102 untar the download archive | 95 short_sha = self._plasmidfinder_repository.git.rev_parse(sha, short=7) |
| 103 @file_path: input path of the tar.gz file | 96 self._commit_number = short_sha |
| 104 @extracted_path_output: output path of the extract folder | |
| 105 return: the path of the output | |
| 106 """ | |
| 107 try: | |
| 108 with file_path.open('rb') as fh_in, \ | |
| 109 tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file: | |
| 110 tar_file.extractall(path=extracted_path_output) | |
| 111 print(f'Untar the database in {extracted_path_output}') | |
| 112 return extracted_path_output | |
| 113 except OSError: | |
| 114 os.sys.exit(f'ERROR: Could not extract {file_path}') | |
| 115 | 97 |
| 116 def choose_db_version(self): | 98 def get_commit_date(self): |
| 117 """ | 99 self._plasmidfinder_date_version = time.strftime("%Y_%m_%d", time.gmtime(self._plasmidfinder_repository.head.commit.committed_date)) |
| 118 Update the url link depending on the version chosen by user. | |
| 119 This method could be upgraded simply by adding the new versions | |
| 120 """ | |
| 121 if self._plasmidfinder_version == "latest": | |
| 122 self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz" | |
| 123 elif self._plasmidfinder_version == "2.1": | |
| 124 self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz" | |
| 125 | 100 |
| 126 def download_database(self): | 101 def download_database(self): |
| 127 """ | 102 """ |
| 128 Download the plasmidfinder database using requests lib | 103 Download the plasmidfinder database using git lib |
| 129 Make the directory and temporary directory for download | 104 Extract commit and commit date |
| 130 Untar the download files | |
| 131 """ | 105 """ |
| 132 self._output_dir = Path(self._output_dir) | 106 self._output_dir = Path(self._output_dir) |
| 133 self.choose_db_version() | 107 self.git_clone() |
| 134 try: | 108 if self._plasmidfinder_version != "latest": |
| 135 request_info = requests.get(self._plasmidfinder_url) | 109 self._plasmidfinder_repository.git.checkout(self._plasmidfinder_version) |
| 136 request_info.raise_for_status() | 110 self.get_commit_number() |
| 137 self._commit_number = self.extract_db_commit(request_info) | 111 self.get_commit_date() |
| 138 output_tar_path = self._output_dir.joinpath(self._temporary_folder) | |
| 139 output_tar_path_file = output_tar_path.joinpath(self._db_name_tar) | |
| 140 output_path = self._output_dir.joinpath(self._db_name) | |
| 141 os.makedirs(output_tar_path) | |
| 142 os.makedirs(output_path) | |
| 143 with open(output_tar_path_file, 'wb') as output_dir: | |
| 144 output_dir.write(request_info.content) | |
| 145 untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name)) | |
| 146 self.moove_download_files(source=untar_output, destination=output_path) | |
| 147 except requests.exceptions.HTTPError as http_error: | |
| 148 print(f"Requests Error: {http_error}") | |
| 149 print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}") | |
| 150 | |
| 151 def moove_download_files(self, source, destination, expression_search="*"): | |
| 152 """ | |
| 153 Clean downloaded data by moving fasta files in the final folder | |
| 154 @source: previous path where the files are located | |
| 155 @destination: final path where files will be moved | |
| 156 @expression_search: keep only file with this expression | |
| 157 """ | |
| 158 fasta_files = Path(source).rglob(expression_search) | |
| 159 file_list_paths = [file for file in fasta_files if file.is_file()] | |
| 160 [self.keep_filename(pathname=path, output_path=destination) for path in file_list_paths] | |
| 161 | |
| 162 def keep_filename(self, pathname, output_path): | |
| 163 """ | |
| 164 Move files | |
| 165 @pathname: previous path | |
| 166 @output_path: final path | |
| 167 """ | |
| 168 Path.replace(pathname, output_path.joinpath(pathname.name)) | |
| 169 | 112 |
| 170 def read_json_input_file(self): | 113 def read_json_input_file(self): |
| 171 """ | 114 """ |
| 172 Import the json file | 115 Import the json file |
| 173 """ | 116 """ |
