data_manager_fetch_plasmidfinder: data_manager/plasmidfinder_fetch

comparison data_manager/plasmidfinder_fetch_database.py @ 5:60cfd33bc2fb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_plasmidfinder commit 0a3992c5be846fc9f18b7ca18f0adcd78f5b9396-dirty

author	pimarin
date	Mon, 24 Jul 2023 10:00:33 +0000
parents	e05fd47bcca6
children	f99089461adb

comparison

Legend: equal | deleted | inserted | replaced

-:e05fd47bcca6
+:60cfd33bc2fb
 import argparse
 import json
 import os
-import tarfile
+import time
-from datetime import datetime
 from pathlib import Path
-import requests
+import git
 class GetPlasmidfinderDataManager:
 """
 Create the json file with database information for galaxy data manager
 """
 def __init__(self,
 plasmidfinder_database="plasmidfinder_database",
-db_name="plasmidfinder-db",
+db_name="plasmidfinder_database",
 plasmidfinder_version="latest"):
 self.data_table_name = plasmidfinder_database
 self._db_name = db_name
 self._plasmidfinder_version = plasmidfinder_version
 self._plasmidfinder_date_version = None
 Build the data manager info for galaxy
 """
 def __init__(self,
 output_dir=Path.cwd(),
-plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/get/",
+plasmidfinder_url="https://bitbucket.org/genomicepidemiology/plasmidfinder_db/src/master",
-db_name="plasmidfinder-db",
+db_name="plasmidfinder_database",
 db_tmp="tmp_database",
 plasmidfinder_version="latest",
 json_file_path=None,
-date_version=datetime.now().strftime("%Y-%m-%d")):
+date_version=None):
 super().__init__()
 self.json_file_path = json_file_path
 self._output_dir = output_dir
 self._plasmidfinder_url = plasmidfinder_url
 self._db_name_tar = f'{db_name}.gz'
 self._plasmidfinder_version = plasmidfinder_version
 self._plasmidfinder_date_version = date_version
 self._commit_number = None
-def extract_db_commit(self, request_header, title_name="content-disposition"):
+def git_clone(self):
-"""
+git.Repo.clone_from(url=self._plasmidfinder_url, to_path=self._output_dir)
-Extract the commit if to add the information as identifier of the download
+self._plasmidfinder_repository = git.Repo(path=self._output_dir)
-@request_header: a request object obtained from requests.get()
-@title_name: the tag to search in the header of the requests object
-return: the value of the commit
-"""
-db_info = request_header.headers[title_name]
-commit_number = db_info.split("-")[2].split(".")[0]
-return commit_number
-def untar_files(self, file_path: Path, extracted_path_output: Path):
+def get_commit_number(self):
-"""
+sha = self._plasmidfinder_repository.head.commit.hexsha
-untar the download archive
+short_sha = self._plasmidfinder_repository.git.rev_parse(sha, short=7)
-@file_path: input path of the tar.gz file
+self._commit_number = short_sha
-@extracted_path_output: output path of the extract folder
-return: the path of the output
-"""
-try:
-with file_path.open('rb') as fh_in, \
-tarfile.open(fileobj=fh_in, mode='r:gz') as tar_file:
-tar_file.extractall(path=extracted_path_output)
-print(f'Untar the database in {extracted_path_output}')
-return extracted_path_output
-except OSError:
-os.sys.exit(f'ERROR: Could not extract {file_path}')
-def choose_db_version(self):
+def get_commit_date(self):
-"""
+self._plasmidfinder_date_version = time.strftime("%Y_%m_%d", time.gmtime(self._plasmidfinder_repository.head.commit.committed_date))
-Update the url link depending on the version choosen by user.
-This method could be upgraded simply by adding the new versions
-"""
-if self._plasmidfinder_version == "latest":
-self._plasmidfinder_url = f"{self._plasmidfinder_url}master.gz"
-elif self._plasmidfinder_version == "2.1":
-self._plasmidfinder_url = f"{self._plasmidfinder_url}1307168.gz"
 def download_database(self):
 """
-Download the plasmidfinder database using requests lib
+Download the plasmidfinder database using git lib
-Make the directory and temporary directory for download
+Extract commit and commit date
-Untar the download files
 """
 self._output_dir = Path(self._output_dir)
-self.choose_db_version()
+self.git_clone()
-try:
+if self._plasmidfinder_version != "latest":
-request_info = requests.get(self._plasmidfinder_url)
+self._plasmidfinder_repository.git.checkout(self._plasmidfinder_version)
-request_info.raise_for_status()
+self.get_commit_number()
-self._commit_number = self.extract_db_commit(request_info)
+self.get_commit_date()
-output_tar_path = self._output_dir.joinpath(self._temporary_folder)
-output_tar_path_file = output_tar_path.joinpath(self._db_name_tar)
-output_path = self._output_dir.joinpath(self._db_name)
-os.makedirs(output_tar_path)
-os.makedirs(output_path)
-with open(output_tar_path_file, 'wb') as output_dir:
-output_dir.write(request_info.content)
-untar_output = self.untar_files(file_path=output_tar_path_file, extracted_path_output=output_tar_path.joinpath(self._db_name))
-self.moove_download_files(source=untar_output, destination=output_path)
-except requests.exceptions.HTTPError as http_error:
-print(f"Requests Error: {http_error}")
-print(f"Fail to import Plasmidfinder database from {self._plasmidfinder_url}")
-def moove_download_files(self, source, destination, expression_search="*"):
-"""
-Clean downloaded data by mooving fasta files in the final folder
-@older_path: previous path where the files are located
-@new_path: final path where files will be mooved
-@expression_search: keep only file with this expression
-"""
-fasta_files = Path(source).rglob(expression_search)
-file_list_paths = [file for file in fasta_files if file.is_file()]
-[self.keep_filename(pathname=path, output_path=destination) for path in file_list_paths]
-def keep_filename(self, pathname, output_path):
-"""
-Moove files
-@pathname: previous path
-@output_path: final path
-"""
-Path.replace(pathname, output_path.joinpath(pathname.name))
 def read_json_input_file(self):
 """
 Import the json file
 """

Mercurial > repos > pimarin > data_manager_fetch_plasmidfinder

comparison data_manager/plasmidfinder_fetch_database.py @ 5:60cfd33bc2fb draft