annotate get_db_info.py @ 13:5b16f2911491 draft default tip

planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
author tduigou
date Fri, 23 May 2025 13:47:21 +0000
parents ee28ec28140d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
1 import subprocess
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
2 import time
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
3 import argparse
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
4 import socket
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
5 import os
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
6 import re
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
7 import pandas as pd
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
8 from Bio.Seq import Seq
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
9 from Bio.SeqRecord import SeqRecord
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
10 from sqlalchemy import create_engine, inspect
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
11 from sqlalchemy.sql import text
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
12 from sqlalchemy.engine.url import make_url
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
13 from sqlalchemy.exc import OperationalError
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
14
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def fix_db_uri(uri):
    """Return ``uri`` with every ``__at__`` placeholder turned back into ``@``.

    A URI that contains no placeholder is returned unchanged.
    """
    placeholder, at_sign = "__at__", "@"
    return uri.replace(placeholder, at_sign)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
18
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def is_port_in_use(port):
    """Report whether something accepts TCP connections on ``localhost:port``.

    Returns True when a connection attempt succeeds (``connect_ex`` yields 0),
    False otherwise.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        status = probe.connect_ex(('localhost', port))
    finally:
        # Always release the probing socket, whatever the outcome.
        probe.close()
    return status == 0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
23
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def extract_db_name(uri):
    """Return the ``database`` component of the SQLAlchemy connection URI."""
    return make_url(uri).database
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
28
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def start_postgres_container(db_name):
    """Ensure a PostgreSQL Docker container named after ``db_name`` is running.

    Three cases are handled, in order:

    1. A container with that exact name is already running: nothing is done.
    2. A stopped container with that exact name exists: it is started.
    3. No such container exists: a new one is created from the ``postgres``
       image, publishing the container's port 5432 on host port 5432, or on
       5433 when 5432 is already in use.

    Failures of ``docker start`` / ``docker run`` are reported on stdout and
    not raised. All docker commands are invoked with argument lists (no
    shell), so ``db_name`` is never interpreted by a shell.

    Args:
        db_name: Database name, reused verbatim as the container name.
    """
    container_name = db_name

    # Docker's "name" filter matches on any part of a container name, so an
    # unanchored filter would mistake e.g. "mydb_test" for "mydb". Anchor the
    # pattern; the optional "/" covers names reported with a leading slash.
    name_filter = f"name=^/?{re.escape(container_name)}$"

    # Check if container is already running
    container_running = subprocess.run(
        ["docker", "ps", "-q", "-f", name_filter],
        capture_output=True, text=True
    )

    if container_running.stdout.strip():
        print(f"Container '{container_name}' is already running.")
        return

    # Check if container exists (stopped)
    container_exists = subprocess.run(
        ["docker", "ps", "-a", "-q", "-f", name_filter],
        capture_output=True, text=True
    )

    if container_exists.stdout.strip():
        print(f"Starting existing container '{container_name}'...")
        started = subprocess.run(["docker", "start", container_name])
        # Only claim success when docker actually reported it.
        if started.returncode == 0:
            print(f"PostgreSQL Docker container '{container_name}' activated.")
        else:
            print(
                f"Failed to start Docker container: 'docker start {container_name}' "
                f"exited with code {started.returncode}"
            )
        return

    # If container does not exist, create and start a new one
    # NOTE(review): the 5433 fallback is not itself checked for availability.
    port = 5432 if not is_port_in_use(5432) else 5433
    # NOTE(review): hard-coded fallback password; prefer requiring
    # POSTGRES_PASSWORD to be set in the environment.
    postgres_password = os.getenv("POSTGRES_PASSWORD", "RK17")

    start_command = [
        "docker", "run", "--name", container_name,
        "-e", f"POSTGRES_PASSWORD={postgres_password}",
        "-p", f"{port}:5432",
        "-d", "postgres"
    ]

    try:
        subprocess.run(start_command, check=True)
        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to start Docker container: {e}")
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
69
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def wait_for_db(uri, timeout=60):
    """Try connecting to the DB until it works or the timeout elapses.

    A connection is attempted repeatedly, sleeping 2 seconds after each
    ``OperationalError``, until one succeeds or ``timeout`` seconds have
    passed. The engine created for probing is disposed before returning or
    raising, so no pooled connection is left open.

    Args:
        uri: SQLAlchemy database URI to connect to.
        timeout: Maximum number of seconds to keep retrying.

    Raises:
        Exception: If no connection could be established within ``timeout``.
    """
    engine = create_engine(uri)
    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    try:
        while time.monotonic() < deadline:
            try:
                with engine.connect():
                    print("Connected to database.")
                    return
            except OperationalError:
                print("Database not ready, retrying...")
                time.sleep(2)
    finally:
        # Release the probing engine's connection pool on every exit path.
        engine.dispose()
    raise Exception("Database connection failed after timeout.")
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
83
9
6401a277ee72 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 7
diff changeset
84 def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output):
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
85 """Fetch annotations from the database and save the result as GenBank files."""
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
86 db_uri = fix_db_uri(db_uri)
12
ee28ec28140d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 10
diff changeset
87 df = pd.read_csv(csv_file, sep=',', header=None)
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
88
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
89 engine = create_engine(db_uri)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
90 connection = engine.connect()
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
91
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
92 annotated_data = []
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
93
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
94 try:
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
95 with connection:
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
96 inspector = inspect(engine)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
97 columns = [column['name'] for column in inspector.get_columns(table_name)]
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
98
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
99 # Fetch all fragments from the table once
2
0443378b44e5 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents: 0
diff changeset
100 if fragment_column_name not in columns:
0443378b44e5 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents: 0
diff changeset
101 raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.")
6
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
102
2
0443378b44e5 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents: 0
diff changeset
103 fragment_column_index = columns.index(fragment_column_name)
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall()
2
0443378b44e5 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents: 0
diff changeset
105 fragment_map = {row[fragment_column_index]: row for row in all_rows}
6
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
106
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
107 # Compare fragments between CSV and DB
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
108 csv_fragments = set()
13
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
109 all_ids = set(df[0].dropna().astype(str))
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
110 for _, row in df.iterrows():
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
111 for col in df.columns:
13
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
112 if col != 0:
12
ee28ec28140d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 10
diff changeset
113 fragment = row[col]
13
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
114 if pd.notna(fragment):
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
115 fragment_str = str(fragment)
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
116 if fragment_str not in all_ids:
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
117 csv_fragments.add(fragment_str)
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
118
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
119 db_fragments = set(fragment_map.keys())
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
120 missing_fragments = sorted(list(csv_fragments - db_fragments))
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
121 if missing_fragments:
6
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
122 raise ValueError(
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
123 f" Missing fragments in DB: {', '.join(missing_fragments)}"
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
124 )
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
125
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
126 # === CONTINUE WITH GB FILE CREATION ===
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
127 for _, row in df.iterrows():
12
ee28ec28140d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 10
diff changeset
128 annotated_row = {"Backbone": row[0], "Fragments": []}
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
129 for col in df.columns:
12
ee28ec28140d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 10
diff changeset
130 if col != 0:
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
131 fragment = row[col]
13
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
132 if fragment not in csv_fragments:
5b16f2911491 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 12
diff changeset
133 continue
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
134 db_row = fragment_map.get(fragment)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
135
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
136 if db_row:
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
137 fragment_data = {"id": fragment}
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
138 for i, column_name in enumerate(columns[1:]): # skip ID column
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
139 fragment_data[column_name] = db_row[i + 1]
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
140 else:
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
141 fragment_data = {"id": fragment, "metadata": "No data found"}
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
142
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
143 annotated_row["Fragments"].append(fragment_data)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
144
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
145 annotated_data.append(annotated_row)
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
146
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
147 except Exception as e:
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
148 print(f"Error occurred during annotation: {e}")
6
56a0938d534d planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 4
diff changeset
149 raise # Ensures the error exits the script
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
150
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
151 # GenBank file generation per fragment
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
152 try:
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
153 for annotated_row in annotated_data:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
154 backbone_id = annotated_row["Backbone"]
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
155 for fragment in annotated_row["Fragments"]:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
156 fragment_id = fragment["id"]
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
157 sequence = fragment.get(sequence_column, "")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
158 annotation = fragment.get(annotation_columns, "")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
159
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
160 # Create the SeqRecord
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
161 record = SeqRecord(
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
162 Seq(sequence),
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
163 id=fragment_id,
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
164 name=fragment_id,
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
165 description=f"Fragment {fragment_id} from Backbone {backbone_id}"
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
166 )
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
167
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
168 # Add annotations to GenBank header
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
169 record.annotations = {
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
170 k: str(fragment[k]) for k in annotation_columns if k in fragment
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
171 }
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
172
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
173 # LOCUS line extraction from annotation (copy-paste the LOCUS from annotation)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
174 locus_line_match = re.search(r"LOCUS\s+.+", annotation)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
175 if locus_line_match:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
176 locus_line = locus_line_match.group()
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
177 else:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
178 print(f"LOCUS info missing for fragment {fragment_id}")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
179 locus_line = f"LOCUS {fragment_id: <20} {len(sequence)} bp DNA linear UNK 01-JAN-2025"
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
180
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
181 # Format sequence as per GenBank standards (with ORIGIN and line breaks)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
182 if "ORIGIN" in sequence:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
183 origin_block = sequence.strip()
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
184 else:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
185 # Format sequence as per GenBank standards (with ORIGIN and line breaks)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
186 formatted_sequence = "ORIGIN\n"
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
187 seq_str = str(record.seq)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
188 for i in range(0, len(seq_str), 60): # 60 bases per line
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
189 line_seq = seq_str[i:i + 60]
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
190 formatted_sequence += f"{str(i + 1).rjust(9)} { ' '.join([line_seq[j:j+10] for j in range(0, len(line_seq), 10)]) }\n"
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
191 origin_block = formatted_sequence.strip()
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
192
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
193 # Find and copy the FEATURES section directly from annotation
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
194 features_section = ""
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
195 features_start = annotation.find("FEATURES")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
196 if features_start != -1:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
197 features_section = annotation[features_start:]
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
198
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
199 # Writing the GenBank file
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
200 if not os.path.exists(output):
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
201 os.makedirs(output)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
202
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
203 gb_filename = os.path.join(output, f"{fragment_id}.gb")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
204 with open(gb_filename, "w") as f:
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
205 # Write the LOCUS line
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
206 f.write(locus_line + "\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
207 # Write DEFINITION, ACCESSION, and other annotations
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
208 f.write(f"DEFINITION {record.description}\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
209 f.write(f"ACCESSION {record.id}\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
210 f.write(f"VERSION DB\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
211 f.write(f"KEYWORDS .\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
212 f.write(f"SOURCE .\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
213 # Write the FEATURES section directly from annotation
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
214 f.write(features_section)
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
215 # Write the ORIGIN section
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
216 f.write(origin_block + "\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
217 f.write("//\n")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
218
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
219 except Exception as e:
4
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
220 print(f"Error saving GenBank files: {e}")
61158f32e5c3 planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents: 2
diff changeset
221 return
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
222
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
def main():
    """Command-line entry point: export database fragments as GenBank files.

    Parses the command-line options, normalises the database URI with
    ``fix_db_uri``, derives the database name with ``extract_db_name``,
    then calls ``start_postgres_container`` and ``wait_for_db`` before
    handing every option to ``fetch_annotations``, which writes the
    ``.gb`` files into the ``--output`` directory.

    All options are required; argparse exits with a usage error when one
    is missing.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Fetch fragment annotations from a PostgreSQL database "
            "and save them as GenBank files."
        )
    )
    parser.add_argument("--input", required=True, help="Input CSV file")
    parser.add_argument(
        "--sequence_column",
        required=True,
        help="DB column containing the sequence for the GenBank file",
    )
    parser.add_argument(
        "--annotation_columns",
        required=True,
        help="DB column(s) containing the header annotations for the GenBank file",
    )
    parser.add_argument("--db_uri", required=True, help="Database URI connection string")
    parser.add_argument("--table", required=True, help="Table name in the database")
    parser.add_argument(
        "--fragment_column",
        required=True,
        help="Fragment column name in the database",
    )
    parser.add_argument("--output", required=True, help="Output dir for gb files")
    args = parser.parse_args()

    # Normalise the URI first: both the container start-up and the
    # readiness check below work from the fixed form.
    db_uri = fix_db_uri(args.db_uri)
    db_name = extract_db_name(db_uri)

    # Start the Docker container (if not already running)
    start_postgres_container(db_name)

    # Wait until the database is ready
    wait_for_db(db_uri)

    # Fetch annotations from the database and save as gb
    fetch_annotations(
        args.input,
        args.sequence_column,
        args.annotation_columns,
        db_uri,
        args.table,
        args.fragment_column,
        args.output,
    )
0
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
244
41ac63b5d221 planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff changeset
# Run the command-line workflow only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()