Mercurial > repos > tduigou > get_db_info
annotate get_db_info.py @ 13:5b16f2911491 draft default tip
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
| author | tduigou |
|---|---|
| date | Fri, 23 May 2025 13:47:21 +0000 |
| parents | ee28ec28140d |
| children |
| rev | line source |
|---|---|
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
1 import subprocess |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
2 import time |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
3 import argparse |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
4 import socket |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
5 import os |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
6 import re |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
7 import pandas as pd |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
8 from Bio.Seq import Seq |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
9 from Bio.SeqRecord import SeqRecord |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
10 from sqlalchemy import create_engine, inspect |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
11 from sqlalchemy.sql import text |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
12 from sqlalchemy.engine.url import make_url |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
13 from sqlalchemy.exc import OperationalError |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
14 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def fix_db_uri(uri):
    """Return *uri* with every ``__at__`` placeholder turned back into ``@``.

    The input string is not modified; a new string is returned.
    """
    placeholder = "__at__"
    # Splitting on the placeholder and re-joining with "@" substitutes
    # every occurrence, left to right.
    return "@".join(uri.split(placeholder))
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
18 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def is_port_in_use(port):
    """Tell whether something accepts TCP connections on localhost:*port*.

    A throw-away IPv4 stream socket attempts to connect; the port counts as
    "in use" exactly when ``connect_ex`` reports success (return code 0).
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        status = probe.connect_ex(('localhost', port))
    finally:
        # Always release the probe socket, even if the connect attempt raised.
        probe.close()
    return status == 0
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
23 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def extract_db_name(uri):
    """Return the ``database`` component of a SQLAlchemy connection URI.

    The URI is parsed with ``sqlalchemy.engine.url.make_url`` and the
    ``database`` attribute of the resulting URL object is handed back.
    """
    return make_url(uri).database
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
28 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def start_postgres_container(db_name):
    """Ensure a PostgreSQL Docker container named after *db_name* is running.

    Three cases are handled, in order:

    1. A running container with that exact name exists: nothing to do.
    2. A stopped container with that exact name exists: ``docker start`` it.
    3. No such container: ``docker run`` a new ``postgres`` container,
       publishing it on host port 5432, or 5433 when 5432 is already taken.

    Every docker command is passed as an argument list (no shell), so
    characters in *db_name* can never be interpreted by a shell.

    Args:
        db_name: Database name, reused verbatim as the container name.

    Raises:
        FileNotFoundError: if the ``docker`` executable cannot be found.

    Failures reported by docker itself are printed, not raised.
    """
    container_name = db_name

    # Docker's ``name`` filter is an unanchored match, so "db" would also hit
    # "db_backup". Anchor it to the exact name; the optional slash covers
    # Docker versions that match against the internal "/name" form.
    name_filter = f"name=^/?{re.escape(container_name)}$"

    def _matching_ids(*extra_flags):
        """Return the IDs printed by ``docker ps`` for the anchored filter."""
        listing = subprocess.run(
            ["docker", "ps", *extra_flags, "-q", "-f", name_filter],
            capture_output=True,
            text=True,
        )
        return listing.stdout.strip()

    # Check if container is already running
    if _matching_ids():
        print(f"Container '{container_name}' is already running.")
        return

    # Check if container exists (stopped)
    if _matching_ids("-a"):
        print(f"Starting existing container '{container_name}'...")
        started = subprocess.run(["docker", "start", container_name])
        if started.returncode == 0:
            print(f"PostgreSQL Docker container '{container_name}' activated.")
        else:
            # Do not claim success when docker reported a failure.
            print(
                f"Failed to start Docker container: 'docker start "
                f"{container_name}' exited with status {started.returncode}"
            )
        return

    # If container does not exist, create and start a new one
    port = 5432 if not is_port_in_use(5432) else 5433
    # NOTE(review): the fallback password is hard-coded; prefer always
    # supplying POSTGRES_PASSWORD through the environment.
    postgres_password = os.getenv("POSTGRES_PASSWORD", "RK17")

    start_command = [
        "docker", "run", "--name", container_name,
        "-e", f"POSTGRES_PASSWORD={postgres_password}",
        "-p", f"{port}:5432",
        "-d", "postgres",
    ]

    try:
        subprocess.run(start_command, check=True)
        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
    except subprocess.CalledProcessError as e:
        print(f"Failed to start Docker container: {e}")
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
69 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def wait_for_db(uri, timeout=60):
    """Poll the database at *uri* until a connection succeeds or time runs out.

    A connection is attempted repeatedly, sleeping two seconds after each
    ``OperationalError``. The function returns as soon as one attempt
    succeeds.

    Args:
        uri: SQLAlchemy connection URI.
        timeout: Maximum number of seconds to keep retrying.

    Raises:
        TimeoutError: if no connection succeeded within *timeout* seconds.
            ``TimeoutError`` is an ``Exception`` subclass, so callers that
            catch ``Exception`` keep working.
    """
    engine = create_engine(uri)
    # A monotonic clock is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout
    try:
        while time.monotonic() < deadline:
            try:
                with engine.connect():
                    print("Connected to database.")
                    return
            except OperationalError:
                print("Database not ready, retrying...")
                time.sleep(2)
    finally:
        # This engine exists only for probing; release its connection pool
        # whether we succeeded, timed out, or hit an unexpected error.
        engine.dispose()
    raise TimeoutError("Database connection failed after timeout.")
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
83 |
|
9
6401a277ee72
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
7
diff
changeset
|
84 def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output): |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
85 """Fetch annotations from the database and save the result as GenBank files.""" |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
86 db_uri = fix_db_uri(db_uri) |
|
12
ee28ec28140d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
10
diff
changeset
|
87 df = pd.read_csv(csv_file, sep=',', header=None) |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
88 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
89 engine = create_engine(db_uri) |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
90 connection = engine.connect() |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
91 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
92 annotated_data = [] |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
93 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
94 try: |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
95 with connection: |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
96 inspector = inspect(engine) |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
97 columns = [column['name'] for column in inspector.get_columns(table_name)] |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
98 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
99 # Fetch all fragments from the table once |
|
2
0443378b44e5
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
0
diff
changeset
|
100 if fragment_column_name not in columns: |
|
0443378b44e5
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
0
diff
changeset
|
101 raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.") |
|
6
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
102 |
|
2
0443378b44e5
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
0
diff
changeset
|
103 fragment_column_index = columns.index(fragment_column_name) |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() |
|
2
0443378b44e5
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
0
diff
changeset
|
105 fragment_map = {row[fragment_column_index]: row for row in all_rows} |
|
6
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
106 |
|
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
107 # Compare fragments between CSV and DB |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
108 csv_fragments = set() |
|
13
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
109 all_ids = set(df[0].dropna().astype(str)) |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
110 for _, row in df.iterrows(): |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
111 for col in df.columns: |
|
13
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
112 if col != 0: |
|
12
ee28ec28140d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
10
diff
changeset
|
113 fragment = row[col] |
|
13
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
114 if pd.notna(fragment): |
|
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
115 fragment_str = str(fragment) |
|
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
116 if fragment_str not in all_ids: |
|
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
117 csv_fragments.add(fragment_str) |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
118 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
119 db_fragments = set(fragment_map.keys()) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
120 missing_fragments = sorted(list(csv_fragments - db_fragments)) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
121 if missing_fragments: |
|
6
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
122 raise ValueError( |
|
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
123 f" Missing fragments in DB: {', '.join(missing_fragments)}" |
|
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
124 ) |
|
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
125 |
|
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
126 # === CONTINUE WITH GB FILE CREATION === |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
127 for _, row in df.iterrows(): |
|
12
ee28ec28140d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
10
diff
changeset
|
128 annotated_row = {"Backbone": row[0], "Fragments": []} |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
129 for col in df.columns: |
|
12
ee28ec28140d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
10
diff
changeset
|
130 if col != 0: |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
131 fragment = row[col] |
|
13
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
132 if fragment not in csv_fragments: |
|
5b16f2911491
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
12
diff
changeset
|
133 continue |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
134 db_row = fragment_map.get(fragment) |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
135 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
136 if db_row: |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
137 fragment_data = {"id": fragment} |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
138 for i, column_name in enumerate(columns[1:]): # skip ID column |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
139 fragment_data[column_name] = db_row[i + 1] |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
140 else: |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
141 fragment_data = {"id": fragment, "metadata": "No data found"} |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
142 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
143 annotated_row["Fragments"].append(fragment_data) |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
144 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
145 annotated_data.append(annotated_row) |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
146 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
147 except Exception as e: |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
148 print(f"Error occurred during annotation: {e}") |
|
6
56a0938d534d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
4
diff
changeset
|
149 raise # Ensures the error exits the script |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
150 |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
151 # GenBank file generation per fragment |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
152 try: |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
153 for annotated_row in annotated_data: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
154 backbone_id = annotated_row["Backbone"] |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
155 for fragment in annotated_row["Fragments"]: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
156 fragment_id = fragment["id"] |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
157 sequence = fragment.get(sequence_column, "") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
158 annotation = fragment.get(annotation_columns, "") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
159 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
160 # Create the SeqRecord |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
161 record = SeqRecord( |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
162 Seq(sequence), |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
163 id=fragment_id, |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
164 name=fragment_id, |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
165 description=f"Fragment {fragment_id} from Backbone {backbone_id}" |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
166 ) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
167 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
168 # Add annotations to GenBank header |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
169 record.annotations = { |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
170 k: str(fragment[k]) for k in annotation_columns if k in fragment |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
171 } |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
172 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
173 # LOCUS line extraction from annotation (copy-paste the LOCUS from annotation) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
174 locus_line_match = re.search(r"LOCUS\s+.+", annotation) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
175 if locus_line_match: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
176 locus_line = locus_line_match.group() |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
177 else: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
178 print(f"LOCUS info missing for fragment {fragment_id}") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
179 locus_line = f"LOCUS {fragment_id: <20} {len(sequence)} bp DNA linear UNK 01-JAN-2025" |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
180 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
181 # Format sequence as per GenBank standards (with ORIGIN and line breaks) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
182 if "ORIGIN" in sequence: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
183 origin_block = sequence.strip() |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
184 else: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
185 # Format sequence as per GenBank standards (with ORIGIN and line breaks) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
186 formatted_sequence = "ORIGIN\n" |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
187 seq_str = str(record.seq) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
188 for i in range(0, len(seq_str), 60): # 60 bases per line |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
189 line_seq = seq_str[i:i + 60] |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
190 formatted_sequence += f"{str(i + 1).rjust(9)} { ' '.join([line_seq[j:j+10] for j in range(0, len(line_seq), 10)]) }\n" |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
191 origin_block = formatted_sequence.strip() |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
192 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
193 # Find and copy the FEATURES section directly from annotation |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
194 features_section = "" |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
195 features_start = annotation.find("FEATURES") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
196 if features_start != -1: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
197 features_section = annotation[features_start:] |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
198 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
199 # Writing the GenBank file |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
200 if not os.path.exists(output): |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
201 os.makedirs(output) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
202 |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
203 gb_filename = os.path.join(output, f"{fragment_id}.gb") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
204 with open(gb_filename, "w") as f: |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
205 # Write the LOCUS line |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
206 f.write(locus_line + "\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
207 # Write DEFINITION, ACCESSION, and other annotations |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
208 f.write(f"DEFINITION {record.description}\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
209 f.write(f"ACCESSION {record.id}\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
210 f.write(f"VERSION DB\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
211 f.write(f"KEYWORDS .\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
212 f.write(f"SOURCE .\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
213 # Write the FEATURES section directly from annotation |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
214 f.write(features_section) |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
215 # Write the ORIGIN section |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
216 f.write(origin_block + "\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
217 f.write("//\n") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
218 |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
219 except Exception as e: |
|
4
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
220 print(f"Error saving GenBank files: {e}") |
|
61158f32e5c3
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
2
diff
changeset
|
221 return |
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
222 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
def main(argv=None):
    """Command-line entry point: parse options, prepare the DB, export GenBank files.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, so existing ``main()``
            callers behave exactly as before.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or when a required
            option is missing or invalid.
    """
    parser = argparse.ArgumentParser(
        description="Fetch annotations from PostgreSQL database and save as GenBank files."
    )
    parser.add_argument("--input", required=True, help="Input CSV file")
    parser.add_argument(
        "--sequence_column",
        required=True,
        help="DB column containing the sequence for the GenBank file",
    )
    parser.add_argument(
        "--annotation_columns",
        required=True,
        help="DB column containing the header for the GenBank file",
    )
    parser.add_argument("--db_uri", required=True, help="Database URI connection string")
    parser.add_argument("--table", required=True, help="Table name in the database")
    parser.add_argument(
        "--fragment_column",
        required=True,
        help="Fragment column name in the database",
    )
    parser.add_argument("--output", required=True, help="Output dir for gb files")
    args = parser.parse_args(argv)

    # fix_db_uri / extract_db_name are helpers defined elsewhere in this file;
    # the processed URI is the one used for every later step.
    db_uri = fix_db_uri(args.db_uri)
    db_name = extract_db_name(db_uri)

    # Start the Docker container (if not already running)
    start_postgres_container(db_name)

    # Wait until the database is ready
    wait_for_db(db_uri)

    # Fetch annotations from the database and save as gb
    fetch_annotations(
        args.input,
        args.sequence_column,
        args.annotation_columns,
        db_uri,
        args.table,
        args.fragment_column,
        args.output,
    )
|
0
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
244 |
|
41ac63b5d221
planemo upload for repository https://github.com/brsynth commit 15dbdd1f0a222a8e1b0fb5c16b36885520a3d005
tduigou
parents:
diff
changeset
|
# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
