Mercurial > repos > tduigou > get_db_info
comparison get_db_info.py @ 13:5b16f2911491 draft default tip
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
| author | tduigou |
|---|---|
| date | Fri, 23 May 2025 13:47:21 +0000 |
| parents | ee28ec28140d |
| children | |
comparison
equal
deleted
inserted
replaced
| 12:ee28ec28140d | 13:5b16f2911491 |
|---|---|
| 104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() | 104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() |
| 105 fragment_map = {row[fragment_column_index]: row for row in all_rows} | 105 fragment_map = {row[fragment_column_index]: row for row in all_rows} |
| 106 | 106 |
| 107 # Compare fragments between CSV and DB | 107 # Compare fragments between CSV and DB |
| 108 csv_fragments = set() | 108 csv_fragments = set() |
| 109 all_ids = set(df[0]) | 109 all_ids = set(df[0].dropna().astype(str)) |
| 110 for _, row in df.iterrows(): | 110 for _, row in df.iterrows(): |
| 111 for col in df.columns: | 111 for col in df.columns: |
| 112 if col != 0: # Skip the first column | 112 if col != 0: |
| 113 fragment = row[col] | 113 fragment = row[col] |
| 114 if fragment not in all_ids: | 114 if pd.notna(fragment): |
| 115 csv_fragments.add(fragment) | 115 fragment_str = str(fragment) |
| | 116 if fragment_str not in all_ids: |
| | 117 csv_fragments.add(fragment_str) |
| 116 | 118 |
| 117 db_fragments = set(fragment_map.keys()) | 119 db_fragments = set(fragment_map.keys()) |
| 118 missing_fragments = sorted(list(csv_fragments - db_fragments)) | 120 missing_fragments = sorted(list(csv_fragments - db_fragments)) |
| 119 | |
| 120 if missing_fragments: | 121 if missing_fragments: |
| 121 raise ValueError( | 122 raise ValueError( |
| 122 f" Missing fragments in DB: {', '.join(missing_fragments)}" | 123 f" Missing fragments in DB: {', '.join(missing_fragments)}" |
| 123 ) | 124 ) |
| 124 | 125 |
| 126 for _, row in df.iterrows(): | 127 for _, row in df.iterrows(): |
| 127 annotated_row = {"Backbone": row[0], "Fragments": []} | 128 annotated_row = {"Backbone": row[0], "Fragments": []} |
| 128 for col in df.columns: | 129 for col in df.columns: |
| 129 if col != 0: | 130 if col != 0: |
| 130 fragment = row[col] | 131 fragment = row[col] |
| | 132 if fragment not in csv_fragments: |
| | 133 continue |
| 131 db_row = fragment_map.get(fragment) | 134 db_row = fragment_map.get(fragment) |
| 132 | 135 |
| 133 if db_row: | 136 if db_row: |
| 134 fragment_data = {"id": fragment} | 137 fragment_data = {"id": fragment} |
| 135 for i, column_name in enumerate(columns[1:]): # skip ID column | 138 for i, column_name in enumerate(columns[1:]): # skip ID column |
