comparison get_db_info.py @ 13:5b16f2911491 draft default tip

planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
author tduigou
date Fri, 23 May 2025 13:47:21 +0000
parents ee28ec28140d
children
comparison
equal deleted inserted replaced
12:ee28ec28140d 13:5b16f2911491
104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall() 104 all_rows = connection.execute(text(f"SELECT * FROM {table_name}")).fetchall()
105 fragment_map = {row[fragment_column_index]: row for row in all_rows} 105 fragment_map = {row[fragment_column_index]: row for row in all_rows}
106 106
107 # Compare fragments between CSV and DB 107 # Compare fragments between CSV and DB
108 csv_fragments = set() 108 csv_fragments = set()
109 all_ids = set(df[0]) 109 all_ids = set(df[0].dropna().astype(str))
110 for _, row in df.iterrows(): 110 for _, row in df.iterrows():
111 for col in df.columns: 111 for col in df.columns:
112 if col != 0: # Skip the first column 112 if col != 0:
113 fragment = row[col] 113 fragment = row[col]
114 if fragment not in all_ids: 114 if pd.notna(fragment):
115 csv_fragments.add(fragment) 115 fragment_str = str(fragment)
116 if fragment_str not in all_ids:
117 csv_fragments.add(fragment_str)
116 118
117 db_fragments = set(fragment_map.keys()) 119 db_fragments = set(fragment_map.keys())
118 missing_fragments = sorted(list(csv_fragments - db_fragments)) 120 missing_fragments = sorted(list(csv_fragments - db_fragments))
119
120 if missing_fragments: 121 if missing_fragments:
121 raise ValueError( 122 raise ValueError(
122 f" Missing fragments in DB: {', '.join(missing_fragments)}" 123 f" Missing fragments in DB: {', '.join(missing_fragments)}"
123 ) 124 )
124 125
126 for _, row in df.iterrows(): 127 for _, row in df.iterrows():
127 annotated_row = {"Backbone": row[0], "Fragments": []} 128 annotated_row = {"Backbone": row[0], "Fragments": []}
128 for col in df.columns: 129 for col in df.columns:
129 if col != 0: 130 if col != 0:
130 fragment = row[col] 131 fragment = row[col]
132 if fragment not in csv_fragments:
133 continue
131 db_row = fragment_map.get(fragment) 134 db_row = fragment_map.get(fragment)
132 135
133 if db_row: 136 if db_row:
134 fragment_data = {"id": fragment} 137 fragment_data = {"id": fragment}
135 for i, column_name in enumerate(columns[1:]): # skip ID column 138 for i, column_name in enumerate(columns[1:]): # skip ID column