diff --git a/src/pump/_db.py b/src/pump/_db.py index afb856d..b1af2bf 100644 --- a/src/pump/_db.py +++ b/src/pump/_db.py @@ -161,7 +161,7 @@ def __init__(self, raw_db_dspace_5, raw_db_utilities_5, raw_db_7): self.raw_db_7 = raw_db_7 def _fetch_all_vals(self, db5, table_name: str, sql: str = None): - sql = f"SELECT * FROM {table_name}" + sql = sql or f"SELECT * FROM {table_name}" cols5 = [] db5 = db5 or self.raw_db_dspace_5 vals5 = db5.fetch_all(sql, col_names=cols5) @@ -211,11 +211,16 @@ def diff_table_cmp_cols(self, db5, table_name: str, compare_arr: list, gdpr: boo return self._cmp_values(table_name, vals5, only_in_5, vals7, only_in_7, do_not_show) - def diff_table_cmp_len(self, db5, table_name: str, nonnull: list = None, gdpr: bool = True): + def diff_table_cmp_len(self, db5, table_name: str, nonnull: list = None, gdpr: bool = True, sql: str = None): nonnull = nonnull or [] + sql_info = False cols5, vals5, cols7, vals7 = self._fetch_all_vals(db5, table_name) do_not_show = gdpr and "email" in nonnull + if len(vals5) != len(vals7) and sql is not None: + cols5, vals5, cols7, vals7 = self._fetch_all_vals(db5, table_name, sql) + sql_info = True + msg = " OK " if len(vals5) == len(vals7) else " !!! WARN !!! " _logger.info( f"Table [{table_name: >20}] {msg} compared by len only v5:[{len(vals5)}], v7:[{len(vals7)}]") @@ -230,6 +235,10 @@ def diff_table_cmp_len(self, db5, table_name: str, nonnull: list = None, gdpr: b _logger.info( f"Table [{table_name: >20}] {msg} NON NULL [{col_name:>15}] v5:[{len(vals5_cmp):3}], v7:[{len(vals7_cmp):3}]") + if sql_info: + _logger.info( + f"Table [{table_name: >20}] !!! WARN !!! SQL request: {sql}") + def diff_table_sql(self, db5, table_name: str, sql5, sql7, compare, process_ftor): cols5 = [] vals5 = db5.fetch_all(sql5, col_names=cols5) @@ -274,6 +283,10 @@ def validate(self, to_validate): if len(defin) == 0: self.diff_table_cmp_len(db5, table_name) + cmp = defin.get("len", None) + if cmp is not None: + self.diff_table_cmp_len(db5, table_name, None, True, cmp["sql"]) + cmp = defin.get("sql", None) if cmp is not None: self.diff_table_sql( diff --git a/src/pump/_item.py b/src/pump/_item.py index b82d248..26e3ee4 100644 --- a/src/pump/_item.py +++ b/src/pump/_item.py @@ -25,6 +25,9 @@ class items: ["workspaceitem", { }], ["collection2item", { + "len": { + "sql": "select distinct collection_id, item_id from public.collection2item group by collection_id, item_id", + } }], ]