From 96a2fbd2afc08a31b006b994d8252704438c8a07 Mon Sep 17 00:00:00 2001
From: Paurikova2 <107862249+Paurikova2@users.noreply.github.com>
Date: Mon, 24 Jun 2024 10:08:24 +0200
Subject: [PATCH] table status of tul before and after vanilla import

table status of tul before and after vanilla import - script
---
 src/dtb_difference.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pump/_db.py       | 13 +++++++++++--
 src/tul_settings.py   | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 src/dtb_difference.py
 create mode 100644 src/tul_settings.py

diff --git a/src/dtb_difference.py b/src/dtb_difference.py
new file mode 100644
index 0000000..ac2c58b
--- /dev/null
+++ b/src/dtb_difference.py
@@ -0,0 +1,57 @@
+import logging
+
+import settings
+import tul_settings
+from utils import init_logging, update_settings
+
+_logger = logging.getLogger()
+
+env = update_settings(settings.env, tul_settings.settings)
+init_logging(_logger, env["log_file"])
+
+import dspace  # noqa
+import pump
+
+
+def difference_dtb(old_dtb: dict, new_dtb: dict):
+    """
+    Log the per-table row-count difference between two databases.
+
+    Both arguments map a table name to its row count, as produced by
+    `table_count()`. For every table present in both mappings the value
+    `new - old` is logged, tagged "surplus" when positive and "deficit"
+    when negative. Tables present in only one mapping are listed
+    separately. Neither mapping is modified.
+    """
+    rows = []
+    only_old = []
+    for name in sorted(old_dtb):
+        if name not in new_dtb:
+            only_old.append(name)
+            continue
+        difference = int(new_dtb[name]) - int(old_dtb[name])
+        if difference > 0:
+            result = "surplus "
+        elif difference < 0:
+            result = "deficit "
+        else:
+            result = ""
+        rows.append(f"{name: >40}: {difference: >8d} {result}\n")
+    # Derive the tables missing from `old_dtb` without deleting entries from
+    # the caller's dictionary.
+    only_new = sorted(name for name in new_dtb if name not in old_dtb)
+    msg = "".join(rows)
+    no_exist7 = "".join(f"{name}," for name in only_old)
+    no_exist5 = "".join(f"{name}," for name in only_new)
+    _logger.info(
+        f"\n{msg}Nonexistent tables in DSpace 7:\n\t{no_exist7}\nNonexistent tables in DSpace 5:\n\t{no_exist5}")
+    _logger.info(40 * "=")
+
+
+if __name__ == "__main__":
+    _logger.info("Loading repo objects")
+
+    _logger.info("Database difference:")
+    raw_db_7 = pump.db(env["db_dspace_7"])
+    raw_db_tul = pump.db(env["db_tul"])
+    difference_dtb(raw_db_tul.table_count(), raw_db_7.table_count())
diff --git a/src/pump/_db.py b/src/pump/_db.py
index 5372e0b..b3f65ed 100644
--- a/src/pump/_db.py
+++ b/src/pump/_db.py
@@ -127,13 +127,22 @@ def all_tables(self):
         return self.fetch_all(
             "SELECT table_name FROM information_schema.tables WHERE is_insertable_into = 'YES' AND table_schema = 'public'")
 
-    def status(self):
+    def table_count(self):
+        """
+        Return a dict mapping every table name reported by `all_tables()`
+        to the value `fetch_one` returns for `SELECT COUNT(*)` on it.
+        """
         d = {}
         tables = self.all_tables()
         for table in tables:
             name = table[0]
-            count = self.fetch_one(f"SELECT COUNT(*) FROM {name}")
+            # Use double quotes for table names because some of them are in uppercase.
+            count = self.fetch_one(f"SELECT COUNT(*) FROM \"{name}\"")
             d[name] = count
+        return d
+
+    def status(self):
+        d = self.table_count()
         zero = ""
         msg = ""
         for name in sorted(d.keys()):
diff --git a/src/tul_settings.py b/src/tul_settings.py
new file mode 100644
index 0000000..34b5e54
--- /dev/null
+++ b/src/tul_settings.py
@@ -0,0 +1,39 @@
+# Settings for the TUL database comparison; dtb_difference.py passes them to
+# `update_settings` together with `settings.env`.
+# NOTE(review): the credentials below are committed in plain text - confirm
+# they are development-only values.
+import os
+from datetime import datetime
+_this_dir = os.path.dirname(os.path.abspath(__file__))
+ts = datetime.now().strftime("%Y_%m_%d__%H.%M.%S")
+
+settings = {
+    "log_file": os.path.join(_this_dir, "../__logs", f"{ts}.txt"),
+
+    "resume_dir": "__temp/resume/",
+
+    "backend": {
+        "endpoint": "http://dev-5.pc:85/server/api/",
+        "user": "test@test.edu",
+        "password": "admin",
+        "authentication": True,
+    },
+
+    "db_dspace_7": {
+        # CLARIN-DSpace 7 database
+        "name": "dspace",
+        "host": "localhost",
+        # careful - NON standard port
+        "port": 5435,
+        "user": "dspace",
+        "password": "dspace",
+    },
+
+    "db_tul": {
+        "name": "tul",
+        "host": "localhost",
+        "user": "postgres",
+        "password": "dspace",
+        "port": 5432,
+    }
+}