Skip to content

Commit

Permalink
table status of tul before and after vanilla import
Browse files Browse the repository at this point in the history
table status of tul before and after vanilla import - script
  • Loading branch information
Paurikova2 authored Jun 24, 2024
1 parent 9e337b8 commit 96a2fbd
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 2 deletions.
42 changes: 42 additions & 0 deletions src/dtb_difference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import logging

import settings
import tul_settings
from utils import init_logging, update_settings

# Root logger: getLogger() called without a name returns the root of the logging hierarchy.
_logger = logging.getLogger()

# Combine the base environment with the TUL-specific settings.
# NOTE(review): merge semantics (override vs. deep-merge) live in utils.update_settings - confirm.
env = update_settings(settings.env, tul_settings.settings)
# Configure logging before the remaining project imports below.
# NOTE(review): presumably attaches a handler writing to env["log_file"] - confirm in utils.init_logging.
init_logging(_logger, env["log_file"])

import dspace # noqa
import pump


def difference_dtb(old_dtb: dict, new_dtb: dict) -> None:
    """Log the per-table row-count difference between two databases.

    For every table present in both mappings the difference
    ``new - old`` is logged, tagged ``surplus`` (positive) or ``deficit``
    (negative). Tables present in only one mapping are listed separately.

    Neither input mapping is modified.

    Args:
        old_dtb: Table name -> row count for the old database. Values
            must be convertible with ``int()``.
        new_dtb: Table name -> row count for the new database. Values
            must be convertible with ``int()``.
    """
    lines = []
    only_in_old = []
    for name in sorted(old_dtb):
        if name not in new_dtb:
            only_in_old.append(name)
            continue
        difference = int(new_dtb[name]) - int(old_dtb[name])
        if difference > 0:
            result = "surplus "
        elif difference < 0:
            result = "deficit "
        else:
            result = ""
        lines.append(f"{name: >40}: {difference: >8d} {result}\n")

    # Computed without deleting from new_dtb, so the caller's dict stays intact.
    only_in_new = sorted(name for name in new_dtb if name not in old_dtb)

    msg = "".join(lines)
    # Each name keeps its trailing comma so the logged text stays unchanged.
    no_exist7 = "".join(f"{name}," for name in only_in_old)
    no_exist5 = "".join(f"{name}," for name in only_in_new)
    _logger.info(
        f"\n{msg}Nonexistent tables in DSpace 7:\n\t{no_exist7}\nNonexistent tables in DSpace 5:\n\t{no_exist5}")
    _logger.info(40 * "=")


if __name__ == "__main__":
    # Script entry point: connect to both databases and log how their
    # per-table row counts differ.
    _logger.info("Loading repo objects")

    _logger.info("Database difference:")
    dspace7_db = pump.db(env["db_dspace_7"])
    tul_db = pump.db(env["db_tul"])

    # The TUL database is the "old" side, DSpace 7 the "new" side.
    tul_counts = tul_db.table_count()
    dspace7_counts = dspace7_db.table_count()
    difference_dtb(tul_counts, dspace7_counts)
9 changes: 7 additions & 2 deletions src/pump/_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,18 @@ def all_tables(self):
return self.fetch_all(
"SELECT table_name FROM information_schema.tables WHERE is_insertable_into = 'YES' AND table_schema = 'public'")

def status(self):
def table_count(self):
    """Return a dict mapping each table name to its ``COUNT(*)`` result.

    Table names are taken from the first column of every row returned by
    ``self.all_tables()``. The value stored for each table is whatever
    ``self.fetch_one`` returns for the count query.
    """
    counts = {}
    for row in self.all_tables():
        name = row[0]
        # Double-quote the identifier because some table names contain
        # uppercase letters, which would be case-folded if left unquoted.
        # An embedded double quote is doubled so the identifier stays valid.
        quoted = '"' + name.replace('"', '""') + '"'
        # Exactly one query per table, always with the quoted identifier.
        counts[name] = self.fetch_one(f"SELECT COUNT(*) FROM {quoted}")
    return counts

def status(self):
d = self.table_count()
zero = ""
msg = ""
for name in sorted(d.keys()):
Expand Down
35 changes: 35 additions & 0 deletions src/tul_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
from datetime import datetime
# Directory that contains this settings module; the log path is built from it.
_this_dir = os.path.dirname(os.path.abspath(__file__))
# Timestamp captured at import time; used as the log file's base name.
ts = datetime.now().strftime("%Y_%m_%d__%H.%M.%S")

# TUL-specific settings.
settings = {
    "log_file": os.path.join(_this_dir, "../__logs", f"{ts}.txt"),
    "resume_dir": "__temp/resume/",
}

# Backend REST API endpoint and login.
# NOTE(review): credentials are hard-coded; presumably development
# defaults - confirm they are never used against a production instance.
# NOTE(review): the "user" value looks like an obfuscation placeholder
# rather than a real address - confirm the intended login.
settings["backend"] = dict(
    endpoint="http://dev-5.pc:85/server/api/",
    user="[email protected]",
    password="admin",
    authentication=True,
)

# CLARIN-DSpace 7 database.
settings["db_dspace_7"] = dict(
    name="dspace",
    host="localhost",
    port=5435,  # careful - NON standard port
    user="dspace",
    password="dspace",
)

# TUL database.
settings["db_tul"] = dict(
    name="tul",
    host="localhost",
    user="postgres",
    password="dspace",
    port=5432,
)

0 comments on commit 96a2fbd

Please sign in to comment.