Skip to content

Commit

Permalink
improvement: set up baserow export
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinstadler committed Nov 4, 2024
1 parent 008ab93 commit d33ff36
Show file tree
Hide file tree
Showing 13 changed files with 33,953 additions and 88,927 deletions.
3 changes: 2 additions & 1 deletion app/publications/[id]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { InlineList } from "@/components/inline-list";
import { LanguageLink } from "@/components/language-link";
import { MainContent } from "@/components/main-content";
import { ClickablePublicationThumbnail, PublicationCover } from "@/components/publication-cover";
import { PublisherLink } from "@/components/publisher-link";
import { TranslatorLink } from "@/components/translator-link";
import { getPublication, getSameLanguagePublications } from "@/lib/data";
import type { Publication, Translator } from "@/lib/model";
Expand Down Expand Up @@ -92,7 +93,7 @@ export default async function PublicationPage(props: PublicationPageProps) {
</InlineList>
</NameValue>
<NameValue name={t("publisher")}>
{pub.publisher} {pub.publication_details}
<PublisherLink publisher={pub.publisher} /> {pub.publication_details}
</NameValue>
<NameValue name={t("year")}>{pub.year_display}</NameValue>
</PublicationDetails>
Expand Down
9 changes: 9 additions & 0 deletions components/publisher-link.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import type { Publisher } from "@/lib/model";

/** Props accepted by {@link PublisherLink}. */
interface PublisherLinkProps {
  /** The publisher to display. */
  publisher: Publisher;
}

/**
 * Displays a publisher.
 *
 * At the moment this yields nothing but the publisher's `name` as a plain
 * string, without any surrounding markup.
 */
export function PublisherLink(props: PublisherLinkProps) {
  const { publisher } = props;
  const { name } = publisher;
  return name;
}
16 changes: 13 additions & 3 deletions lib/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ export const proseCategories = ["novels", "novellas", "autobiography", "fragment

export type Category = (typeof otherCategories)[number] | (typeof proseCategories)[number];

/** Three-valued answer: a definite "yes" or "no", or an undecided "maybe". */
export type YesNoMaybe =
  | "yes"
  | "no"
  | "maybe";

/** Publication contains one or more translated works. */
export interface Publication {
id: string;
Expand All @@ -21,12 +23,15 @@ export interface Publication {
year: number;
year_display: string;
isbn?: string;
publisher: string;
publisher: Publisher;

// misc info that varies between publications of the same publisher
// prime example: issue/page details when the 'publisher' is a periodical/magazine
publication_details?: string;
exemplar_suhrkamp_berlin: boolean;
exemplar_oeaw: boolean;
original_publication?: string;
zusatzinfos?: string;
exemplar_suhrkamp_berlin: YesNoMaybe;
exemplar_oeaw: YesNoMaybe;
images: Array<Asset>;
has_image: boolean; // redundant, derived from 'images' (workaround for https://github.com/typesense/typesense/issues/790)
}
Expand Down Expand Up @@ -59,6 +64,11 @@ export interface Translator {
wikidata?: string;
}

/** A publisher, identified by an id and carrying a display name. */
export interface Publisher {
  /** Human-readable name of the publisher. */
  name: string;
  /** Identifier of the publisher record. */
  id: string;
}

interface Asset {
id: string; // same as filename (without extension, which is .jpg)
metadata?: string;
Expand Down
149 changes: 149 additions & 0 deletions scripts/baserow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3

import argparse
from datetime import datetime
import json
import logging
import os

from acdh_baserow_pyutils import BaseRowClient
from dotenv import load_dotenv

# Command-line interface of the script: three boolean mode switches, the
# location of the dotenv file and a counted verbosity flag.
parser = argparse.ArgumentParser(
    description="import / export data from / to baserow",
    prog="baserow",
)

# The mode switches are all plain store_true flags, so register them in one
# pass (order is kept, it determines the order in the generated help).
for _switch, _switch_help in (
    ("-from-baserow", "make a backup/dump of baserow data"),
    ("-to-baserow", "import transformed data to Baserow"),
    (
        "-typesense",
        "import transformed publications to Typesense (default: %(default)s)",
    ),
):
    parser.add_argument(_switch, help=_switch_help, action="store_true")

# Path of the dotenv file to read.
parser.add_argument(
    "-env",
    help=".env file to be used for getting typesense server details",
    default="../.env.local",
)

# Counted flag: every occurrence of -v / --verbose increments `verbose` by one.
parser.add_argument(
    "-v",
    "--verbose",
    help="Increase the verbosity of the logging output: default is WARNING, use -v for INFO, -vv for DEBUG",
    default=0,
    action="count",
)

# Parse the command line first: both the dotenv path and the log level below
# are taken from it.
args = parser.parse_args()

# Load the variables of the selected dotenv file into the process environment.
load_dotenv(args.env)

# Translate the -v count into a logging level:
#   0 -> WARNING (30), 1 -> INFO (20), 2 or more -> DEBUG (10).
#
# The format string may only reference attributes that every LogRecord
# carries. It used to start with "%(count)-4s", but `count` is not a LogRecord
# attribute and nothing in this script supplies it (no logging filter, no
# `extra=` argument), so every logging call failed with "Formatting field not
# found in record" instead of printing its message. The field is dropped.
logging.basicConfig(
    level=max(logging.DEBUG, logging.WARNING - 10 * args.verbose),
    format="%(levelname)-8s %(message)s\n",
)

# The Baserow client cannot be set up without credentials. Only the presence
# of BASEROW_USER is verified here; the other variables are read as-is.
if os.environ.get("BASEROW_USER") is None:
    logging.fatal("Couldn't find baserow database information in environment files")
    exit(1)

# Connection settings taken from the environment (None for any unset variable).
BASEROW_USER, BASEROW_PW, BASEROW_TOKEN, BASEROW_BASE_URL = (
    os.getenv(_key)
    for _key in ("BASEROW_USER", "BASEROW_PW", "BASEROW_TOKEN", "BASEROW_BASE_URL")
)
# Id of the Baserow database this script works on.
DATABASE_ID = "631"

# initialize the client
br_client = BaseRowClient(
    BASEROW_USER,
    BASEROW_PW,
    BASEROW_TOKEN,
    BASEROW_BASE_URL,
    DATABASE_ID,
)

# Baserow dump: when either -from-baserow or -to-baserow was given, write the
# current content of all tables into a new, timestamped folder. This runs
# before any rows are patched further down in the script.
# NOTE(review): the indentation of this script is not visible in this view, so
# it is unclear whether the table-id lookup at the end of this section belongs
# to this `if` or runs unconditionally -- confirm against the real file.
if args.from_baserow or args.to_baserow:
# second-resolution timestamp, formatted like 20241104-153000
timestamp = datetime.today().strftime("%Y%m%d-%H%M%S")
folder = f"baserow-dump-{timestamp}"
# os.mkdir raises FileExistsError if a folder of that name already exists
os.mkdir(folder)
# write all tables of the database as JSON files into that folder
br_client.dump_tables_as_json(DATABASE_ID, folder_name=folder, indent="\t")

# map the name of every table in the database to its id
table_ids = {}
for table in br_client.list_tables(DATABASE_ID):
table_ids[table["name"]] = table["id"]

if args.to_baserow:

    def patch_table(name):
        """Send the rows stored in ``data/<name>.json`` to the Baserow table ``name``.

        The JSON file is read relative to the current working directory and is
        iterated as a sequence of row payloads: the entry at (0-based)
        position ``i`` is sent as a patch for the row with id ``i + 1``.
        NOTE(review): this presumes that the Baserow row ids are contiguous
        and start at 1 -- confirm against the actual tables.

        Rows for which the response contains an ``"error"`` key are logged
        and skipped; they do not abort the run.

        Raises:
            KeyError: if ``name`` is not a key of ``table_ids``.
            OSError / json.JSONDecodeError: if the file cannot be read or parsed.
        """
        path = f"data/{name}.json"
        logging.info(f"loading {path}")
        # Use a context manager so the file handle is closed again, and read
        # as UTF-8 explicitly: JSON is UTF-8 encoded, whereas open() would
        # otherwise fall back to the platform's locale encoding.
        with open(path, encoding="utf-8") as json_file:
            data = json.load(json_file)
        logging.info(f'updating baserow table "{name}"')
        for row_id, d in enumerate(data, start=1):
            r = br_client.patch_row(table_ids[name], str(row_id), d)
            if "error" in r:
                # Fall back to the error code when no "detail" is supplied, so
                # that reporting a failure cannot itself raise a KeyError.
                logging.error(r.get("detail", r["error"]))

    patch_table("Übersetzer")
    patch_table("BernhardWerk")
    patch_table("Übersetzung")
    patch_table("Publikation")

# Typesense import: when -typesense was given, empty the configured collection
# and import the publication documents into it.
# NOTE(review): `publications` is used below but is not defined or imported
# anywhere in this script as visible here, so the import step would raise a
# NameError -- and only after the existing documents have already been
# deleted. Confirm where the publication data is supposed to come from.
if args.typesense:
logging.debug(f"Loading typesense access data from {args.env}")
# (load_dotenv is already imported at the top of the file)
from dotenv import load_dotenv

# Switch to the directory of this script before loading args.env a second time.
# NOTE(review): os.path.dirname(__file__) is an empty string when __file__ is
# a bare relative file name, and os.chdir("") fails -- verify how the script
# is meant to be invoked.
os.chdir(os.path.dirname(__file__))
load_dotenv(args.env)

# the admin API key is the only Typesense setting whose presence is verified
if "TYPESENSE_ADMIN_API_KEY" not in os.environ:
logging.fatal(
"Couldn't find typesense database information in environment files"
)
exit(1)

logging.info(f"connecting to {os.environ.get('NEXT_PUBLIC_TYPESENSE_HOST')}")
import typesense

# a single Typesense node, fully described by environment variables
client = typesense.Client(
{
"api_key": os.environ.get("TYPESENSE_ADMIN_API_KEY"),
"nodes": [
{
"host": os.environ.get("NEXT_PUBLIC_TYPESENSE_HOST"),
"port": os.environ.get("NEXT_PUBLIC_TYPESENSE_PORT"),
"protocol": os.environ.get("NEXT_PUBLIC_TYPESENSE_PROTOCOL"),
}
],
"connection_timeout_seconds": 5,
}
)

collection_name = os.environ.get("TYPESENSE_COLLECTION_NAME")

# fetch the collection's metadata to find out how many documents it holds
r = client.collections[collection_name].retrieve()

# delete every document whose id is not the empty string
# (presumably that is all of them)
if r["num_documents"] > 0:
logging.info(f'Clearing {r["num_documents"]} existing documents')
r = client.collections[collection_name].documents.delete(
{"filter_by": 'id :!= ""'}
)
logging.info(
f'Cleared {r["num_deleted"]} documents from collection {collection_name}'
)

# bulk-import the documents; `r` is then iterated as one result per document,
# each with a "success" entry
r = client.collections[collection_name].documents.import_(publications.values())

# count the results whose "success" value is False
nfails = list(map(lambda d: d["success"], r)).count(False)
# every single document failed: print all results only in verbose mode, then
# exit with status 1
if nfails == len(publications):
if args.verbose > 0:
print(r)
logging.error(
f"Failed to insert any of the documents. Either the documents don't comply with the schema of the collection, or maybe you are using an api key that only has read access to the collection? (run the script again with --verbose to see all {nfails} errors)"
)
exit(1)
# only some documents failed: log each failed result, then exit with status 1
elif nfails > 0:
logging.error(f"{nfails} documents could not be inserted.")
for doc in filter(lambda d: not d["success"], r):
logging.error(doc)
exit(1)

logging.info("Success!")
Loading

0 comments on commit d33ff36

Please sign in to comment.