From 03c2a311b23f0807bac29279359ff33d7041eac0 Mon Sep 17 00:00:00 2001 From: Niklas Kerkhoff Date: Tue, 19 Nov 2024 17:33:14 +0100 Subject: [PATCH] fix calendar --- deployment/tutor-assistant-nginx-proxy.conf | 2 + .../TutorAssistantDocumentService.kt | 4 +- .../documents/applications/ApplierVisitor.kt | 6 ++- .../applications/entities/Document.kt | 3 +- .../applications/entities/FileDocument.kt | 3 +- .../applications/entities/WebsiteDocument.kt | 3 +- .../documents/settings/SettingsParser.kt | 38 ++++++++++++++----- .../src/main/resources/application.yml | 2 +- .../controller/api/documents_controller.py | 8 ++-- .../domain/calendar/calendar_chain_service.py | 4 +- .../domain/documents/document_service.py | 6 +-- 11 files changed, 52 insertions(+), 27 deletions(-) diff --git a/deployment/tutor-assistant-nginx-proxy.conf b/deployment/tutor-assistant-nginx-proxy.conf index ddcf98b..8d2044a 100644 --- a/deployment/tutor-assistant-nginx-proxy.conf +++ b/deployment/tutor-assistant-nginx-proxy.conf @@ -2,10 +2,12 @@ events { } http { server { + client_max_body_size 10M; listen 80; location /api/ { proxy_pass http://tutor-assistant-app-service:8080/api/; + proxy_read_timeout 300s; } location /auth/ { diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/TutorAssistantDocumentService.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/TutorAssistantDocumentService.kt index fb5eb81..d1a2dda 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/TutorAssistantDocumentService.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/TutorAssistantDocumentService.kt @@ -18,13 +18,15 @@ class TutorAssistantDocumentService( title: String, originalKey: String, loaderType: String, - loaderParams: Map + loaderParams: Map, + isCalendar: Boolean ): List { val requestBody = mapOf( "title" to title, "originalKey" to originalKey, "loaderType" to loaderType, "loaderParams" to loaderParams, + "isCalendar" to isCalendar ) return webClient.post() diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/ApplierVisitor.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/ApplierVisitor.kt index 7c8b579..b08abe8 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/ApplierVisitor.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/ApplierVisitor.kt @@ -29,7 +29,8 @@ class ApplierVisitor( fileDocument.title, fileDocument.fileStoreId.toString(), fileDocument.loaderType, - loaderParams + loaderParams, + fileDocument.isCalendar ).also { logAddedToTutorAssistant(fileDocument.title, it) } @@ -62,7 +63,8 @@ class ApplierVisitor( websiteDocument.title, websiteDocument.loaderParams.url, websiteDocument.loaderType, - loaderParams + loaderParams, + websiteDocument.isCalendar ).also { logAddedToTutorAssistant(websiteDocument.title, it) } diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/Document.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/Document.kt index a3c4d0d..33319a3 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/Document.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/Document.kt @@ -11,7 +11,8 @@ import jakarta.persistence.InheritanceType abstract class Document( open val title: String, open val loaderType: String, - open val collection: String? + open val collection: String?, + open val isCalendar: Boolean ) : AppEntity() { @ElementCollection open var tutorAssistantIds: List = emptyList() diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/FileDocument.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/FileDocument.kt index b6bf659..7d2f9bb 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/FileDocument.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/FileDocument.kt @@ -8,9 +8,10 @@ class FileDocument( title: String, loaderType: String, collection: String?, + isCalendar: Boolean, val fileStoreId: UUID, val fileStoreUrl: String, -) : Document(title, loaderType, collection) { +) : Document(title, loaderType, collection, isCalendar) { override fun accept(visitor: DocumentVisitor) { visitor.visit(this) } diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/WebsiteDocument.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/WebsiteDocument.kt index 549a5be..26b9be6 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/WebsiteDocument.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/applications/entities/WebsiteDocument.kt @@ -8,8 +8,9 @@ class WebsiteDocument( title: String, loaderType: String, collection: String?, + isCalendar: Boolean, val loaderParams: LoaderParams, -) : Document(title, loaderType, collection) { +) : Document(title, loaderType, collection, isCalendar) { override fun accept(visitor: DocumentVisitor) { visitor.visit(this) } diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/settings/SettingsParser.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/settings/SettingsParser.kt index b353f64..a538c73 100644 --- a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/settings/SettingsParser.kt +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/documents/settings/SettingsParser.kt @@ -49,7 +49,7 @@ class SettingsParser( return when { json.has("elements") -> parseElements(json["elements"], collection) - json.has("elementsBuilder") -> parseElementsBuilder(json["elementsBuilder"], collection, getValues(json)) + json.has("elementsBuilder") -> parseElementsBuilder(json["elementsBuilder"], collection, json.getValues()) else -> throw SettingsParserException("Failed parsing collection") } } @@ -91,6 +91,7 @@ class SettingsParser( json["title"].stringWithValueOrThrow(value), json["loaderType"].stringWithValueOrThrow(value), collection, + json["isCalendar"].booleanOrFalse(), fileStoreId, fileStoreUrl ) @@ -103,6 +104,7 @@ class SettingsParser( json["title"].stringWithValueOrThrow(value), json["loaderType"].stringWithValueOrThrow(value), collection, + json["isCalendar"].booleanOrFalse(), parseWebsiteLoaderParams(json["loaderParams"], value) ) } @@ -117,37 +119,48 @@ class SettingsParser( ) } + private fun JsonNode?.requireNotNull() { + if (this == null) throw SettingsParserException("Node is null") + } + private fun JsonNode.requireObject() { - if (!isObject) throw SettingsParserException("Not an object") + requireNotNull() + if (!isObject) throw SettingsParserException("Node is not an object") } private fun JsonNode.requireArray() { - if (!isArray) throw SettingsParserException("Not an array") + requireNotNull() + if (!isArray) throw SettingsParserException("Node is not an array") } private fun JsonNode.requireObjectKeys(vararg keys: String) { - keys.forEach { if (!has(it)) throw SettingsParserException("Key $it not found") } + requireNotNull() + keys.forEach { if (!has(it)) throw SettingsParserException("Key $it not found in node") } } private fun JsonNode.stringOrThrow(): String { - if (!isTextual) throw SettingsParserException("Not a string") + requireNotNull() + if (!isTextual) throw SettingsParserException("Node is not a string") return asText() } private fun JsonNode.intOrThrow(): Int { - if (!isInt) throw SettingsParserException("Not an int") + requireNotNull() + if (!isInt) throw SettingsParserException("Node is not an int") return asInt() } - private fun getValues(json: JsonNode): List { - json.requireObjectKeys("values") + private fun JsonNode.getValues(): List { + requireNotNull() + requireObjectKeys("values") - val key = json["values"].stringOrThrow() + val key = this["values"].stringOrThrow() return allValues[key] ?: throw SettingsParserException("Values for key $key not found") } private fun JsonNode.getUrlFromFilename(value: String?): Pair { + requireNotNull() val filename = this["filename"].stringWithValueOrThrow(value) val idAndUrl = fileStoreIdsAndUrls[filename] ?: throw SettingsParserException("File $filename does not exist") val id = idAndUrl.first ?: throw SettingsParserException("File store id must not be null") @@ -155,6 +168,7 @@ class SettingsParser( } private fun JsonNode.stringWithValueOrThrow(value: String?): String { + requireNotNull() val string = stringOrThrow() if (value == null) return string @@ -167,5 +181,9 @@ class SettingsParser( strategy(value) } } -} + private fun JsonNode?.booleanOrFalse(): Boolean { + if (this == null) return false + return booleanValue() + } +} diff --git a/tutor-assistant-app-service/src/main/resources/application.yml b/tutor-assistant-app-service/src/main/resources/application.yml index a968261..396b468 100644 --- a/tutor-assistant-app-service/src/main/resources/application.yml +++ b/tutor-assistant-app-service/src/main/resources/application.yml @@ -32,7 +32,7 @@ logging: max-file-size: 10MB max-history: 50 level: - root: ${ROOT_LOG_LEVEL} + root: info app: diff --git a/tutor-assistant/tutor_assistant/controller/api/documents_controller.py b/tutor-assistant/tutor_assistant/controller/api/documents_controller.py index 47152a2..4cc380d 100644 --- a/tutor-assistant/tutor_assistant/controller/api/documents_controller.py +++ b/tutor-assistant/tutor_assistant/controller/api/documents_controller.py @@ -12,19 +12,19 @@ @router.post('/documents/add') async def _add_document(request: Request): body: dict = await request.json() - check_request_body(body, ['title', 'originalKey', 'loaderType', 'loaderParams']) + check_request_body(body, ['title', 'originalKey', 'loaderType', 'loaderParams', 'isCalendar']) title: str = body['title'] original_key: str = body['originalKey'] loader_type: str = body['loaderType'] loader_params: dict = body['loaderParams'] - summarize_documents_count = body.get('summarizeDocumentsCount', -1) + is_calendar: bool = body['isCalendar'] config.logger.info( - f'POST /documents/add: loader_type:{loader_type};loader_params:{loader_params.keys()};summarize_documents_count:{summarize_documents_count}') + f'POST /documents/add: title={title}; original_key={original_key}') loader = get_loader(loader_creators, title, loader_type, loader_params) - ids = DocumentService(config).add(loader, original_key, summarize_documents_count) + ids = DocumentService(config).add(loader, original_key, is_calendar) config.logger.info(f'Result: {ids}') diff --git a/tutor-assistant/tutor_assistant/domain/calendar/calendar_chain_service.py b/tutor-assistant/tutor_assistant/domain/calendar/calendar_chain_service.py index 73b2bba..e58aa30 100644 --- a/tutor-assistant/tutor_assistant/domain/calendar/calendar_chain_service.py +++ b/tutor-assistant/tutor_assistant/domain/calendar/calendar_chain_service.py @@ -39,13 +39,13 @@ def _get_retriever_chain(self, query: str): def _retriever(self, query: str) -> list[Document]: vector_store = self._config.vector_store_manager.load() try: - docs, scores = zip(*vector_store.similarity_search_with_score(query, k=100)) + docs, scores = zip(*vector_store.similarity_search_with_score(query, k=1000)) except: return [] result = [] doc: Document for doc, score in zip(docs, scores): - if 'CalendarEntries' in doc.metadata['summary']: + if 'isCalendar' in doc.metadata and doc.metadata['isCalendar']: doc.metadata["score"] = score result.append(doc) diff --git a/tutor-assistant/tutor_assistant/domain/documents/document_service.py b/tutor-assistant/tutor_assistant/domain/documents/document_service.py index 9dcfcc8..d6f82fa 100644 --- a/tutor-assistant/tutor_assistant/domain/documents/document_service.py +++ b/tutor-assistant/tutor_assistant/domain/documents/document_service.py @@ -13,18 +13,16 @@ class DocumentService: def __init__(self, config: DomainConfig): self._config = config - def add(self, loader: BaseLoader, original_key: str, summarize_documents_count: int) -> list[str]: + def add(self, loader: BaseLoader, original_key: str, is_calendar: bool) -> list[str]: documents = loader.load() ids: list[str] = [] for i, doc in enumerate(documents): doc.id = str(uuid.uuid4()) doc.metadata['id'] = doc.id doc.metadata['originalKey'] = original_key + doc.metadata['isCalendar'] = is_calendar ids.append(doc.id) - # if len(documents) <= summarize_documents_count: - # self._summarize_documents(documents) - meta_docs = self._handle_meta_docs(documents) store = self._config.vector_store_manager.load()