diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/RagDocumentService.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/RagDocumentService.kt new file mode 100644 index 0000000..d63e503 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/RagDocumentService.kt @@ -0,0 +1,69 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager + +import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService +import de.niklaskerkhoff.tutorassistantappservice.lib.webclient.EmptyResponseBodyException +import org.springframework.beans.factory.annotation.Value +import org.springframework.core.ParameterizedTypeReference +import org.springframework.stereotype.Service +import org.springframework.web.reactive.function.client.WebClient + +/** + * Manages documents of the RAG-Service by posting to its documents/add and documents/delete endpoints. + */ +@Service +class RagDocumentService( + private val webClient: WebClient +) : AppService() { + @Value("\${app.tutor-assistant.base-url}") + private lateinit var baseUrl: String + + /** + * Adds a document to the RAG-Service and blocks until the response has arrived. + * + * @param title of the document. + * @param originalKey id of the file in the file store or the url of a website. + * @param loaderType specifies how to load and process the content. + * @param loaderParams specifies how to access the resource and its content. + * @param isCalendar specifies if the document shall be used for the generation of the calendar. + * + * @return the ids of the embedded chunks; EmptyResponseBodyException is thrown if the blocking call yields null.
+ */ + fun addDocument( + title: String, + originalKey: String, + loaderType: String, + loaderParams: Map, + isCalendar: Boolean + ): List { + val requestBody = mapOf( + "title" to title, + "originalKey" to originalKey, + "loaderType" to loaderType, + "loaderParams" to loaderParams, + "isCalendar" to isCalendar + ) + + return webClient.post() + .uri("$baseUrl/documents/add") + .bodyValue(requestBody) + .retrieve() + .bodyToMono(object : ParameterizedTypeReference>() {}) + .block() ?: throw EmptyResponseBodyException() + } + + /** + * Deletes a document from the RAG-Service and blocks until the response has arrived. + * + * @param chunksIds ids of the embedded chunks of the document; they are sent as the request body. + * + * @return true if deletion was successful, as reported in the response body; EmptyResponseBodyException is thrown if the blocking call yields null. + */ + fun deleteDocument(chunksIds: List): Boolean { + return webClient.post() + .uri("$baseUrl/documents/delete") + .bodyValue(chunksIds) + .retrieve() + .bodyToMono(Boolean::class.java) + .block() ?: throw EmptyResponseBodyException() + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/ApplierVisitor.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/ApplierVisitor.kt new file mode 100644 index 0000000..cfa608f --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/ApplierVisitor.kt @@ -0,0 +1,98 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents + +import de.niklaskerkhoff.tutorassistantappservice.lib.logging.Logger +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.* +import org.springframework.stereotype.Component + + +/** + * Visitor that adds a document to the RAG-Service and saves it in its repo, unless a document with the same title is already stored. + */ +@Component +class ApplierVisitor( + private val fileDocumentRepo: FileDocumentRepo, + private val
websiteDocumentRepo: WebsiteDocumentRepo, + private val ragDocumentService: RagDocumentService +) : DocumentVisitor, Logger { + + /** Embeds and saves the file document; returns early if a file document with the same title exists. */ override fun visit(fileDocument: FileDocument) { + log.info("Visiting fileDocument with title ${fileDocument.title}") + + val existing = fileDocumentRepo.findByTitle(fileDocument.title) + if (existing != null) { + logStopping(fileDocument.title) + return + } + + logContinuing(fileDocument.title) + + val loaderParams = mapOf("url" to fileDocument.fileStoreUrl) + + val tutorAssistantIds = ragDocumentService.addDocument( + fileDocument.title, + fileDocument.fileStoreId.toString(), + fileDocument.loaderType, + loaderParams, + fileDocument.isCalendar + ).also { + logAddedToRagService(fileDocument.title, it) + } + + /* keep the chunk ids on the entity; DocumentService hands them to deleteDocument when deleting */ fileDocument.chunksIds = tutorAssistantIds + + fileDocumentRepo.save(fileDocument).also { + logSaved(it.title) + } + } + + /** Embeds and saves the website document; returns early if a website document with the same title exists. */ override fun visit(websiteDocument: WebsiteDocument) { + log.info("Visiting websiteDocument with title ${websiteDocument.title}") + + val existing = websiteDocumentRepo.findByTitle(websiteDocument.title) + if (existing != null) { + logStopping(websiteDocument.title) + return + } + + logContinuing(websiteDocument.title) + + val loaderParams = mapOf( + "url" to websiteDocument.loaderParams.url, + "htmlSelector" to websiteDocument.loaderParams.htmlSelector, + "htmlSelectionIndex" to websiteDocument.loaderParams.htmlSelectionIndex, + ) + + val tutorAssistantIds = ragDocumentService.addDocument( + websiteDocument.title, + websiteDocument.loaderParams.url, + websiteDocument.loaderType, + loaderParams, + websiteDocument.isCalendar + ).also { + logAddedToRagService(websiteDocument.title, it) + } + + /* keep the chunk ids on the entity, as for file documents */ websiteDocument.chunksIds = tutorAssistantIds + + websiteDocumentRepo.save(websiteDocument).also { + logSaved(websiteDocument.title) + } + } + + private fun logContinuing(title: String) { + log.info("$title does not exist, continuing") + } + + private fun logStopping(title: String) { + log.info("$title already exists,
stopping") + } + + private fun logAddedToRagService(title: String, tutorAssistantIds: List) { + log.info("Added $title to Tutor-Assistant, got ${tutorAssistantIds.size} ids") + } + + private fun logSaved(title: String) { + log.info("Saved $title") + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentController.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentController.kt new file mode 100644 index 0000000..08898dc --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentController.kt @@ -0,0 +1,38 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents + +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.toDto +import org.springframework.security.access.prepost.PreAuthorize +import org.springframework.web.bind.annotation.* +import java.util.* + +/** REST endpoints for listing, embedding, re-embedding and deleting documents; every call is delegated to DocumentService and all but the two GET endpoints carry a PreAuthorize for the role document-manager. */ @RestController +@RequestMapping("embedding_manager/documents") +class DocumentController( + private val documentService: DocumentService +) { + @GetMapping("files") + fun getFileDocuments() = documentService.getFileDocuments().map { it.toDto() } + + @GetMapping("websites") + fun getWebsiteDocuments() = documentService.getWebsiteDocuments().map { it.toDto() } + + @PostMapping("embed") + @PreAuthorize("hasRole('document-manager')") + fun embed(): Unit = documentService.embed() + + @PostMapping("files/{id}/reembed") + @PreAuthorize("hasRole('document-manager')") + fun reembedFile(@PathVariable id: UUID): Unit = documentService.reembedFileDocument(id) + + @PostMapping("websites/{id}/reembed") + @PreAuthorize("hasRole('document-manager')") + fun reembedWebsite(@PathVariable id: UUID): Unit = documentService.reembedWebsiteDocument(id) + + @DeleteMapping("files/{id}") +
@PreAuthorize("hasRole('document-manager')") + fun deleteFile(@PathVariable id: UUID): Unit = documentService.deleteFileDocument(id) + + @DeleteMapping("websites/{id}") + @PreAuthorize("hasRole('document-manager')") + fun deleteWebsite(@PathVariable id: UUID): Unit = documentService.deleteWebsiteDocument(id) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentLoader.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentLoader.kt new file mode 100644 index 0000000..e9d1ae7 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentLoader.kt @@ -0,0 +1,7 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents + +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.Document + +/** Supplies the documents that are to be embedded. */ interface DocumentLoader { + fun loadDocuments(): List +} \ No newline at end of file diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentService.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentService.kt new file mode 100644 index 0000000..3aac2de --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentService.kt @@ -0,0 +1,96 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents + +import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService +import de.niklaskerkhoff.tutorassistantappservice.lib.entities.findByIdOrThrow +import de.niklaskerkhoff.tutorassistantappservice.lib.exceptions.BadRequestException +import
de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.* +import org.springframework.stereotype.Service +import java.util.* + +/** + * Manages documents: + * * returns them + * * embeds them + * * deletes them + */ +@Service +class DocumentService( + private val fileDocumentRepo: FileDocumentRepo, + private val websiteDocumentRepo: WebsiteDocumentRepo, + private val documentLoader: DocumentLoader, + private val applierVisitor: ApplierVisitor, + private val ragDocumentService: RagDocumentService +) : AppService() { + + /** + * @return all file documents. + */ + fun getFileDocuments(): List = fileDocumentRepo.findAll() + + /** + * @return all website documents. + */ + fun getWebsiteDocuments(): List = websiteDocumentRepo.findAll() + + /** + * Embeds the documents: loads them from the document loader and lets each one accept the applier visitor. + */ + fun embed() { + val documents = documentLoader.loadDocuments() + documents.forEach { it.accept(applierVisitor) } + } + + /** + * Deletes the embedding of a file and embeds it again; fails with BadRequestException if no loaded document has the same title. + * + * @param id of the file. + */ + fun reembedFileDocument(id: UUID) = reembed(id, fileDocumentRepo) + + /** + * Deletes the embedding of a website and embeds it again; fails with BadRequestException if no loaded document has the same title. + * + * @param id of the website. + */ + fun reembedWebsiteDocument(id: UUID) = reembed(id, websiteDocumentRepo) + + /** + * Deletes the embedded chunks of a file from the RAG-Service and the file document from its repo. + * + * @param id of the file. + */ + fun deleteFileDocument(id: UUID) = delete(id, fileDocumentRepo) + + /** + * Deletes the embedded chunks of a website from the RAG-Service and the website document from its repo. + * + * @param id of the website.
+ */ + fun deleteWebsiteDocument(id: UUID) = delete(id, websiteDocumentRepo) + + /** Looks up the stored document, picks the freshly loaded document with the same title, deletes the stored one and lets the loaded one accept the applier visitor. */ private fun reembed(id: UUID, documentRepo: DocumentRepo) { + val existingDocument = documentRepo.findByIdOrThrow(id) + val title = existingDocument.title + val allDocuments = documentLoader.loadDocuments() + val documentToReembed = allDocuments.find { it.title == title } + ?: throw BadRequestException("Document $title not specified in main settings") + + delete(existingDocument, documentRepo) + documentToReembed.accept(applierVisitor) + } + + /** Resolves the document by id and deletes it. */ private fun delete(id: UUID, documentRepo: DocumentRepo) { + val document = documentRepo.findByIdOrThrow(id) + delete(document, documentRepo) + } + + /** Deletes the chunks in the RAG-Service first, then the document in the repo. NOTE(review): the Boolean returned by deleteDocument is not checked, so the log line is written regardless of its value. */ private fun delete(document: T, documentRepo: DocumentRepo) { + ragDocumentService.deleteDocument(document.chunksIds).also { + log.info("Deleted ${document.chunksIds} from Tutor-Assistant") + } + documentRepo.delete(document).also { + log.info("Deleted document with id ${document.id}") + } + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/Document.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/Document.kt new file mode 100644 index 0000000..41bf756 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/Document.kt @@ -0,0 +1,46 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntity +import jakarta.persistence.ElementCollection +import jakarta.persistence.Entity +import jakarta.persistence.Inheritance +import jakarta.persistence.InheritanceType + +/** + * Document to embed; abstract base of FileDocument and WebsiteDocument. + */ +@Entity +@Inheritance(strategy = InheritanceType.TABLE_PER_CLASS) +abstract class Document( + /** + * Human-readable id; the repos look documents up by it (findByTitle).
+ */ + open val title: String, + + /** + * Specifies how the document is loaded. + */ + open val loaderType: String, + + /** + * Assigns a collection to the document for grouping. + */ + open val collection: String?, + + /** + * Specifies if the document shall be used for generating the calendar. + */ + open val isCalendar: Boolean +) : AppEntity() { + + /** + * Ids of the embedded chunks returned by the RAG-Service; an empty list until they are assigned. + */ + @ElementCollection + open var chunksIds: List = emptyList() + + /** + * Visitor method: the subclasses in this package call visitor.visit(this), selecting the overload for their own type. + */ + abstract fun accept(visitor: DocumentVisitor) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentRepo.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentRepo.kt new file mode 100644 index 0000000..c5782db --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentRepo.kt @@ -0,0 +1,9 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntityRepo +import org.springframework.data.repository.NoRepositoryBean + +/** Common repository interface for document entities; adds a lookup by title with a nullable result. */ @NoRepositoryBean +interface DocumentRepo : AppEntityRepo { + fun findByTitle(title: String): T?
+} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentVisitor.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentVisitor.kt new file mode 100644 index 0000000..ed51661 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentVisitor.kt @@ -0,0 +1,6 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +/** Visitor with one visit overload per concrete document type. */ interface DocumentVisitor { + fun visit(fileDocument: FileDocument) + fun visit(websiteDocument: WebsiteDocument) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocument.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocument.kt new file mode 100644 index 0000000..bd03eb4 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocument.kt @@ -0,0 +1,29 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import jakarta.persistence.Entity +import java.util.UUID + +/** + * Document from a file in the file store. + */ +@Entity +class FileDocument( + title: String, + loaderType: String, + collection: String?, + isCalendar: Boolean, + + /** + * ID of the file in the file store + */ + val fileStoreId: UUID, + + /** + * URL of the file in the file store + */ + val fileStoreUrl: String, +) : Document(title, loaderType, collection, isCalendar) { + override fun accept(visitor: DocumentVisitor) { + visitor.visit(this) + } +} diff --git
a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentDtos.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentDtos.kt new file mode 100644 index 0000000..cf4a4ed --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentDtos.kt @@ -0,0 +1,21 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import java.util.UUID + +/** Data transfer object for a FileDocument; carries id, title, loaderType, collection and fileStoreId. */ data class FileDocumentDto( + val id: UUID?, + val title: String, + val loaderType: String, + val collection: String?, + val fileStoreId: UUID +) { + /** Copies the DTO fields from the given file document. */ constructor(fileDocument: FileDocument) : this( + id = fileDocument.id, + title = fileDocument.title, + loaderType = fileDocument.loaderType, + collection = fileDocument.collection, + fileStoreId = fileDocument.fileStoreId + ) +} + +/** Converts this file document to its DTO. */ fun FileDocument.toDto() = FileDocumentDto(this) diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentRepo.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentRepo.kt new file mode 100644 index 0000000..4a3c5a6 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/FileDocumentRepo.kt @@ -0,0 +1,3 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +/** Repository for file documents. */ interface FileDocumentRepo : DocumentRepo diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocument.kt
b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocument.kt new file mode 100644 index 0000000..b88cfc4 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocument.kt @@ -0,0 +1,44 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import jakarta.persistence.Embeddable +import jakarta.persistence.Entity + + +/** + * Document from a website. + */ +@Entity +class WebsiteDocument( + title: String, + loaderType: String, + collection: String?, + isCalendar: Boolean, + + /** + * Specifies how to load the website. + */ + val loaderParams: LoaderParams, +) : Document(title, loaderType, collection, isCalendar) { + override fun accept(visitor: DocumentVisitor) { + visitor.visit(this) + } + + @Embeddable + data class LoaderParams( + /** + * URL from which the website is loaded. + */ + val url: String, + + /** + * HTML selector to identify the element from which the content shall be used. + * The selector yields a list of matching elements; htmlSelectionIndex picks one of them.
+ */ + val htmlSelector: String, + + /** + * Index of the element to use within the elements matched by htmlSelector. + */ + val htmlSelectionIndex: Int + ) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentDtos.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentDtos.kt new file mode 100644 index 0000000..4e9bd23 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentDtos.kt @@ -0,0 +1,21 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +import java.util.* + +/** Data transfer object for a WebsiteDocument; carries id, title, loaderType, collection and the url taken from loaderParams. */ data class WebsiteDocumentDto( + val id: UUID?, + val title: String, + val loaderType: String, + val collection: String?, + val url: String, +) { + /** Copies the DTO fields from the given website document. */ constructor(websiteDocument: WebsiteDocument) : this( + id = websiteDocument.id, + title = websiteDocument.title, + loaderType = websiteDocument.loaderType, + collection = websiteDocument.collection, + url = websiteDocument.loaderParams.url + ) +} + +/** Converts this website document to its DTO. */ fun WebsiteDocument.toDto() = WebsiteDocumentDto(this) diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentRepo.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentRepo.kt new file mode 100644 index 0000000..700c2b0 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/WebsiteDocumentRepo.kt @@ -0,0 +1,3 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities + +/** Repository for website documents. */ interface WebsiteDocumentRepo : DocumentRepo diff --git
a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceController.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceController.kt new file mode 100644 index 0000000..a1f1ef9 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceController.kt @@ -0,0 +1,67 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.resources + +import de.niklaskerkhoff.tutorassistantappservice.lib.filestore.FileStoreFileReferenceDto +import de.niklaskerkhoff.tutorassistantappservice.lib.filestore.FileStoreService +import de.niklaskerkhoff.tutorassistantappservice.lib.filestore.toDto +import org.springframework.core.io.InputStreamResource +import org.springframework.http.HttpHeaders +import org.springframework.http.MediaType +import org.springframework.http.ResponseEntity +import org.springframework.security.access.prepost.PreAuthorize +import org.springframework.web.bind.annotation.* +import org.springframework.web.multipart.MultipartFile +import java.util.* + +/** REST endpoints for listing, fetching, uploading and deleting files through the FileStoreService. */ @RestController +@RequestMapping("embedding_manager/resources") +class ResourceController( + private val resourceService: ResourceService, + private val fileStoreService: FileStoreService +) { + companion object { + /** Content types by file ending; endings not listed fall back to DEFAULT_CONTENT_TYPE. */ private val CONTENT_TYPES = mapOf( + "pdf" to MediaType.APPLICATION_PDF, + ) + private val DEFAULT_CONTENT_TYPE = MediaType.TEXT_PLAIN + } + + /** + * @see FileStoreService.listFiles + */ + @GetMapping + fun listFiles(): List = fileStoreService.listFiles().map { it.toDto() } + + /** + * @return file as InputStreamResource in a way browsers can open it directly (inline Content-Disposition, content type chosen by file ending).
+ */ + @GetMapping("{id}") + fun getFile(@PathVariable id: UUID): ResponseEntity { + val fileData = fileStoreService.getFileById(id) + /* file type = text after the last dot of the display name */ val fileType = fileData.first.displayName.split(".").last() + + return ResponseEntity.ok() + .header(HttpHeaders.CONTENT_DISPOSITION, "inline; filename=\"${fileData.first.displayName}\"") + .contentType(CONTENT_TYPES[fileType] ?: DEFAULT_CONTENT_TYPE) + .body(fileData.second) + } + + /** + * @see FileStoreService.assignAndUpload + */ + @PostMapping + @PreAuthorize("hasRole('document-manager')") + fun addFile( + @RequestPart("file") file: MultipartFile, + ): FileStoreFileReferenceDto { + return fileStoreService.assignAndUpload(file, resourceService.getUniqueFilename(file)).toDto() + } + + /** + * @see FileStoreService.deleteById + */ + @DeleteMapping("{id}") + @PreAuthorize("hasRole('document-manager')") + fun deleteFile(@PathVariable id: UUID) { + fileStoreService.deleteById(id) + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceService.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceService.kt new file mode 100644 index 0000000..5bb84bf --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/resources/ResourceService.kt @@ -0,0 +1,49 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.resources + +import de.niklaskerkhoff.tutorassistantappservice.lib.exceptions.BadRequestException +import de.niklaskerkhoff.tutorassistantappservice.lib.filestore.FileStoreFileReferenceDefaultRepo +import org.springframework.stereotype.Service +import org.springframework.web.multipart.MultipartFile + +/** + * Helper for managing resources; it generates unique filenames for uploaded files.
+ */ +@Service +class ResourceService( + private val fileReferenceDefaultRepo: FileStoreFileReferenceDefaultRepo +) { + companion object { + /** First number that is tried when the plain filename is already taken. */ private const val UNIQUE_START_N = 1 + } + + /** + * Generates a unique filename from the original filename; throws BadRequestException if the original filename is null. + */ + fun getUniqueFilename(file: MultipartFile): String { + val filename = file.originalFilename ?: throw BadRequestException("Filename must not be null") + return getUniqueFilename(filename) + } + + /** Returns the candidate name if no file reference has it as display name, otherwise recurses with the next number. NOTE(review): UnknownError presumably resolves to java.lang.UnknownError here; confirm that this error type is intended. */ private fun getUniqueFilename(filename: String, n: Int? = null): String { + val newFilename = if (n == null) filename else addNumberToFilename(filename, n) + + val fileReferences = fileReferenceDefaultRepo.findAllByDisplayName(newFilename) + if (fileReferences.size > 1) throw UnknownError("Filenames must be unique") + + if (fileReferences.isEmpty()) return newFilename + + val newN = n?.plus(1) ?: UNIQUE_START_N + return getUniqueFilename(filename, newN) + } + + /** Inserts the number in front of the last dot (name.ext becomes name.number.ext) or appends it if the filename has no dot. */ private fun addNumberToFilename(filename: String, number: Int): String { + val dotIndex = filename.lastIndexOf('.') + return if (dotIndex != -1) { + val name = filename.substring(0, dotIndex) + val extension = filename.substring(dotIndex) + "$name.$number$extension" + } else { + "$filename.$number" + } + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingController.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingController.kt new file mode 100644 index 0000000..bfd5aa5 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingController.kt @@ -0,0 +1,34 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings + +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities.SettingDto +import
de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities.toDto +import org.springframework.security.access.prepost.PreAuthorize +import org.springframework.web.bind.annotation.* +import org.springframework.web.multipart.MultipartFile +import java.util.* + +/** REST endpoints for listing, adding and deleting settings; the class-level PreAuthorize names the role document-manager. */ @RestController +@RequestMapping("embedding_manager/settings") +@PreAuthorize("hasRole('document-manager')") +class SettingController( + private val settingService: SettingService +) { + + /** + * @see SettingService.getSettings + */ + @GetMapping + fun getSettings(): List = settingService.getSettings().map { it.toDto() } + + /** + * @see SettingService.addSettings + */ + @PostMapping + fun addSetting(@RequestPart("file") file: MultipartFile): SettingDto = settingService.addSettings(file).toDto() + + /** + * @see SettingService.deleteSetting + */ + @DeleteMapping("{id}") + fun deleteSetting(@PathVariable id: UUID): Unit = settingService.deleteSetting(id) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingService.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingService.kt new file mode 100644 index 0000000..a3d76fc --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingService.kt @@ -0,0 +1,99 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings + +import com.fasterxml.jackson.databind.ObjectMapper +import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService +import de.niklaskerkhoff.tutorassistantappservice.lib.exceptions.BadRequestException +import de.niklaskerkhoff.tutorassistantappservice.lib.filestore.FileStoreService +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.DocumentLoader +import
de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.Document +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities.Setting +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities.SettingRepo +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities.SettingType +import org.springframework.stereotype.Service +import org.springframework.transaction.annotation.Transactional +import org.springframework.web.multipart.MultipartFile +import java.util.* + +/** + * Manages settings and, as DocumentLoader, turns them into the documents to embed. + */ +@Service +class SettingService( + private val settingRepo: SettingRepo, + private val fileStoreService: FileStoreService, + private val objectMapper: ObjectMapper +) : AppService(), DocumentLoader { + + companion object { + /** Strategies handed to the SettingsParser by name: plain keeps a value as it is, underscored replaces spaces with underscores. */ private val VALUE_STRATEGIES = mapOf String>( + "plain" to { it }, + "underscored" to { it.replace(" ", "_") } + ) + } + + /** + * Parses the stored settings into documents with a SettingsParser; throws BadRequestException if no main setting exists. + * + * @return a list of the documents. + */ + override fun loadDocuments(): List { + val settings = settingRepo.findAll() + val (tempMainSettings, tempValueSettings) = settings.partition { it.type == SettingType.MAIN } + if (tempMainSettings.isEmpty()) throw BadRequestException("Main setting does not exist") + + val mainJson = tempMainSettings.first().content + /* one list of values per value setting: its trimmed content split at newlines, keyed by the setting name */ val values = tempValueSettings.associate { it.name to it.content.trim().split('\n') } + val fileStoreIdsAndUrls = fileStoreService.listFiles().associate { it.displayName to Pair(it.id, it.storeUrl) } + + return SettingsParser(objectMapper, mainJson, values, fileStoreIdsAndUrls, VALUE_STRATEGIES).parse().also { + log.info("Parsed ${it.size} documents") + } + } + + /** + * @return all settings. + */ + fun getSettings(): List = settingRepo.findAll() + + /** + * Adds a setting file; a filename ending in json makes it the main setting, any other ending a values setting. + * Deletes existing settings with the same filename and, for a main setting, all existing main settings.
+ * + * @param file which contains the setting content. + * @return the saved setting. + * @throws BadRequestException if originalFilename of the MultipartFile is null. + */ + @Transactional + fun addSettings(file: MultipartFile): Setting { + val name = file.originalFilename ?: throw BadRequestException("File name must not be null") + val fileEnding = name.split(".").last() + val value = file.inputStream.bufferedReader().use { it.readText() } + val type = if (fileEnding == "json") SettingType.MAIN else SettingType.VALUES + + settingRepo.deleteAllByName(name).also { + log.info("Deleted existing settings with name '$name'") + } + + /* a new main setting replaces every stored main setting, whatever its name */ if (type == SettingType.MAIN) { + settingRepo.deleteAllByType(type).also { + log.info("Deleted main setting if existed") + } + } + + val setting = Setting(name, value, type) + return settingRepo.save(setting).also { + log.info("Saved new setting with name '$name'") + } + } + + /** + * Deletes a setting. + * + * @param id of the setting to delete. + */ + fun deleteSetting(id: UUID) { + settingRepo.deleteById(id).also { + log.info("Deleted setting with id $id") + } + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsExceptions.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsExceptions.kt new file mode 100644 index 0000000..8f6fd42 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsExceptions.kt @@ -0,0 +1,6 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings + +/** + * Exception used when setting parsing fails. + */ +class SettingsParserException(message: String) : Exception(message) diff --git
a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsParser.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsParser.kt new file mode 100644 index 0000000..1d861ce --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/SettingsParser.kt @@ -0,0 +1,222 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.Document +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.FileDocument +import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.WebsiteDocument +import java.util.* + +/** + * Parses the JSON of the main setting. + */ +class SettingsParser( + /** + * Reads the JSON. + */ + private val objectMapper: ObjectMapper, + + /** + * Main setting that is read and parsed. + */ + private val mainJson: String, + + /** + * Values from value settings. + * The key of the map is the filename of the value setting + * and thus a proper value for the "values" keys in the main setting. + * The value of the map is the list of the values specified by the value setting. + */ + private val allValues: Map>, + + /** + * Maps the filename of a file resource to its file store id and url. + * These filenames are a proper values for the "filename" keys in the main setting. + */ + private val fileStoreIdsAndUrls: Map>, + + /** + * Maps the name of a strategy to the strategy. + * The keys can be used for inserting values from value settings in the main setting. + * The value is transformed by the strategy. 
+ */ + private val valueStrategies: Map String> +) { + companion object { + private const val WEBSITE_TYPE = "Website" + private const val FILE_TYPE = "File" + private val VALUE_REGEX = "\\$\\{(\\w+)}".toRegex() + } + + /** + * Parses the main setting. + * + * @returns the resulting documents. + */ + fun parse(): List { + val json = objectMapper.readTree(mainJson) + + return parseRoot(json) + } + + private fun parseRoot(json: JsonNode): List { + json.requireArray() + + return json.elements().asSequence().map { parseCollectionOrDocument(it) }.flatten().toList() + } + + private fun parseCollectionOrDocument(json: JsonNode): List { + json.requireObject() + + return when { + json.has("title") -> listOf(parseDocument(json, null, null)) + json.has("collection") -> parseCollection(json) + else -> throw SettingsParserException("Failed parsing collection or document") + } + } + + private fun parseCollection(json: JsonNode): List { + json.requireObjectKeys("collection") + + val collection = json["collection"].stringOrThrow() + + return when { + json.has("elements") -> parseElements(json["elements"], collection) + json.has("elementsBuilder") -> parseElementsBuilder(json["elementsBuilder"], collection, json.getValues()) + else -> throw SettingsParserException("Failed parsing collection") + } + } + + private fun parseElements(json: JsonNode, collection: String?): List { + json.requireArray() + + return json.elements().asSequence().map { parseDocument(it, collection, null) }.toList() + } + + private fun parseElementsBuilder( + json: JsonNode, + collection: String?, + values: List + ): List { + json.requireObject() + + return values.map { parseDocument(json, collection, it) } + } + + private fun parseDocument(json: JsonNode, collection: String?, value: String?): Document { + json.requireObjectKeys("type") + + val type = json["type"].stringOrThrow() + + return when (type) { + WEBSITE_TYPE -> parseWebsite(json, collection, value) + FILE_TYPE -> parseFile(json, collection, value) + 
else -> throw SettingsParserException("Failed parsing document") + } + } + + private fun parseFile(json: JsonNode, collection: String?, value: String?): Document { + json.requireObjectKeys("title", "loaderType", "filename") + + val (fileStoreId, fileStoreUrl) = json.getUrlFromFilename(value) + + return FileDocument( + json["title"].stringWithValueOrThrow(value), + json["loaderType"].stringWithValueOrThrow(value), + collection, + json["isCalendar"].booleanOrFalse(), + fileStoreId, + fileStoreUrl + ) + } + + private fun parseWebsite(json: JsonNode, collection: String?, value: String?): Document { + json.requireObjectKeys("title", "loaderType", "loaderParams") + + return WebsiteDocument( + json["title"].stringWithValueOrThrow(value), + json["loaderType"].stringWithValueOrThrow(value), + collection, + json["isCalendar"].booleanOrFalse(), + parseWebsiteLoaderParams(json["loaderParams"], value) + ) + } + + private fun parseWebsiteLoaderParams(json: JsonNode, value: String?): WebsiteDocument.LoaderParams { + json.requireObjectKeys("url", "htmlSelector", "htmlSelectionIndex") + + return WebsiteDocument.LoaderParams( + json["url"].stringWithValueOrThrow(value), + json["htmlSelector"].stringWithValueOrThrow(value), + json["htmlSelectionIndex"].intOrThrow() + ) + } + + private fun JsonNode?.requireNotNull() { + if (this == null) throw SettingsParserException("Node is null") + } + + private fun JsonNode.requireObject() { + requireNotNull() + if (!isObject) throw SettingsParserException("Node is not an object") + } + + private fun JsonNode.requireArray() { + requireNotNull() + if (!isArray) throw SettingsParserException("Node is not an array") + } + + private fun JsonNode.requireObjectKeys(vararg keys: String) { + requireNotNull() + keys.forEach { if (!has(it)) throw SettingsParserException("Key $it not found in node") } + } + + private fun JsonNode.stringOrThrow(): String { + requireNotNull() + if (!isTextual) throw SettingsParserException("Node is not a string") + return 
asText() + } + + private fun JsonNode.intOrThrow(): Int { + requireNotNull() + if (!isInt) throw SettingsParserException("Node is not an int") + return asInt() + } + + private fun JsonNode.getValues(): List { + requireNotNull() + requireObjectKeys("values") + + val key = this["values"].stringOrThrow() + + return allValues[key] ?: throw SettingsParserException("Values for key $key not found") + } + + private fun JsonNode.getUrlFromFilename(value: String?): Pair { + requireNotNull() + val filename = this["filename"].stringWithValueOrThrow(value) + val idAndUrl = fileStoreIdsAndUrls[filename] ?: throw SettingsParserException("File $filename does not exist") + val id = idAndUrl.first ?: throw SettingsParserException("File store id must not be null") + return Pair(id, idAndUrl.second) + } + + private fun JsonNode.stringWithValueOrThrow(value: String?): String { + requireNotNull() + val string = stringOrThrow() + if (value == null) return string + + return VALUE_REGEX.replace(string) { matchResult -> + val strategyName = matchResult.groups[1]?.value + ?: throw SettingsParserException("Unknown error reading strategy name") + val strategy = valueStrategies[strategyName] + ?: throw SettingsParserException("Value strategy $strategyName does not exist") + + strategy(value) + } + } + + private fun JsonNode?.booleanOrFalse(): Boolean { + if (this == null) return false + return booleanValue() + } +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/Setting.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/Setting.kt new file mode 100644 index 0000000..7546674 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/Setting.kt @@ -0,0 +1,28 @@ +package 
de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities + +import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntity +import jakarta.persistence.Column +import jakarta.persistence.Entity + +/** + * Setting for defining how resources shall be embedded. + */ +@Entity +class Setting( + /** + * Human-readable identifier. + * In case this is a value setting, the name is used as value for "values" key in the main setting. + */ + val name: String, + + /** + * Actual specification of the setting. + */ + @Column(columnDefinition = "text") + val content: String, + + /** + * Specifies how the setting is used. + */ + val type: SettingType +) : AppEntity() diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingDtos.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingDtos.kt new file mode 100644 index 0000000..767b692 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingDtos.kt @@ -0,0 +1,19 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities + +import java.util.* + +data class SettingDto( + val id: UUID?, + val name: String, + val content: String, + val type: SettingType, +) { + constructor(setting: Setting) : this( + id = setting.id, + name = setting.name, + content = setting.content, + type = setting.type + ) +} + +fun Setting.toDto() = SettingDto(this) diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingRepo.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingRepo.kt new file mode 100644 index 0000000..21ef5a6 
--- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingRepo.kt @@ -0,0 +1,9 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities + +import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntityRepo + +interface SettingRepo : AppEntityRepo { + fun deleteAllByType(type: SettingType) + + fun deleteAllByName(name: String) +} diff --git a/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingType.kt b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingType.kt new file mode 100644 index 0000000..db10929 --- /dev/null +++ b/tutor-assistant-app-service/src/main/kotlin/de/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/settings/entities/SettingType.kt @@ -0,0 +1,15 @@ +package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.settings.entities + +enum class SettingType { + /** + * main setting. + * Contains the instructions for creating documents as JSON. + */ + MAIN, + + /** + * value setting. + * Contains values that can be inserted into the main setting. + */ + VALUES +} diff --git a/tutor-assistant-web/src/common/hooks/useAsyncActionTrigger.ts b/tutor-assistant-web/src/common/hooks/useAsyncActionTrigger.ts new file mode 100644 index 0000000..9ffd440 --- /dev/null +++ b/tutor-assistant-web/src/common/hooks/useAsyncActionTrigger.ts @@ -0,0 +1,40 @@ +import { useEffect, useState } from 'react' + +/** + * Performs an async action if it can. Retries on given triggers. + * + * @param action to be performed. + * @param canPerformAction function returning if action can be performed. 
+ * @param triggers to retry performing the action + * + * @returns + * * isProcessing: if the action is waiting to be performed or currently performing. + * * process: function for starting to try to perform the action. + */ +export function useAsyncActionTrigger( + action: () => Promise, + canPerformAction: () => boolean, + triggers: unknown[], +): [boolean, () => void] { + const [isProcessing, setIsProcessing] = useState(false) + + function process() { + setIsProcessing(true) + } + + useEffect(() => { + if (isProcessing && canPerformAction()) { + doAction() + } + }, [...triggers, isProcessing]) + + async function doAction() { + await action() + setIsProcessing(false) + } + + return [ + isProcessing, + process, + ] +} diff --git a/tutor-assistant-web/src/modules/chat/components/details/ChatContexts.tsx b/tutor-assistant-web/src/modules/chat/components/details/ChatContexts.tsx new file mode 100644 index 0000000..fc997db --- /dev/null +++ b/tutor-assistant-web/src/modules/chat/components/details/ChatContexts.tsx @@ -0,0 +1,80 @@ +import { ChatMessageContext } from '../../chat-model.ts' +import { useTranslation } from 'react-i18next' +import { useOpenContexts } from '../../hooks/useOpenContexts.ts' +import { isNotPresent, isPresent } from '../../../../common/utils/utils.ts' +import { MainContent, Spacer, VStack } from '../../../../common/components/containers/flex-layout.tsx' +import { Button, Card, CardActions, CardContent, Typography } from '@mui/joy' +import { Scroller } from '../../../../common/components/containers/Scroller.tsx' +import { roundTo } from '../../../../common/utils/math-utils.ts' +import { Multiline } from '../../../../common/components/widgets/Multiline.tsx' +import React from 'react' + +interface ChatContextsProps { + contexts: ChatMessageContext[] | undefined +} + +/** + * Displays contexts (sources) as cards. Displays open buttons for sources that can be opened (have an originalKey). + * + * @param contexts to be displayed. 
+ */ +export function ChatContexts({ contexts }: ChatContextsProps) { + const { t } = useTranslation() + + const { openContexts } = useOpenContexts() + + if (isNotPresent(contexts)) contexts = [] + + function getTitleAndPage(context: ChatMessageContext) { + const pageOutput = isPresent(context.page) ? `, ${t('Page')} ${context.page + 1}` : '' + return isPresent(context.title) ? `${context.title}${pageOutput}` : '' + } + + if (contexts.length === 0) return ( + + + {t('Select a message')} + + + ) + + return ( + <> + + + + { + contexts.map((context, index) => ( + + + + + {getTitleAndPage(context)} + + + {t('Relevance')}: {roundTo(context.score ?? -1, 2)} + + + + + + {isPresent(context.originalKey) && ( + + + + + )} + + )) + } + + + + + + + ) +} + diff --git a/tutor-assistant-web/src/modules/chat/components/details/ChatSummary.tsx b/tutor-assistant-web/src/modules/chat/components/details/ChatSummary.tsx new file mode 100644 index 0000000..87f7bbd --- /dev/null +++ b/tutor-assistant-web/src/modules/chat/components/details/ChatSummary.tsx @@ -0,0 +1,37 @@ +import { ChatSummary as ChatSummaryModel } from '../../chat-model.ts' +import { MainContent } from '../../../../common/components/containers/flex-layout.tsx' +import { Scroller } from '../../../../common/components/containers/Scroller.tsx' +import { isPresent } from '../../../../common/utils/utils.ts' +import { Box } from '@mui/joy' +import { StyledMarkdown } from '../../../../common/components/widgets/StyledMarkdown.tsx' +import React from 'react' + +interface ChatSummaryProps { + summary: ChatSummaryModel | undefined +} + +/** + * Displays summary of a chat. + * + * @param summary of the chat. 
+ */ +export function ChatSummary({ summary }: ChatSummaryProps) { + + return ( + <> + + + { + isPresent(summary) && ( + + {`## ${summary.title}`} + {`### ${summary.subtitle}`} + {summary.content} + + ) + } + + + + ) +} diff --git a/tutor-assistant-web/src/modules/chat/hooks/useFileResources.ts b/tutor-assistant-web/src/modules/chat/hooks/useFileResources.ts new file mode 100644 index 0000000..21c91c6 --- /dev/null +++ b/tutor-assistant-web/src/modules/chat/hooks/useFileResources.ts @@ -0,0 +1,25 @@ +import { useAuth } from '../../../app/auth/useAuth.ts' +import { apiBaseUrl } from '../../../app/base.ts' + + +/** + * Opens a file-resource inside the browser. + */ +export function useFileResources() { + const { getAuthHttp } = useAuth() + + async function loadFile(id: string) { + const response = await getAuthHttp().get(`${apiBaseUrl}/embedding_manager/resources/${id}`, { + responseType: 'blob', + }) + + const blobUrl = URL.createObjectURL(response.data) + window.open(blobUrl, '_blank') + + setTimeout(() => URL.revokeObjectURL(blobUrl), 30_000) + } + + return { + loadFile, + } +} \ No newline at end of file diff --git a/tutor-assistant-web/src/modules/documents/components/DocumentsList.tsx b/tutor-assistant-web/src/modules/documents/components/DocumentsList.tsx new file mode 100644 index 0000000..e033e66 --- /dev/null +++ b/tutor-assistant-web/src/modules/documents/components/DocumentsList.tsx @@ -0,0 +1,79 @@ +import { useTranslation } from 'react-i18next' +import { useDocuments } from '../hooks/useDocuments.ts' +import { Row, Spacer, VStack } from '../../../common/components/containers/flex-layout.tsx' +import { Accordion, AccordionDetails, AccordionGroup, AccordionSummary, Button, Typography } from '@mui/joy' +import { StandardList } from './StandardList.tsx' +import { Scroller } from '../../../common/components/containers/Scroller.tsx' +import { useOpenContexts } from '../../chat/hooks/useOpenContexts.ts' + +interface Props { + canManage: boolean +} + +/** + * 
Renders a list and buttons for viewing and managing file and website documents. + * + * @param canManage true if the user can embed, reembed and delete, false if the user can only view. + */ +export function DocumentsList({ canManage }: Props) { + const { t } = useTranslation() + const { + embed, + groupedDocuments, + reembedFile, + reembedWebsite, + deleteFile, + deleteWebsite, + } = useDocuments() + + const { openContexts } = useOpenContexts() + + return ( + + + + + + + {canManage && ()} + + + + { + Object.keys(groupedDocuments).map((key, index) => ( + + + {key} + + + file.title} + onClick={file => openContexts(file.fileStoreId)} + onReload={file => reembedFile(file.id)} + onDelete={file => deleteFile(file.id)} + canManage={canManage} + /> + website.title} + onClick={website => openContexts(website.url)} + onReload={website => reembedWebsite(website.id)} + onDelete={website => deleteWebsite(website.id)} + canManage={canManage} + /> + + + + )) + } + + + + + + + + + ) +} diff --git a/tutor-assistant-web/src/modules/documents/hooks/useDocuments.ts b/tutor-assistant-web/src/modules/documents/hooks/useDocuments.ts new file mode 100644 index 0000000..4188967 --- /dev/null +++ b/tutor-assistant-web/src/modules/documents/hooks/useDocuments.ts @@ -0,0 +1,110 @@ +import { useEffect, useMemo, useState } from 'react' +import { useAuth } from '../../../app/auth/useAuth.ts' +import { apiBaseUrl } from '../../../app/base.ts' +import { remove } from '../../../common/utils/array-utils.ts' +import { isNotPresent } from '../../../common/utils/utils.ts' +import { FileDocument, WebsiteDocument } from '../model.ts' +import { useTranslation } from 'react-i18next' + + +/** + * Manages file and website documents. + * + * @property embed function to run the embedding process. + * @property groupedDocuments file and website documents grouped by their collection. + * Each collection contains its file and website documents. + * @property websites website documents. 
+ * @property reembedFile function to reembed a file. + * @property reembedWebsite function to reembed a website. + * @property deleteFile function to delete a file. + * @property deleteWebsite function to delete a website. + */ +export function useDocuments() { + const { t } = useTranslation() + const { getAuthHttp } = useAuth() + + const [files, setFiles] = useState([]) + const [websites, setWebsites] = useState([]) + + const generalKey = t('General') + + const groupedDocuments = useMemo(() => { + const result = { + [generalKey]: { + files: [] as FileDocument[], + websites: [] as WebsiteDocument[], + }, + } + + files.forEach((file) => { + const key = file.collection ?? generalKey + if (!(key in result)) { + result[key] = { files: [], websites: [] } + } + result[key].files.push(file) + }) + + websites.forEach((website) => { + const key = website.collection ?? generalKey + if (!(key in result)) { + result[key] = { files: [], websites: [] } + } + result[key].websites.push(website) + }) + return result + }, [files, websites]) + + + useEffect(() => { + loadFiles() + loadWebsites() + }, []) + + async function embed() { + await getAuthHttp().post(`${apiBaseUrl}/embedding_manager/documents/embed`) + loadFiles() + loadWebsites() + } + + + async function loadFiles() { + const response = await getAuthHttp().get(`${apiBaseUrl}/embedding_manager/documents/files`) + setFiles(response.data) + } + + async function loadWebsites() { + const response = await getAuthHttp().get(`${apiBaseUrl}/embedding_manager/documents/websites`) + setWebsites(response.data) + } + + async function reembedFile(id: string | undefined) { + if (isNotPresent(id)) return + await getAuthHttp().post(`${apiBaseUrl}/embedding_manager/documents/files/${id}/reembed`) + } + + async function reembedWebsite(id: string | undefined) { + if (isNotPresent(id)) return + await getAuthHttp().post(`${apiBaseUrl}/embedding_manager/documents/websites/${id}/reembed`) + } + + async function deleteFile(id: string | undefined) 
{ + if (isNotPresent(id)) return + await getAuthHttp().delete(`${apiBaseUrl}/embedding_manager/documents/files/${id}`) + setFiles(prevState => remove(id, prevState)) + } + + async function deleteWebsite(id: string | undefined) { + if (isNotPresent(id)) return + await getAuthHttp().delete(`${apiBaseUrl}/embedding_manager/documents/websites/${id}`) + setWebsites(prevState => remove(id, prevState)) + } + + return { + embed, + groupedDocuments, + reembedFile, + reembedWebsite, + deleteFile, + deleteWebsite, + } +} diff --git a/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries.txt b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries.txt new file mode 100644 index 0000000..e69de29 diff --git a/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_1.txt b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_1.txt new file mode 100644 index 0000000..1631f6f --- /dev/null +++ b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_1.txt @@ -0,0 +1,5 @@ +Dies ist ein Chatverlauf. +Untersuche, welche groben Themen in der letzten Nachricht des Benutzers vorkommen. Benenne sie jeweils mit einem Begriff. +Gib möglichst wenig Begriffe aus. Sie sollen nur die Hauptthemen der Anfragen abdecken. Häufig wird nur ein Begriff notwendig sein. +Verwende die anderen Nachrichten nur, um den Kontext der letzten Nachricht zu verstehen. +Ganz wichtig: Beantworte nicht die Frage. Gib nur die Begriffe zu den groben Themen aus. Trenne sie mit Semikolons. 
diff --git a/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_2.txt b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_2.txt new file mode 100644 index 0000000..a66b07c --- /dev/null +++ b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_2.txt @@ -0,0 +1,5 @@ +Dies ist ein Chatverlauf. +Ich muss Anfragen an einen Vectorstore machen, sodass die richtigen Dokumente abgerufen werden, sodass die letzte Frage des Benutzers beantwortet werden kann. +Gib mir Anfragen, die ich stellen soll. Trenne die Anfragen mit einem Semikolon. +Gib mir möglichst wenig Anfragen, um Zeit zu sparen. Die Anfragen sollen dennoch alles Wichtige abdecken. In vielen Fällen wird eine Anfrage reichen. +Ganz wichtig: beantworte nicht die Frage, sondern gib nur die Anfragen aus. diff --git a/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_3.txt b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_3.txt new file mode 100644 index 0000000..37b52d6 --- /dev/null +++ b/tutor-assistant/resources/prompt_templates/queries_from_chat_model/multiple_retriever_queries_3.txt @@ -0,0 +1,21 @@ +Dies ist ein Chatverlauf. Es soll eine Antwort auf die letzte Nachricht des Benutzers erstellt werden. +Um eine Antwort zu generieren, müssen erstmal die notwendigen Dokumente aus einem Vectorstore abgerufen werden. + +Ich möchte, dass du mir Anfragen ausgibst, die ich an den Vectorstore senden kann. Beachte dabei folgendes: +- Die Anfragen sollen beschreiben, was der Benutzer möchte. Zudem sollen sie so formuliert sein, dass bei einer Ähnlichkeitssuche die richtigen Dokumente übereinstimmen. +- Überlege dir, welche Informationen du bräuchtest und formuliere dahingehend Anfragen. +- Es soll die letzte Nachricht des Benutzers beantwortet werden. 
Beziehe jedoch den Chat-Verlauf mit ein, wenn es für den Kontext wichtig ist. +- Es sollen möglichst wenig Anfragen generiert werden. Je mehr Anfragen, desto länger muss der Benutzer warten. +- Die Anfragen sollen sehr verschieden sein. Bei ähnlichen Anfragen kämen dieselben Dokumente zurück, damit gäbe es Redundanz. +- Verwende unter keinen Umständen dieselben Begriffe in mehreren Anfragen. +- Die Anfragen sollen Dokumente identifizieren, nicht die Frage beantworten. +- Häufig wird nur eine Anfrage benötigt. +- Formuliere nur kurze Anfragen. Sie sollen nur wenige Wörter lang sein, wenn überhaupt mehr als ein Wort. +- Versuche wirklich den Kern der Frage des Benutzers zu erfassen und entsprechende Anfragen zu generieren. Gib das Thema aus und nicht, was die Frage dazu ist. +- Benutze die Begriffe des Benutzers. +- Trenne die Anfragen mit einem Semikolon. + +Es ist wirklich super wichtig, dass du nur ganz wenige Anfragen ausgibst. Gib wirklich nur dann mehrere aus, wenn es absolut notwendig ist, um den Kontext der Nachricht zu erfassen. Meistens ist dies nicht der Fall!!! +Verwende auf gar keinen Fall dasselbe Wort in mehreren Anfragen. Das bedeutet, dass man nur eine Anfrage braucht!!! + +Ganz wichtig: beantworte nicht die Frage, sondern gib nur die Anfragen aus. 
diff --git a/tutor-assistant/tutor_assistant/controller/api/calendar_api.py b/tutor-assistant/tutor_assistant/controller/api/calendar_api.py new file mode 100644 index 0000000..3273a30 --- /dev/null +++ b/tutor-assistant/tutor_assistant/controller/api/calendar_api.py @@ -0,0 +1,22 @@ +from fastapi import APIRouter + +from tutor_assistant.controller.config.domain_config import config +from tutor_assistant.controller.utils.data_transfer_utils import json_output +from tutor_assistant.domain.calendar.calendar_chain_service import CalendarChainService +from tutor_assistant.utils.string_utils import shorten_middle + +router = APIRouter() + + +@router.post('/calendar') +async def _calendar(): + config.logger.info('POST /calendar') + + chain = CalendarChainService(config).create() + + result = chain.invoke({}) + answer = result['answer'] + + config.logger.info(f'Result: {shorten_middle(answer, 30)}') + + return json_output(answer) diff --git a/tutor-assistant/tutor_assistant/controller/api/chats_api.py b/tutor-assistant/tutor_assistant/controller/api/chats_api.py new file mode 100644 index 0000000..2571f9b --- /dev/null +++ b/tutor-assistant/tutor_assistant/controller/api/chats_api.py @@ -0,0 +1,45 @@ +from fastapi import Request, APIRouter +from starlette.responses import StreamingResponse + +from tutor_assistant.controller.config.domain_config import config +from tutor_assistant.controller.utils.api_utils import check_request_body +from tutor_assistant.controller.utils.data_transfer_utils import json_output +from tutor_assistant.controller.utils.langchain_utils import stream_response +from tutor_assistant.domain.chats.message_chain_service import MessageChainService +from tutor_assistant.domain.chats.summary_chain_service import SummaryChainService +from tutor_assistant.utils.string_utils import shorten_middle + +router = APIRouter() + + +@router.post('/chats/message') +async def _message(request: Request): + body = await request.json() + check_request_body(body, 
['message'])
+    user_message_content = body['message']
+    history = body.get('history', [])
+
+    config.logger.info(f'POST /chats/message: len(message):{len(user_message_content)};len(history):{len(history)}')
+
+    response = MessageChainService(config).load_response(user_message_content, history)
+
+    config.logger.info('Starting event-stream')
+
+    return StreamingResponse(
+        stream_response(response), media_type="text/event-stream"
+    )
+
+
+@router.post("/chats/summarize")
+async def _summary(request: Request):
+    body = await request.json()
+    history = body.get('history', [])
+
+    config.logger.info(f'POST /chats/summarize: len(history):{len(history)}')
+
+    chain = SummaryChainService(config).create(history)
+    result = chain.invoke({})
+
+    config.logger.info(f'Result: {shorten_middle(result, 30)}')
+
+    return json_output(result)
diff --git a/tutor-assistant/tutor_assistant/controller/api/documents_api.py b/tutor-assistant/tutor_assistant/controller/api/documents_api.py
new file mode 100644
index 0000000..7d4db09
--- /dev/null
+++ b/tutor-assistant/tutor_assistant/controller/api/documents_api.py
@@ -0,0 +1,55 @@
+from fastapi import Request, APIRouter
+
+from tutor_assistant.controller.config.domain_config import config
+from tutor_assistant.controller.config.loaders_config import loader_creators
+from tutor_assistant.controller.utils.api_utils import check_request_body
+from tutor_assistant.controller.utils.loaders_utils import get_loader
+from tutor_assistant.domain.documents.document_service import DocumentService
+
+router = APIRouter()
+
+
+@router.post('/documents/add')
+async def _add_document(request: Request):
+    """Add a document via ``DocumentService.add`` and return its result.
+
+    The JSON body is passed to ``check_request_body`` together with the keys ``title``,
+    ``originalKey``, ``loaderType``, ``loaderParams`` and ``isCalendar`` before any of them is read.
+    """
+    body: dict = await request.json()
+    check_request_body(body, ['title', 'originalKey', 'loaderType', 'loaderParams', 'isCalendar'])
+    title: str = body['title']
+    original_key: str = body['originalKey']
+    loader_type: str = body['loaderType']
+    loader_params: dict = body['loaderParams']
+    is_calendar: bool = body['isCalendar']
+
+    config.logger.info(
+        f'POST /documents/add: title={title}; original_key={original_key}')
+
+    loader = get_loader(loader_creators, title, loader_type, loader_params)
+
+    ids = DocumentService(config).add(loader, title, original_key, is_calendar)
+
+    config.logger.info(f'Result: {ids}')
+
+    return ids
+
+
+@router.post('/documents/delete')
+async def _delete_document(request: Request):
+    """Delete the entries whose ids are given in the JSON request body.
+
+    The body is handed to ``DocumentService.delete`` without validation; a ``None`` result is
+    reported to the client as ``True``.
+    """
+    body = await request.json()
+    ids: list[str] = body
+
+    config.logger.info(f'POST /documents/delete: ids:{ids}')
+
+    result = DocumentService(config).delete(ids)
+
+    config.logger.info(f'Result: {result}')
+
+    return True if result is None else result
diff --git a/tutor-assistant/tutor_assistant/domain/chats/message_multi_steps_response_loader.py b/tutor-assistant/tutor_assistant/domain/chats/message_multi_steps_response_loader.py
new file mode 100644
index 0000000..9fb5e13
--- /dev/null
+++ b/tutor-assistant/tutor_assistant/domain/chats/message_multi_steps_response_loader.py
@@ -0,0 +1,136 @@
+from typing import Iterator, Generator, Any
+
+from langchain_core.documents import Document
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough, RunnableSerializable
+
+from tutor_assistant.controller.utils.data_transfer_utils import messages_from_history
+from tutor_assistant.controller.utils.langchain_utils import escape_prompt
+from tutor_assistant.domain.documents.retrievers.with_references_retriever import WithReferencesRetriever
+from tutor_assistant.domain.domain_config import DomainConfig
+from tutor_assistant.domain.utils.templates import prepend_base_template
+
+# Markers the chat model may emit at the start of its answer to steer the next step.
+_QUERIES_MARKER = '!!!QUERIES!!!'
+_RESPONSE_MARKER = '!!!RESPONSE!!!'
+# Once the buffered answer is longer than this without a marker, it is treated as a plain answer.
+_MARKER_PROBE_LENGTH = 14
+
+
+class MessageMultiStepsResponseLoader:
+    """Streams a chat answer and lets the chat model ask for other retriever queries first.
+
+    The beginning of the streamed answer is buffered and checked for a marker: after
+    ``!!!QUERIES!!!`` the rest of the answer is read as ``;``-separated retriever queries and the
+    request is repeated with the next prompt template; after ``!!!RESPONSE!!!``, or when no marker
+    shows up, the retrieved contexts and the answer are passed on to the caller.
+    """
+
+    def __init__(self, config: DomainConfig):
+        self._config = config
+
+    def load_response(self, user_message_content: str, history: list[dict[str, str]]) -> Generator[Any, Any, None]:
+        """Yield context items and answer chunks for ``user_message_content`` given ``history``."""
+        messages = self._get_all_messages(user_message_content, history)
+        yield from self._get_response_for_queries(messages, [user_message_content], ['first.txt', 'last.txt'])
+
+    def _get_response_for_queries(
+            self, messages: list[tuple[str, str]], queries: list[str], templates: list[str]
+    ) -> Generator[Any, Any, None]:
+        """Run retrieval for ``queries`` plus the chat model with ``templates[0]`` and stream the outcome.
+
+        ``templates[1:]`` is handed to the follow-up round if the model answers with the queries marker.
+        """
+        if not templates:
+            # Same exception type as the former bare ``templates[0]`` lookup, but with a usable message.
+            raise IndexError('Chat model asked for further queries but no prompt template is left')
+
+        retriever_chain = self._get_retriever_chain(queries)
+        model_chain = self._get_model_chain(messages, templates[0])
+
+        result = RunnablePassthrough.assign(context=retriever_chain).assign(answer=model_chain).stream({})
+
+        contexts = []
+        answer_start = ''
+
+        for item in result:
+            if 'context' in item:
+                contexts.append(item)
+            elif 'answer' in item:
+                answer_start += item['answer']
+
+            if _QUERIES_MARKER in answer_start:
+                self._config.logger.info(_QUERIES_MARKER)
+                # Text that arrived in the same chunk as the marker already belongs to the queries.
+                queries_start = answer_start.split(_QUERIES_MARKER, 1)[1]
+                queries = self._get_queries_from_answer(result, queries_start)
+                yield from self._get_response_for_queries(messages, queries, templates[1:])
+                break
+            elif _RESPONSE_MARKER in answer_start:
+                self._config.logger.info(_RESPONSE_MARKER)
+                yield from contexts
+                # Do not lose answer text that arrived in the same chunk as the marker;
+                # it is emitted in the same shape as the streamed answer items.
+                response_start = answer_start.split(_RESPONSE_MARKER, 1)[1].lstrip()
+                if response_start:
+                    yield {'answer': response_start}
+                yield from self._yield_response(result)
+                break
+            elif len(answer_start) > _MARKER_PROBE_LENGTH:
+                self._config.logger.info('Long enough')
+                yield from contexts
+                yield answer_start
+                yield from self._yield_response(result)
+                break
+        else:
+            # The stream ended before any branch above fired (short answer without a marker).
+            # Emit what was buffered, mirroring the plain-answer branch, instead of dropping it.
+            yield from contexts
+            if answer_start:
+                yield answer_start
+
+    @staticmethod
+    def _get_all_messages(user_message_content: str, history) -> list[tuple[str, str]]:
+        """Return the history messages followed by the escaped user message."""
+        messages = list(messages_from_history(history))
+        messages.append(('user', escape_prompt(user_message_content)))
+
+        return messages
+
+    def _get_queries_from_answer(self, result: Iterator, prefix: str = '') -> list[str]:
+        """Read the rest of ``result`` and split ``prefix`` plus the answer text into queries at ``;``."""
+        answer = prefix
+        for item in result:
+            if 'answer' in item:
+                answer += item['answer']
+
+        # Blank entries (e.g. from a trailing ';') are dropped, surrounding whitespace is removed.
+        queries = [query.strip() for query in answer.split(';') if query.strip()]
+        self._config.logger.info(f'Queries from chat model: {queries}')
+        return queries
+
+    @staticmethod
+    def _yield_response(result: Iterator) -> Generator[Any, Any, None]:
+        """Pass on every remaining item of ``result``."""
+        yield from result
+
+    def _get_retriever_chain(self, queries: list[str]) -> RunnableSerializable[Any, list[Document]]:
+        """Build a chain that feeds the fixed ``queries`` into a ``WithReferencesRetriever``."""
+        return (lambda _: queries) | WithReferencesRetriever(self._config)
+
+    def _get_model_chain(self, messages: list[tuple[str, str]], template: str):
+        """Build the chain ``prompt | chat model | string parser`` for the given template name."""
+        prompt = self._get_chat_prompt(messages, template)
+        model = self._config.chat_model
+        parser = StrOutputParser()
+
+        return prompt | model | parser
+
+    def _get_chat_prompt(self, messages: list[tuple[str, str]], template: str) -> ChatPromptTemplate:
+        """Create the chat prompt: the system template (passed through ``prepend_base_template``), then ``messages``."""
+        template_text = self._config.resources['prompt_templates']['multi_steps'][template]
+        complete_template = prepend_base_template(self._config, template_text)
+
+        prompt_messages = [('system', complete_template)]
+        prompt_messages.extend(messages)
+
+        return ChatPromptTemplate.from_messages(prompt_messages)
diff --git a/tutor-assistant/tutor_assistant/domain/documents/retrievers/combined_retriever.py b/tutor-assistant/tutor_assistant/domain/documents/retrievers/combined_retriever.py
new file mode 100644
index 0000000..79f6dcb
--- /dev/null
+++ b/tutor-assistant/tutor_assistant/domain/documents/retrievers/combined_retriever.py
@@ -0,0 +1,26 @@
+from typing import Any
+
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+
+from tutor_assistant.domain.documents.retrievers.queries_from_chat_model_loader import get_queries
+from tutor_assistant.domain.documents.retrievers.with_references_retriever import WithReferencesRetriever
+from tutor_assistant.domain.domain_config import DomainConfig
+
+
+class CombinedRetriever(BaseRetriever):
+    """Retriever that asks the chat model for queries and resolves them with ``WithReferencesRetriever``."""
+
+    def __init__(self, config: DomainConfig, *args: Any, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+        self._config = config
+        self._with_references_retriever = WithReferencesRetriever(config)
+
+    def _get_relevant_documents(
+            self, messages: list[tuple[str, str]], *, run_manager: CallbackManagerForRetrieverRun
+    ) -> list[Document]:
+        """Generate queries from ``messages`` via ``get_queries`` and return the documents found for them."""
+        queries = get_queries(self._config, messages)
+
+        return ((lambda _: queries) | self._with_references_retriever).invoke({})
diff --git
a/tutor-assistant/tutor_assistant/domain/documents/retrievers/generated_queries_retriever.py b/tutor-assistant/tutor_assistant/domain/documents/retrievers/generated_queries_retriever.py
new file mode 100644
index 0000000..52675dd
--- /dev/null
+++ b/tutor-assistant/tutor_assistant/domain/documents/retrievers/generated_queries_retriever.py
@@ -0,0 +1,38 @@
+from typing import Any
+from uuid import uuid4
+
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+
+from tutor_assistant.domain.documents.retrievers.queries_from_chat_model_loader import get_queries
+from tutor_assistant.domain.documents.utils.vector_store_utils import similarity_search_with_score
+from tutor_assistant.domain.domain_config import DomainConfig
+from tutor_assistant.utils.list_utils import distinct_by
+
+
+class GeneratedQueriesRetriever(BaseRetriever):
+    """Retriever that searches the vector store once per query generated by the chat model."""
+
+    def __init__(self, config: DomainConfig, *args: Any, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+        self._vector_store = config.vector_store_manager.load()
+        self._config = config
+
+    def _get_relevant_documents(
+            self, messages: list[tuple[str, str]], *, run_manager: CallbackManagerForRetrieverRun
+    ) -> list[Document]:
+        """Collect the hits of every generated query and pass them through ``distinct_by``."""
+        found = [
+            doc
+            for query in get_queries(self._config, messages)
+            for doc in similarity_search_with_score(self._vector_store, query)
+        ]
+        return distinct_by(self._id_or_random, found)
+
+    @staticmethod
+    def _id_or_random(doc: Document) -> Any:
+        """Return ``metadata['id']`` when present, otherwise a fresh random UUID string."""
+        if 'id' in doc.metadata:
+            return doc.metadata['id']
+        return str(uuid4())
diff --git a/tutor-assistant/tutor_assistant/domain/documents/retrievers/queries_from_chat_model_loader.py b/tutor-assistant/tutor_assistant/domain/documents/retrievers/queries_from_chat_model_loader.py
new file mode 100644
index 0000000..c79c63d
--- /dev/null
+++
b/tutor-assistant/tutor_assistant/domain/documents/retrievers/queries_from_chat_model_loader.py
@@ -0,0 +1,24 @@
+from langchain_core.prompts import ChatPromptTemplate
+
+from tutor_assistant.domain.domain_config import DomainConfig
+
+
+def get_queries(config: DomainConfig, messages: list[tuple[str, str]]) -> list[str]:
+    """Ask the chat model for retriever queries and return them as a list.
+
+    The model reply is split at ``;``; surrounding whitespace is removed and blank entries are dropped.
+    NOTE(review): assumes the reply ``content`` is a plain string - confirm for the configured model.
+    """
+    chain = _get_chat_prompt(config, messages) | config.chat_model
+    content = chain.invoke({}).content
+    config.logger.info(f'Queries from chat model (raw): {content}')
+
+    return [query.strip() for query in content.split(';') if query.strip()]
+
+
+def _get_chat_prompt(config: DomainConfig, messages: list[tuple[str, str]]) -> ChatPromptTemplate:
+    """Build a prompt from ``messages`` followed by the query-generation system prompt."""
+    multiple_prompts = config.resources['prompt_templates']['queries_from_chat_model'][
+        'multiple_retriever_queries_3.txt']
+    prompt_messages = messages + [('system', multiple_prompts)]
+    return ChatPromptTemplate.from_messages(prompt_messages)
diff --git a/tutor-assistant/tutor_assistant/domain/documents/utils/vector_store_utils.py b/tutor-assistant/tutor_assistant/domain/documents/utils/vector_store_utils.py
new file mode 100644
index 0000000..2e17f33
--- /dev/null
+++ b/tutor-assistant/tutor_assistant/domain/documents/utils/vector_store_utils.py
@@ -0,0 +1,24 @@
+from langchain_core.documents import Document
+from langchain_core.vectorstores import VectorStore
+
+
+def similarity_search_with_score(vector_store: VectorStore, query: str, k: int = 5) -> list[Document]:
+    """Search ``vector_store`` for ``query`` and return the hits with their score in ``metadata['score']``.
+
+    The returned documents are the objects delivered by the store; their ``metadata`` is modified in place.
+    If the search raises, the exception is printed and an empty list is returned.
+    """
+    try:
+        docs_with_scores = vector_store.similarity_search_with_score(query, k=k)
+    except Exception as e:
+        print('Exception:', e)
+        return []
+
+    result = []
+    doc: Document
+    for doc, np_score in docs_with_scores:
+        # float() turns the store's score (the original notes a numpy float) into a plain Python float.
+        doc.metadata['score'] = float(np_score)
+        result.append(doc)
+
+    return result