Skip to content

Commit

Permalink
cleanup, api, docs, refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Niklas Kerkhoff committed Dec 11, 2024
1 parent 5581b34 commit f5f9c5c
Show file tree
Hide file tree
Showing 42 changed files with 1,750 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager

import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService
import de.niklaskerkhoff.tutorassistantappservice.lib.webclient.EmptyResponseBodyException
import org.springframework.beans.factory.annotation.Value
import org.springframework.core.ParameterizedTypeReference
import org.springframework.stereotype.Service
import org.springframework.web.reactive.function.client.WebClient

/**
 * Client for the document endpoints of the RAG-Service.
 *
 * Every call is executed synchronously: the reactive response is awaited with `block()`.
 */
@Service
class RagDocumentService(
    private val webClient: WebClient
) : AppService() {
    @Value("\${app.tutor-assistant.base-url}")
    private lateinit var baseUrl: String

    /**
     * Adds a document to the RAG-Service.
     *
     * @param title of the document.
     * @param originalKey id of the file in the file store or the url of a website.
     * @param loaderType specifies how to load and process the content.
     * @param loaderParams specifies how to access the resource and its content.
     * @param isCalendar specifies if the document shall be used for the generation of the calendar.
     *
     * @return the ids of the embedded chunks.
     * @throws EmptyResponseBodyException if the call completes without yielding a value.
     */
    fun addDocument(
        title: String,
        originalKey: String,
        loaderType: String,
        loaderParams: Map<String, Any>,
        isCalendar: Boolean
    ): List<String> {
        // Anonymous subclass keeps the generic element type available for decoding the response.
        val chunkIdsType = object : ParameterizedTypeReference<List<String>>() {}

        val payload = mapOf(
            "title" to title,
            "originalKey" to originalKey,
            "loaderType" to loaderType,
            "loaderParams" to loaderParams,
            "isCalendar" to isCalendar
        )

        val chunkIds = webClient.post()
            .uri("$baseUrl/documents/add")
            .bodyValue(payload)
            .retrieve()
            .bodyToMono(chunkIdsType)
            .block()

        return chunkIds ?: throw EmptyResponseBodyException()
    }

    /**
     * Deletes a document.
     *
     * @param chunksIds ids of the embedded chunks of the document.
     *
     * @return true if deletion was successful.
     * @throws EmptyResponseBodyException if the call completes without yielding a value.
     */
    fun deleteDocument(chunksIds: List<String>): Boolean {
        val deleted = webClient.post()
            .uri("$baseUrl/documents/delete")
            .bodyValue(chunksIds)
            .retrieve()
            .bodyToMono(Boolean::class.java)
            .block()

        return deleted ?: throw EmptyResponseBodyException()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents

import de.niklaskerkhoff.tutorassistantappservice.lib.logging.Logger
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.*
import org.springframework.stereotype.Component


/**
 * Visitor that embeds a document unless a document with the same title is already stored.
 *
 * For a document that is not stored yet, it is added to the RAG service, the returned
 * chunk ids are written to the document, and the document is saved in its repository.
 */
@Component
class ApplierVisitor(
    private val fileDocumentRepo: FileDocumentRepo,
    private val websiteDocumentRepo: WebsiteDocumentRepo,
    private val ragDocumentService: RagDocumentService
) : DocumentVisitor, Logger {

    /**
     * Embeds and saves [fileDocument] if no file document with the same title exists.
     */
    override fun visit(fileDocument: FileDocument) {
        val title = fileDocument.title
        log.info("Visiting fileDocument with title $title")

        if (fileDocumentRepo.findByTitle(title) != null) {
            logStopping(title)
            return
        }
        logContinuing(title)

        val chunkIds = ragDocumentService.addDocument(
            title = title,
            originalKey = fileDocument.fileStoreId.toString(),
            loaderType = fileDocument.loaderType,
            loaderParams = mapOf("url" to fileDocument.fileStoreUrl),
            isCalendar = fileDocument.isCalendar
        )
        logAddedToRagService(title, chunkIds)

        fileDocument.chunksIds = chunkIds

        val saved = fileDocumentRepo.save(fileDocument)
        logSaved(saved.title)
    }

    /**
     * Embeds and saves [websiteDocument] if no website document with the same title exists.
     */
    override fun visit(websiteDocument: WebsiteDocument) {
        val title = websiteDocument.title
        log.info("Visiting websiteDocument with title $title")

        if (websiteDocumentRepo.findByTitle(title) != null) {
            logStopping(title)
            return
        }
        logContinuing(title)

        val source = websiteDocument.loaderParams
        val chunkIds = ragDocumentService.addDocument(
            title = title,
            originalKey = source.url,
            loaderType = websiteDocument.loaderType,
            loaderParams = mapOf(
                "url" to source.url,
                "htmlSelector" to source.htmlSelector,
                "htmlSelectionIndex" to source.htmlSelectionIndex,
            ),
            isCalendar = websiteDocument.isCalendar
        )
        logAddedToRagService(title, chunkIds)

        websiteDocument.chunksIds = chunkIds

        websiteDocumentRepo.save(websiteDocument)
        logSaved(title)
    }

    /** Logs that no stored document has [title], so embedding proceeds. */
    private fun logContinuing(title: String) {
        log.info("$title does not exist, continuing")
    }

    /** Logs that a stored document already has [title], so nothing is embedded. */
    private fun logStopping(title: String) {
        log.info("$title already exists, stopping")
    }

    /** Logs how many chunk ids the RAG service returned for [title]. */
    private fun logAddedToRagService(title: String, tutorAssistantIds: List<String>) {
        log.info("Added $title to Tutor-Assistant, got ${tutorAssistantIds.size} ids")
    }

    /** Logs that the document with [title] was saved to its repository. */
    private fun logSaved(title: String) {
        log.info("Saved $title")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents

import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.toDto
import org.springframework.security.access.prepost.PreAuthorize
import org.springframework.web.bind.annotation.*
import java.util.*

/**
 * REST endpoints under `embedding_manager/documents` for listing, embedding,
 * re-embedding and deleting documents.
 *
 * All endpoints except the two listing endpoints require the `document-manager` role.
 */
@RestController
@RequestMapping("embedding_manager/documents")
class DocumentController(
    private val documentService: DocumentService
) {
    /** Returns all file documents, each converted with `toDto()`. */
    @GetMapping("files")
    fun getFileDocuments() = documentService.getFileDocuments().map { fileDocument -> fileDocument.toDto() }

    /** Returns all website documents, each converted with `toDto()`. */
    @GetMapping("websites")
    fun getWebsiteDocuments() =
        documentService.getWebsiteDocuments().map { websiteDocument -> websiteDocument.toDto() }

    /** Triggers [DocumentService.embed]. */
    @PostMapping("embed")
    @PreAuthorize("hasRole('document-manager')")
    fun embed() {
        documentService.embed()
    }

    /**
     * Re-embeds a file document.
     *
     * @param id of the file document.
     */
    @PostMapping("files/{id}/reembed")
    @PreAuthorize("hasRole('document-manager')")
    fun reembedFile(@PathVariable id: UUID) {
        documentService.reembedFileDocument(id)
    }

    /**
     * Re-embeds a website document.
     *
     * @param id of the website document.
     */
    @PostMapping("websites/{id}/reembed")
    @PreAuthorize("hasRole('document-manager')")
    fun reembedWebsite(@PathVariable id: UUID) {
        documentService.reembedWebsiteDocument(id)
    }

    /**
     * Deletes a file document.
     *
     * @param id of the file document.
     */
    @DeleteMapping("files/{id}")
    @PreAuthorize("hasRole('document-manager')")
    fun deleteFile(@PathVariable id: UUID) {
        documentService.deleteFileDocument(id)
    }

    /**
     * Deletes a website document.
     *
     * @param id of the website document.
     */
    @DeleteMapping("websites/{id}")
    @PreAuthorize("hasRole('document-manager')")
    fun deleteWebsite(@PathVariable id: UUID) {
        documentService.deleteWebsiteDocument(id)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents

import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.Document

/**
* Supplies the documents that are candidates for embedding.
*/
interface DocumentLoader {
/**
* Loads the documents.
*
* @return the loaded documents.
*/
fun loadDocuments(): List<Document>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents

import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService
import de.niklaskerkhoff.tutorassistantappservice.lib.entities.findByIdOrThrow
import de.niklaskerkhoff.tutorassistantappservice.lib.exceptions.BadRequestException
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.*
import org.springframework.stereotype.Service
import java.util.*

/**
 * Manages documents:
 * * returns them
 * * embeds them
 * * deletes them
 */
@Service
class DocumentService(
    private val fileDocumentRepo: FileDocumentRepo,
    private val websiteDocumentRepo: WebsiteDocumentRepo,
    private val documentLoader: DocumentLoader,
    private val applierVisitor: ApplierVisitor,
    private val ragDocumentService: RagDocumentService
) : AppService() {

    /**
     * @return all file documents.
     */
    fun getFileDocuments(): List<FileDocument> = fileDocumentRepo.findAll()

    /**
     * @return all website documents.
     */
    fun getWebsiteDocuments(): List<WebsiteDocument> = websiteDocumentRepo.findAll()

    /**
     * Loads all documents from the [DocumentLoader] and passes each one to the [ApplierVisitor].
     */
    fun embed() {
        val documents = documentLoader.loadDocuments()
        documents.forEach { it.accept(applierVisitor) }
    }

    /**
     * Deletes the embedding of a file and embeds it again.
     *
     * @param id of the file.
     */
    fun reembedFileDocument(id: UUID) = reembed(id, fileDocumentRepo)

    /**
     * Deletes the embedding of a website and embeds it again.
     *
     * @param id of the website.
     */
    fun reembedWebsiteDocument(id: UUID) = reembed(id, websiteDocumentRepo)

    /**
     * Deletes an embedding of a file.
     *
     * @param id of the file.
     */
    fun deleteFileDocument(id: UUID) = delete(id, fileDocumentRepo)

    /**
     * Deletes an embedding of a website.
     *
     * @param id of the website.
     */
    fun deleteWebsiteDocument(id: UUID) = delete(id, websiteDocumentRepo)

    /**
     * Re-embeds the stored document with the given id.
     *
     * The stored document is matched by title against the freshly loaded documents; the
     * stored one is deleted and the loaded one is passed to the [ApplierVisitor].
     *
     * @param id of the stored document.
     * @param documentRepo repository the stored document lives in.
     * @throws BadRequestException if no loaded document has the title of the stored document.
     */
    private fun <T : Document> reembed(id: UUID, documentRepo: DocumentRepo<T>) {
        val existingDocument = documentRepo.findByIdOrThrow(id)
        val title = existingDocument.title
        val allDocuments = documentLoader.loadDocuments()
        // Resolve the replacement before deleting anything, so a missing entry leaves the stored document intact.
        val documentToReembed = allDocuments.find { it.title == title }
            ?: throw BadRequestException("Document $title not specified in main settings")

        delete(existingDocument, documentRepo)
        documentToReembed.accept(applierVisitor)
    }

    /**
     * Looks up the document with the given id and deletes it.
     *
     * @param id of the stored document.
     * @param documentRepo repository the stored document lives in.
     */
    private fun <T : Document> delete(id: UUID, documentRepo: DocumentRepo<T>) {
        val document = documentRepo.findByIdOrThrow(id)
        delete(document, documentRepo)
    }

    /**
     * Deletes the chunks of [document] from the RAG service and then removes the document
     * from [documentRepo].
     *
     * The result reported by the RAG service is logged truthfully: a warning is written
     * when the service reports that the deletion was not successful.
     */
    private fun <T : Document> delete(document: T, documentRepo: DocumentRepo<T>) {
        val chunksIds = document.chunksIds
        val deletedFromRag = ragDocumentService.deleteDocument(chunksIds)
        if (deletedFromRag) {
            log.info("Deleted $chunksIds from Tutor-Assistant")
        } else {
            // The stored document is still removed below, exactly as before this check existed.
            // NOTE(review): consider aborting here instead, so the chunk ids are not lost.
            log.warn("Tutor-Assistant reported that deleting $chunksIds was not successful")
        }

        documentRepo.delete(document)
        log.info("Deleted document with id ${document.id}")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities

import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntity
import jakarta.persistence.ElementCollection
import jakarta.persistence.Entity
import jakarta.persistence.Inheritance
import jakarta.persistence.InheritanceType

/**
* Base class for documents to embed.
*
* Mapped as a JPA entity using one table per concrete class
* ([InheritanceType.TABLE_PER_CLASS]).
*/
@Entity
@Inheritance(strategy = InheritanceType.TABLE_PER_CLASS)
abstract class Document(
/**
* Human-readable id. Repositories look documents up by it
* (see [DocumentRepo.findByTitle]).
*/
open val title: String,

/**
* Specifies how the document is loaded.
*/
open val loaderType: String,

/**
* Assigns a collection to the document for grouping. May be null.
*/
open val collection: String?,

/**
* Specifies if the document shall be used for generating the calendar.
*/
open val isCalendar: Boolean
) : AppEntity() {

/**
* Ids of the embedded chunks returned by the RAG service.
* Defaults to an empty list.
*/
@ElementCollection
open var chunksIds: List<String> = emptyList()

/**
* Visitor method.
* NOTE(review): implementations presumably call the [DocumentVisitor.visit]
* overload matching their concrete type; confirm in the subclasses.
*
* @param visitor the visitor to accept.
*/
abstract fun accept(visitor: DocumentVisitor)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities

import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntityRepo
import org.springframework.data.repository.NoRepositoryBean

/**
* Common repository contract for all [Document] subtypes.
*
* Annotated with [NoRepositoryBean], so Spring Data does not create a repository
* instance for this generic interface itself.
*/
@NoRepositoryBean
interface DocumentRepo<T : Document> : AppEntityRepo<T> {
/**
* Looks up a document by its title.
*
* @param title title to search for.
* @return the matching document, or null if there is none.
*/
fun findByTitle(title: String): T?
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities

/**
* Visitor over the concrete document types, with one overload per type.
*/
interface DocumentVisitor {
/**
* Handles a file document.
*
* @param fileDocument the visited document.
*/
fun visit(fileDocument: FileDocument)

/**
* Handles a website document.
*
* @param websiteDocument the visited document.
*/
fun visit(websiteDocument: WebsiteDocument)
}
Loading

0 comments on commit f5f9c5c

Please sign in to comment.