-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Niklas Kerkhoff
committed
Dec 11, 2024
1 parent
5581b34
commit f5f9c5c
Showing
42 changed files
with
1,750 additions
and
0 deletions.
There are no files selected for viewing
69 changes: 69 additions & 0 deletions
69
...e/niklaskerkhoff/tutorassistantappservice/modules/embedding_manager/RagDocumentService.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService | ||
import de.niklaskerkhoff.tutorassistantappservice.lib.webclient.EmptyResponseBodyException | ||
import org.springframework.beans.factory.annotation.Value | ||
import org.springframework.core.ParameterizedTypeReference | ||
import org.springframework.stereotype.Service | ||
import org.springframework.web.reactive.function.client.WebClient | ||
|
||
/**
 * Client for the document endpoints of the RAG-Service.
 *
 * Requests are sent through the injected [WebClient] to the base url configured under
 * `app.tutor-assistant.base-url`. Every call blocks the calling thread until the response arrived.
 */
@Service
class RagDocumentService(
    private val webClient: WebClient
) : AppService() {
    @Value("\${app.tutor-assistant.base-url}")
    private lateinit var baseUrl: String

    /**
     * Adds a document to the RAG-Service by posting it to `documents/add`.
     *
     * @param title of the document.
     * @param originalKey id of the file in the file store or the url of a website.
     * @param loaderType specifies how to load and process the content.
     * @param loaderParams specifies how to access the resource and its content.
     * @param isCalendar specifies if the document shall be used for the generation of the calendar.
     *
     * @return the ids of the embedded chunks.
     * @throws EmptyResponseBodyException if the call yields no value.
     */
    fun addDocument(
        title: String,
        originalKey: String,
        loaderType: String,
        loaderParams: Map<String, Any>,
        isCalendar: Boolean
    ): List<String> {
        val payload = mapOf(
            "title" to title,
            "originalKey" to originalKey,
            "loaderType" to loaderType,
            "loaderParams" to loaderParams,
            "isCalendar" to isCalendar
        )
        // Anonymous subclass keeps the generic type information for deserialization.
        val chunkIdsType = object : ParameterizedTypeReference<List<String>>() {}

        val chunkIds = webClient.post()
            .uri("$baseUrl/documents/add")
            .bodyValue(payload)
            .retrieve()
            .bodyToMono(chunkIdsType)
            .block()

        return chunkIds ?: throw EmptyResponseBodyException()
    }

    /**
     * Deletes a document by posting the ids of its chunks to `documents/delete`.
     *
     * @param chunksIds ids of the embedded chunks of the document.
     *
     * @return the boolean answered by the RAG-Service, true if deletion was successful.
     * @throws EmptyResponseBodyException if the call yields no value.
     */
    fun deleteDocument(chunksIds: List<String>): Boolean {
        val deleted = webClient.post()
            .uri("$baseUrl/documents/delete")
            .bodyValue(chunksIds)
            .retrieve()
            .bodyToMono(Boolean::class.java)
            .block()

        return deleted ?: throw EmptyResponseBodyException()
    }
}
98 changes: 98 additions & 0 deletions
98
...askerkhoff/tutorassistantappservice/modules/embedding_manager/documents/ApplierVisitor.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.lib.logging.Logger | ||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService | ||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.* | ||
import org.springframework.stereotype.Component | ||
|
||
|
||
/**
 * [DocumentVisitor] that embeds documents which are not stored yet.
 *
 * A visited document is looked up by its title in the repository of its type. If an entry
 * exists, the document is skipped. Otherwise it is added to the RAG-Service, the returned
 * chunk ids are set on the document and the document is saved.
 */
@Component
class ApplierVisitor(
    private val fileDocumentRepo: FileDocumentRepo,
    private val websiteDocumentRepo: WebsiteDocumentRepo,
    private val ragDocumentService: RagDocumentService
) : DocumentVisitor, Logger {

    /**
     * Embeds and saves [fileDocument] unless a file document with the same title exists.
     * The file store id is passed as original key, the file store url as the only loader param.
     */
    override fun visit(fileDocument: FileDocument) {
        log.info("Visiting fileDocument with title ${fileDocument.title}")

        if (isAlreadyStored(fileDocument, fileDocumentRepo)) return

        val loaderParams = mapOf("url" to fileDocument.fileStoreUrl)

        embedAndSave(
            document = fileDocument,
            documentRepo = fileDocumentRepo,
            originalKey = fileDocument.fileStoreId.toString(),
            loaderParams = loaderParams,
        )
    }

    /**
     * Embeds and saves [websiteDocument] unless a website document with the same title exists.
     * The url is passed as original key; url, html selector and selection index as loader params.
     */
    override fun visit(websiteDocument: WebsiteDocument) {
        log.info("Visiting websiteDocument with title ${websiteDocument.title}")

        if (isAlreadyStored(websiteDocument, websiteDocumentRepo)) return

        val loaderParams = mapOf(
            "url" to websiteDocument.loaderParams.url,
            "htmlSelector" to websiteDocument.loaderParams.htmlSelector,
            "htmlSelectionIndex" to websiteDocument.loaderParams.htmlSelectionIndex,
        )

        embedAndSave(
            document = websiteDocument,
            documentRepo = websiteDocumentRepo,
            originalKey = websiteDocument.loaderParams.url,
            loaderParams = loaderParams,
        )
    }

    /**
     * Checks by title whether [document] is already present in [documentRepo] and logs the outcome.
     *
     * @return true if a document with the same title exists, i.e. the visit has to stop.
     */
    private fun <T : Document> isAlreadyStored(document: T, documentRepo: DocumentRepo<T>): Boolean {
        val exists = documentRepo.findByTitle(document.title) != null
        if (exists) logStopping(document.title) else logContinuing(document.title)
        return exists
    }

    /**
     * Adds [document] to the RAG-Service, stores the returned chunk ids on it and saves it.
     *
     * Shared by both visit methods so that the workflow cannot diverge between document types.
     *
     * @param originalKey id of the file in the file store or the url of a website.
     * @param loaderParams forwarded unchanged to the RAG-Service.
     */
    private fun <T : Document> embedAndSave(
        document: T,
        documentRepo: DocumentRepo<T>,
        originalKey: String,
        loaderParams: Map<String, Any>,
    ) {
        val tutorAssistantIds = ragDocumentService.addDocument(
            document.title,
            originalKey,
            document.loaderType,
            loaderParams,
            document.isCalendar
        )
        logAddedToRagService(document.title, tutorAssistantIds)

        // The ids are needed later to delete the embedding again.
        document.chunksIds = tutorAssistantIds

        val saved = documentRepo.save(document)
        logSaved(saved.title)
    }

    private fun logContinuing(title: String) {
        log.info("$title does not exist, continuing")
    }

    private fun logStopping(title: String) {
        log.info("$title already exists, stopping")
    }

    private fun logAddedToRagService(title: String, tutorAssistantIds: List<String>) {
        log.info("Added $title to Tutor-Assistant, got ${tutorAssistantIds.size} ids")
    }

    private fun logSaved(title: String) {
        log.info("Saved $title")
    }
}
38 changes: 38 additions & 0 deletions
38
...rkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentController.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.toDto | ||
import org.springframework.security.access.prepost.PreAuthorize | ||
import org.springframework.web.bind.annotation.* | ||
import java.util.* | ||
|
||
/**
 * REST endpoints below `embedding_manager/documents` for listing, embedding, re-embedding
 * and deleting documents. All work is delegated to [DocumentService].
 *
 * The listing endpoints carry no role restriction here; every other endpoint requires the
 * role `document-manager`.
 */
@RestController
@RequestMapping("embedding_manager/documents")
class DocumentController(
    private val documentService: DocumentService
) {
    /** Returns all file documents converted to their DTOs. */
    @GetMapping("files")
    fun getFileDocuments() = documentService.getFileDocuments().map { fileDocument -> fileDocument.toDto() }

    /** Returns all website documents converted to their DTOs. */
    @GetMapping("websites")
    fun getWebsiteDocuments() =
        documentService.getWebsiteDocuments().map { websiteDocument -> websiteDocument.toDto() }

    /** Triggers the embedding of the loaded documents. */
    @PostMapping("embed")
    @PreAuthorize("hasRole('document-manager')")
    fun embed() {
        documentService.embed()
    }

    /** Re-embeds the file document with the given [id]. */
    @PostMapping("files/{id}/reembed")
    @PreAuthorize("hasRole('document-manager')")
    fun reembedFile(@PathVariable id: UUID) {
        documentService.reembedFileDocument(id)
    }

    /** Re-embeds the website document with the given [id]. */
    @PostMapping("websites/{id}/reembed")
    @PreAuthorize("hasRole('document-manager')")
    fun reembedWebsite(@PathVariable id: UUID) {
        documentService.reembedWebsiteDocument(id)
    }

    /** Deletes the file document with the given [id]. */
    @DeleteMapping("files/{id}")
    @PreAuthorize("hasRole('document-manager')")
    fun deleteFile(@PathVariable id: UUID) {
        documentService.deleteFileDocument(id)
    }

    /** Deletes the website document with the given [id]. */
    @DeleteMapping("websites/{id}")
    @PreAuthorize("hasRole('document-manager')")
    fun deleteWebsite(@PathVariable id: UUID) {
        documentService.deleteWebsiteDocument(id)
    }
}
7 changes: 7 additions & 0 deletions
7
...askerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentLoader.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.Document | ||
|
||
/**
 * Source of the documents that are to be embedded.
 */
interface DocumentLoader {
    /**
     * @return the documents provided by this loader.
     */
    fun loadDocuments(): List<Document>
}
96 changes: 96 additions & 0 deletions
96
...skerkhoff/tutorassistantappservice/modules/embedding_manager/documents/DocumentService.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.lib.app_components.AppService | ||
import de.niklaskerkhoff.tutorassistantappservice.lib.entities.findByIdOrThrow | ||
import de.niklaskerkhoff.tutorassistantappservice.lib.exceptions.BadRequestException | ||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.RagDocumentService | ||
import de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities.* | ||
import org.springframework.stereotype.Service | ||
import java.util.* | ||
|
||
/**
 * Manages documents:
 * * returns them
 * * embeds them
 * * deletes them
 */
@Service
class DocumentService(
    private val fileDocumentRepo: FileDocumentRepo,
    private val websiteDocumentRepo: WebsiteDocumentRepo,
    private val documentLoader: DocumentLoader,
    private val applierVisitor: ApplierVisitor,
    private val ragDocumentService: RagDocumentService
) : AppService() {

    /**
     * @return all file documents.
     */
    fun getFileDocuments(): List<FileDocument> = fileDocumentRepo.findAll()

    /**
     * @return all website documents.
     */
    fun getWebsiteDocuments(): List<WebsiteDocument> = websiteDocumentRepo.findAll()

    /**
     * Loads all documents from the [DocumentLoader] and lets the [ApplierVisitor] visit each of them.
     */
    fun embed() {
        val documents = documentLoader.loadDocuments()
        documents.forEach { it.accept(applierVisitor) }
    }

    /**
     * Deletes the embedding of a file and embeds it again.
     *
     * @param id of the file.
     */
    fun reembedFileDocument(id: UUID) = reembed(id, fileDocumentRepo)

    /**
     * Deletes the embedding of a website and embeds it again.
     *
     * @param id of the website.
     */
    fun reembedWebsiteDocument(id: UUID) = reembed(id, websiteDocumentRepo)

    /**
     * Deletes an embedding of a file.
     *
     * @param id of the file.
     */
    fun deleteFileDocument(id: UUID) = delete(id, fileDocumentRepo)

    /**
     * Deletes an embedding of a website.
     *
     * @param id of the website.
     */
    fun deleteWebsiteDocument(id: UUID) = delete(id, websiteDocumentRepo)

    /**
     * Deletes the stored document with the given id and applies the freshly loaded document
     * with the same title again.
     *
     * The loaded document is resolved before anything is deleted, so a document missing from
     * the loader leaves the stored document untouched.
     *
     * @throws BadRequestException if the loader provides no document with the stored title.
     */
    private fun <T : Document> reembed(id: UUID, documentRepo: DocumentRepo<T>) {
        val existingDocument = documentRepo.findByIdOrThrow(id)
        val title = existingDocument.title
        val allDocuments = documentLoader.loadDocuments()
        val documentToReembed = allDocuments.find { it.title == title }
            ?: throw BadRequestException("Document $title not specified in main settings")

        delete(existingDocument, documentRepo)
        documentToReembed.accept(applierVisitor)
    }

    /**
     * Looks up the document with the given id and deletes it.
     */
    private fun <T : Document> delete(id: UUID, documentRepo: DocumentRepo<T>) {
        val document = documentRepo.findByIdOrThrow(id)
        delete(document, documentRepo)
    }

    /**
     * Deletes the chunks of [document] from the RAG-Service and afterwards the document itself.
     *
     * The stored document is the only place holding the chunk ids. It is therefore only removed
     * once the RAG-Service confirmed the deletion; otherwise the embedded chunks could not be
     * deleted anymore.
     *
     * @throws IllegalStateException if the RAG-Service reports that the deletion failed.
     */
    private fun <T : Document> delete(document: T, documentRepo: DocumentRepo<T>) {
        val chunksIds = document.chunksIds

        // Without chunk ids there is nothing to delete remotely.
        if (chunksIds.isNotEmpty()) {
            val deleted = ragDocumentService.deleteDocument(chunksIds)
            check(deleted) {
                "Tutor-Assistant did not delete chunks $chunksIds of document with id ${document.id}"
            }
            log.info("Deleted $chunksIds from Tutor-Assistant")
        }

        documentRepo.delete(document)
        log.info("Deleted document with id ${document.id}")
    }
}
46 changes: 46 additions & 0 deletions
46
...erkhoff/tutorassistantappservice/modules/embedding_manager/documents/entities/Document.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntity | ||
import jakarta.persistence.ElementCollection | ||
import jakarta.persistence.Entity | ||
import jakarta.persistence.Inheritance | ||
import jakarta.persistence.InheritanceType | ||
|
||
/**
 * Base entity of everything that can be embedded.
 *
 * Concrete document types implement [accept] to dispatch to the matching
 * [DocumentVisitor] method.
 */
@Entity
@Inheritance(strategy = InheritanceType.TABLE_PER_CLASS)
abstract class Document(
    /** Human-readable id of the document. */
    open val title: String,

    /** Determines the way the document is loaded. */
    open val loaderType: String,

    /** Optional collection the document is grouped into. */
    open val collection: String?,

    /** Whether the document shall be used for generating the calendar. */
    open val isCalendar: Boolean
) : AppEntity() {

    /**
     * Ids of the embedded chunks as returned by the rag service. Empty until they are set.
     */
    @ElementCollection
    open var chunksIds: List<String> = emptyList()

    /**
     * Visitor method.
     *
     * @param visitor to be called with this document.
     */
    abstract fun accept(visitor: DocumentVisitor)
}
9 changes: 9 additions & 0 deletions
9
...off/tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentRepo.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities | ||
|
||
import de.niklaskerkhoff.tutorassistantappservice.lib.entities.AppEntityRepo | ||
import org.springframework.data.repository.NoRepositoryBean | ||
|
||
/**
 * Common repository contract for all [Document] types.
 *
 * Marked with [NoRepositoryBean], so only the repositories extending it are meant to be
 * instantiated.
 */
@NoRepositoryBean
interface DocumentRepo<T : Document> : AppEntityRepo<T> {
    /**
     * @param title of the document to look for.
     * @return the document with the given title or null if there is none.
     */
    fun findByTitle(title: String): T?
}
6 changes: 6 additions & 0 deletions
6
.../tutorassistantappservice/modules/embedding_manager/documents/entities/DocumentVisitor.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package de.niklaskerkhoff.tutorassistantappservice.modules.embedding_manager.documents.entities | ||
|
||
/**
 * Visitor over the concrete document types.
 */
interface DocumentVisitor {
    /** Handles a [FileDocument]. */
    fun visit(fileDocument: FileDocument)

    /** Handles a [WebsiteDocument]. */
    fun visit(websiteDocument: WebsiteDocument)
}
Oops, something went wrong.