-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #28 from OSU-App-Club/nyumat/rag-implementation
feat: full-stack RAG pipeline for PDF ingestion and contextual chat capabilities
- Loading branch information
Showing
35 changed files
with
1,755 additions
and
1,451 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-- AlterTable: adds a "documentIds" column of type TEXT[] to "course_materials".
-- The column defaults to an empty array; no NOT NULL constraint is declared here.
ALTER TABLE "course_materials" ADD COLUMN "documentIds" TEXT[] DEFAULT ARRAY[]::TEXT[]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-- AlterTable: adds a required BOOLEAN column "is_indexed" to "course_materials".
-- The column is NOT NULL with a default of false.
ALTER TABLE "course_materials" ADD COLUMN "is_indexed" BOOLEAN NOT NULL DEFAULT false; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,109 @@ | ||
import { PdfRecord } from "@/lib/models"; | ||
import { deleteDocumentFromPinecone, queryDocuments } from "@/lib/pinecone"; | ||
import { | ||
handleEmbeddingAndStorage, | ||
processDocument, | ||
syncDocumentWithDb, | ||
} from "@/lib/retrieval-augmentation-gen"; | ||
import { NextResponse } from "next/server"; | ||
import { index } from "@/lib/pineconeClient"; // adjust the import path as needed | ||
import { createEmbedding } from "@/lib/openAiClient"; // adjust the import path as needed | ||
|
||
/**
 * POST /api/embeddings
 *
 * Ingests one PDF described by the request body: the record is split into
 * chunks with `processDocument`, the chunks are handed to
 * `handleEmbeddingAndStorage`, and the record plus its chunks are then passed
 * to `syncDocumentWithDb`.
 *
 * The JSON body is expected to look like `{ data: PdfRecord }`, and
 * `data.fileName` must be present.
 *
 * @param request The incoming request.
 * @returns 200 with `{ message, chunks }` on success; 400 when `data.fileName`
 *   is missing; 500 when reading the body or any processing step fails.
 */
export async function POST(request: Request) {
  try {
    const requestBody = await request.json();

    // `data` may be absent (or the body may be `null`), so guard it before
    // reading `fileName`; otherwise a malformed payload surfaces as a 500.
    const args: PdfRecord | undefined = requestBody?.data;
    if (!args?.fileName) {
      return NextResponse.json(
        { error: "fileName is required" },
        { status: 400 },
      );
    }

    const chunks = await processDocument(args);

    // The two steps run one after the other, exactly as the previously awaited
    // calls did: the database sync is only attempted once embedding storage
    // has succeeded. (Wrapping already-awaited values in Promise.allSettled
    // could never observe a rejection, so failures are handled by the catch.)
    await handleEmbeddingAndStorage(chunks);
    await syncDocumentWithDb(args, chunks);

    return NextResponse.json({
      message: "PDF processed and stored successfully",
      chunks: chunks.length,
    });
  } catch (error) {
    console.error("Error processing PDF:", error);
    return NextResponse.json(
      { error: "Failed to process PDF" },
      { status: 500 },
    );
  }
}
|
||
/**
 * GET /api/embeddings
 *
 * Looks up documents similar to the `query` search parameter by delegating to
 * `queryDocuments`.
 *
 * @param request The incoming request; its URL must carry a non-blank `query`
 *   search parameter.
 * @returns 200 with `{ results }` on success; 400 when `query` is missing or
 *   blank; 500 when the lookup fails.
 */
export async function GET(request: Request) {
  try {
    const { searchParams } = new URL(request.url);
    const query = searchParams.get("query");

    // A whitespace-only query carries no search text, so treat it as missing.
    if (!query || query.trim().length === 0) {
      return NextResponse.json(
        { error: "Query parameter is required" },
        { status: 400 },
      );
    }

    const results = await queryDocuments(query);
    return NextResponse.json({ results });
  } catch (error) {
    console.error("Error searching documents:", error);
    return NextResponse.json(
      { error: "Failed to search documents" },
      { status: 500 },
    );
  }
}

/**
 * DELETE /api/embeddings
 *
 * Removes stored documents for a chat by passing the `chatId` search parameter
 * to `deleteDocumentFromPinecone`.
 *
 * @param request The incoming request; its URL must carry a `chatId` search
 *   parameter.
 * @returns 200 with a confirmation message on success; 400 when `chatId` is
 *   absent; 500 when the deletion fails.
 */
export async function DELETE(request: Request) {
  try {
    const chatId = new URL(request.url).searchParams.get("chatId");

    if (chatId) {
      await deleteDocumentFromPinecone(chatId);
      return NextResponse.json({ message: "Document deleted successfully" });
    }

    // No (or empty) chatId supplied: nothing to delete.
    return NextResponse.json(
      { error: "ChatID parameter is required" },
      { status: 400 },
    );
  } catch (err) {
    console.error("Error deleting document:", err);
    return NextResponse.json(
      { error: "Failed to delete document" },
      { status: 500 },
    );
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { auth } from "@/lib/auth"; | ||
import { prisma } from "@/lib/prisma"; | ||
import { NextResponse } from "next/server"; | ||
|
||
export const revalidate = 30; | ||
|
||
/**
 * GET /api/stats
 *
 * Returns upload statistics for the signed-in user: number of course
 * materials, total page count, average file size, and the user's
 * `storageUsed` value.
 *
 * @returns
 * - 200 with `{ stats: { totalFiles, totalPages, averageFileSize, storageUsed } }`
 * - 401 when the session carries no user id
 * - 404 when no user row matches the session's user id
 * - 500 when fetching the data fails
 * @example
 * GET /api/stats
 */
export async function GET() {
  try {
    const session = await auth();
    const userId = session?.user?.id;

    // A missing session is a client-side condition, not a server failure.
    if (!userId) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    const user = await prisma.user.findUnique({
      where: { id: userId },
      include: { CourseMaterial: true },
    });

    if (!user) {
      return NextResponse.json({ error: "User not found" }, { status: 404 });
    }

    // Accumulate both totals in a single pass over the user's materials.
    const totalFiles = user.CourseMaterial.length;
    let totalPages = 0;
    let totalFileSize = 0;
    for (const material of user.CourseMaterial) {
      totalPages += material.pages;
      totalFileSize += material.fileSize;
    }

    // Guard the division so a user without uploads reports an average of 0.
    const averageFileSize = totalFiles > 0 ? totalFileSize / totalFiles : 0;

    const stats = {
      totalFiles,
      totalPages,
      averageFileSize,
      storageUsed: user.storageUsed,
    };

    return NextResponse.json({ stats });
  } catch (error) {
    console.error("Failed to fetch files", error);
    return NextResponse.json(
      { error: "Failed to fetch files" },
      { status: 500 },
    );
  }
}
Oops, something went wrong.