# vectordb.py
import os
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores.pgvector import PGVector
from langchain.document_loaders import PyPDFLoader
import psycopg2
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.document_loaders import SeleniumURLLoader
import pandas as pd
import numpy as np
import openai
import pandas as pd
import numpy as np
import openai
import nltk
import pickle
# Build the "strolr_docs" pgvector collection from previously pickled documents.
#
# Steps, in order:
#   1. fetch the NLTK "punkt" resource,
#   2. load the pickled documents from docs.pkl,
#   3. embed them with OpenAI's "text-embedding-3-small" model,
#   4. write documents + embeddings into PostgreSQL through PGVector.

# NOTE(review): nothing in this script uses NLTK directly; presumably the
# download is kept for a dependency that needs the tokenizer - confirm.
nltk.download('punkt')

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only run this against a docs.pkl that you produced yourself.
with open('docs.pkl', 'rb') as f:
    # Presumably a list of langchain Document objects (it is passed as
    # `documents` below) - verify against whatever wrote docs.pkl.
    data = pickle.load(f)

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# RDS connection details.
# The connection URL embeds the database user and password, so it is read from
# the environment instead of being committed to source control. Expected form:
#   postgresql+psycopg://<user>:<password>@<host>:<port>/<database>
# NOTE(review): the "+psycopg" driver suffix selects psycopg 3, while this file
# imports psycopg2 - confirm which driver is actually installed.
connection = os.environ.get("PGVECTOR_CONNECTION_STRING")
if not connection:
    raise RuntimeError(
        "PGVECTOR_CONNECTION_STRING is not set; export the PostgreSQL "
        "connection URL before running this script."
    )

collection_name = "strolr_docs"

# Embeds every document and stores the results in the named collection.
vector_store = PGVector.from_documents(
    embedding=embeddings,
    documents=data,
    collection_name=collection_name,
    connection_string=connection,
    use_jsonb=True,
)