-
-
Notifications
You must be signed in to change notification settings - Fork 1
70 lines (59 loc) · 2.41 KB
/
rag-pipeline-update-vectorstore.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Workflow: on every push to main, embed the MDX files touched by the push
# with OpenAI and upsert them into the 'zack-portfolio-3072' Pinecone index.
name: Upsert embeddings for changed MDX files

on:
  push:
    branches:
      - main

jobs:
  changed_files:
    runs-on: ubuntu-latest
    name: Process Changed Blog Embeddings
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Ensures a full clone of the repository

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v44

      - name: List all changed files
        env:
          # Hand the action output to the script through the environment
          # instead of interpolating it into the script text, so that file
          # names are never parsed as shell code.
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          echo "Changed MDX Files:"
          # The output is a single space-separated line; put one path per
          # line so grep filters individual files rather than the whole list.
          # `|| true`: grep exits 1 when nothing matches, which would fail
          # the job (bash -e) on every push that touches no MDX file.
          CHANGED_MDX_FILES=$(printf '%s\n' "$ALL_CHANGED_FILES" | tr ' ' '\n' | grep '\.mdx$' || true)
          echo "$CHANGED_MDX_FILES"
          # Multi-line value syntax for GITHUB_ENV; an empty value makes the
          # `if:` guards on the following steps evaluate to false.
          {
            echo "CHANGED_MDX_FILES<<EOF"
            echo "$CHANGED_MDX_FILES"
            echo "EOF"
          } >> "$GITHUB_ENV"

      - name: Install dependencies
        if: env.CHANGED_MDX_FILES != ''
        run: |
          pip install langchain_community langchain_pinecone langchain_openai langchain unstructured langchainhub

      - name: Process and upsert blog embeddings if changed
        if: env.CHANGED_MDX_FILES != ''
        env:
          # Secrets are scoped to the steps that need them rather than being
          # written to GITHUB_ENV, where every later step could read them.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        run: |
          # Quoted heredoc: the shell performs no expansion inside the script.
          python - <<'PY'
          import os
          from pathlib import Path

          from langchain_pinecone import PineconeVectorStore
          from langchain_openai import OpenAIEmbeddings
          from langchain.docstore.document import Document

          # Manually load changed documents (one path per line in the env var)
          changed_files = [
              line.strip()
              for line in os.environ['CHANGED_MDX_FILES'].splitlines()
              if line.strip().endswith('.mdx')
          ]
          docs = [
              Document(
                  page_content=Path(path).read_text(encoding='utf-8'),
                  metadata={'source': 'local', 'name': path},
              )
              for path in changed_files
          ]

          # Initialize embeddings and vector store
          embeddings = OpenAIEmbeddings(model='text-embedding-3-large')
          index_name = 'zack-portfolio-3072'
          vectorstore = PineconeVectorStore(embedding=embeddings, index_name=index_name)

          # Use the file path as the vector ID so that re-embedding an edited
          # file overwrites its previous vector instead of adding a duplicate
          # under a new random ID.
          # NOTE(review): vectors stored earlier under random IDs are not
          # removed by this change - clean them up separately if needed.
          vectorstore.add_documents(docs, ids=changed_files)
          PY

      - name: Verify and log vector store status
        if: env.CHANGED_MDX_FILES != ''
        env:
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        run: |
          python - <<'PY'
          import os

          from pinecone import Pinecone

          pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
          index = pc.Index('zack-portfolio-3072')
          print(index.describe_index_stats())
          PY