# Captured from GitHub Actions run #87, triggered by the commit
# "Merge pull request #591 from zackproser/post-how-to-fine-tune-llama-o…".

# On every push to main, find the MDX files touched by the push, embed them
# with OpenAI and add them to the Pinecone index, then print index statistics.
name: Upsert embeddings for changed MDX files
on:
  push:
    branches:
      - main
jobs:
  changed_files:
    runs-on: ubuntu-latest
    name: Process Changed Blog Embeddings
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Ensures a full clone of the repository
      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning it to a full commit SHA.
      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v44
      - name: List all changed files
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so file names cannot be interpreted as shell code.
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          echo "Changed MDX Files:"
          # The action output is a single space-separated line: put one path
          # per line so grep filters individual files. `|| true` keeps the step
          # green when no MDX file changed (grep exits 1 on no match).
          # Limitation: paths containing spaces are split, as before.
          CHANGED_MDX_FILES=$(
            printf '%s\n' "$ALL_CHANGED_FILES" | tr ' ' '\n' | grep '\.mdx$' || true
          )
          echo "$CHANGED_MDX_FILES"
          # Multi-line value: use the heredoc form of the GITHUB_ENV file.
          {
            echo "CHANGED_MDX_FILES<<EOF"
            echo "$CHANGED_MDX_FILES"
            echo "EOF"
          } >> "$GITHUB_ENV"
      - name: Install dependencies
        if: env.CHANGED_MDX_FILES
        run: |
          pip install langchain_community langchain_pinecone langchain_openai langchain unstructured langchainhub
      - name: Process and upsert blog embeddings if changed
        if: env.CHANGED_MDX_FILES
        env:
          # API keys are scoped to the steps that need them rather than being
          # written to GITHUB_ENV for every later step.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        run: |
          python - <<'PY'
          import os
          from langchain_pinecone import PineconeVectorStore
          from langchain_openai import OpenAIEmbeddings
          from langchain.docstore.document import Document

          # Manually load changed documents (one Document per changed MDX file)
          docs = []
          for path in os.environ['CHANGED_MDX_FILES'].split():
              if not path.endswith('.mdx'):
                  continue
              with open(path, 'r', encoding='utf-8') as handle:
                  docs.append(
                      Document(
                          page_content=handle.read(),
                          metadata={'source': 'local', 'name': path},
                      )
                  )

          # Initialize embeddings and vector store
          embeddings = OpenAIEmbeddings(model='text-embedding-3-large')
          index_name = 'zack-portfolio-3072'
          vectorstore = PineconeVectorStore(embedding=embeddings, index_name=index_name)
          vectorstore.add_documents(docs)
          PY
      - name: Verify and log vector store status
        if: env.CHANGED_MDX_FILES
        env:
          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
        run: |
          python - <<'PY'
          import os
          from pinecone import Pinecone

          pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
          index = pc.Index('zack-portfolio-3072')
          print(index.describe_index_stats())
          PY