Skip to content
This repository has been archived by the owner on Aug 25, 2024. It is now read-only.

Commit

Permalink
Solr example: use Hugging Face embeddings (LangStream#729)
Browse files Browse the repository at this point in the history
  • Loading branch information
eolivelli authored Nov 20, 2023
1 parent 4399ec7 commit 81f66bb
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
5 changes: 4 additions & 1 deletion examples/applications/query-solr/chatbot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ pipeline:
- name: "compute-embeddings"
type: "compute-ai-embeddings"
configuration:
model: "${secrets.open-ai.embeddings-model}" # This needs to match the name of the model deployment, not the base model
ai-service: "huggingface"
model: "multilingual-e5-small"
model-url: "djl://ai.djl.huggingface.pytorch/intfloat/multilingual-e5-small"
embeddings-field: "value.question_embeddings"
text: "{{ value.question }}"
flush-interval: 0
Expand All @@ -51,6 +53,7 @@ pipeline:
type: "ai-chat-completions"

configuration:
ai-service: "openai"
model: "${secrets.open-ai.chat-completions-model}" # This needs to be set to the model deployment name, not the base name
# on the log-topic we add a field with the answer
completion-field: "value.answer"
Expand Down
6 changes: 6 additions & 0 deletions examples/applications/query-solr/configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,16 @@ configuration:
resources:
- type: "open-ai-configuration"
name: "OpenAI Azure configuration"
id: "openai"
configuration:
url: "${secrets.open-ai.url}"
access-key: "${secrets.open-ai.access-key}"
provider: "${secrets.open-ai.provider}"
- type: "hugging-face-configuration"
name: "Hugging Face AI configuration"
id: "huggingface"
configuration:
provider: "local"
- type: "vector-database"
name: "SolrDataSource"
configuration:
Expand Down
10 changes: 6 additions & 4 deletions examples/applications/query-solr/crawler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ assets:
"add-field-type" : {
"name": "knn_vector",
"class": "solr.DenseVectorField",
"vectorDimension": "1536",
"vectorDimension": "384",
"similarityFunction": "cosine"
}
}
Expand Down Expand Up @@ -108,10 +108,10 @@ pipeline:
type: "text-splitter"
configuration:
splitter_type: "RecursiveCharacterTextSplitter"
chunk_size: 400
chunk_size: 200
separators: ["\n\n", "\n", " ", ""]
keep_separator: false
chunk_overlap: 100
chunk_overlap: 20
length_function: "cl100k_base"
- name: "Convert to structured data"
type: "document-to-json"
Expand All @@ -133,7 +133,9 @@ pipeline:
type: "compute-ai-embeddings"
output: chunks-topic
configuration:
model: "text-embedding-ada-002" # This needs to match the name of the model deployment, not the base model
ai-service: "huggingface"
model: "multilingual-e5-small"
model-url: "djl://ai.djl.huggingface.pytorch/intfloat/multilingual-e5-small"
embeddings-field: "value.embeddings_vector"
text: "{{ value.text }}"
batch-size: 10
Expand Down

0 comments on commit 81f66bb

Please sign in to comment.