forked from theodo-group/LLPhant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
OllamaEmbeddingGenerator.php
87 lines (70 loc) · 2.3 KB
/
OllamaEmbeddingGenerator.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
<?php
declare(strict_types=1);
namespace LLPhant\Embeddings\EmbeddingGenerator\Ollama;
use Exception;
use GuzzleHttp\Client;
use LLPhant\Embeddings\Document;
use LLPhant\Embeddings\DocumentUtils;
use LLPhant\Embeddings\EmbeddingGenerator\EmbeddingGeneratorInterface;
use LLPhant\OllamaConfig;
use function str_replace;
/**
 * Embedding generator that obtains vectors from an Ollama server over HTTP.
 *
 * The HTTP client is built from the given OllamaConfig: its `url` is used as
 * the Guzzle base URI and its `model` is sent with every embedding request.
 */
final class OllamaEmbeddingGenerator implements EmbeddingGeneratorInterface
{
    /** HTTP client used for every request; public, so callers may replace it. */
    public Client $client;

    /** Model name sent in the `model` field of each embedding request. */
    private readonly string $model;

    /**
     * @param OllamaConfig $config Supplies the model name and the server base URL.
     */
    public function __construct(OllamaConfig $config)
    {
        $this->model = $config->model;
        $this->client = new Client([
            'base_uri' => $config->url,
        ]);
    }

    /**
     * Call out to Ollama embedding endpoint.
     *
     * Newlines in the text are replaced by spaces before the request is sent.
     *
     * @return float[]
     *
     * @throws \JsonException When the request cannot be encoded or the response is not valid JSON.
     * @throws Exception      When the decoded response is not an array or carries no usable `embedding` entry.
     */
    public function embedText(string $text): array
    {
        // DocumentUtils::toUtf8 is presumably an encoding normalisation step - confirm against its definition.
        $text = str_replace("\n", ' ', DocumentUtils::toUtf8($text));

        $response = $this->client->post('embeddings', [
            'body' => json_encode([
                'model' => $this->model,
                'prompt' => $text,
            ], JSON_THROW_ON_ERROR),
            'headers' => [
                'Content-Type' => 'application/json',
            ],
        ]);

        // Read the body exactly once: getContents() consumes the stream, so a
        // second call would yield an empty string and the error messages below
        // would lose the payload they are meant to report.
        $rawBody = $response->getBody()->getContents();

        $searchResults = json_decode($rawBody, true, 512, JSON_THROW_ON_ERROR);

        if (! is_array($searchResults)) {
            throw new Exception("Request to Ollama didn't returned an array: ".$rawBody);
        }

        // Also reject a non-array `embedding`, which would otherwise surface as
        // a TypeError from the declared return type instead of a readable error.
        if (! isset($searchResults['embedding']) || ! is_array($searchResults['embedding'])) {
            throw new Exception("Request to Ollama didn't returned expected format: ".$rawBody);
        }

        return $searchResults['embedding'];
    }

    /**
     * Compute the embedding of a document and store it on that same document.
     *
     * The formatted content is embedded when it is set; otherwise the raw
     * content is used. The document passed in is mutated and returned.
     */
    public function embedDocument(Document $document): Document
    {
        $text = $document->formattedContent ?? $document->content;
        $document->embedding = $this->embedText($text);

        return $document;
    }

    /**
     * Embed every document in turn, one request per document.
     *
     * The result is a freshly indexed list in iteration order; keys of the
     * input array are not preserved.
     *
     * @param  Document[]  $documents
     * @return Document[]
     */
    public function embedDocuments(array $documents): array
    {
        $embedDocuments = [];
        foreach ($documents as $document) {
            $embedDocuments[] = $this->embedDocument($document);
        }

        return $embedDocuments;
    }

    /**
     * Length of the embedding vectors reported by this generator.
     *
     * NOTE(review): the value is hard-coded and does not consult the configured
     * model; presumably the real length varies per model - confirm.
     */
    public function getEmbeddingLength(): int
    {
        return 1024;
    }
}