Commit b60c555
Merge pull request #28 from sgomez/new-settings
feat: add missing ollama API settings
sgomez authored Sep 13, 2024
2 parents 7e127bb + 5ef32d5 commit b60c555
Showing 2 changed files with 89 additions and 0 deletions.
14 changes: 14 additions & 0 deletions packages/ollama/src/ollama-chat-language-model.ts
@@ -77,12 +77,22 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
      format: responseFormat?.type,
      model: this.modelId,
      options: removeUndefined({
        f16_kv: this.settings.f16Kv,
        frequency_penalty: frequencyPenalty,
        low_vram: this.settings.lowVram,
        main_gpu: this.settings.mainGpu,
        min_p: this.settings.minP,
        mirostat: this.settings.mirostat,
        mirostat_eta: this.settings.mirostatEta,
        mirostat_tau: this.settings.mirostatTau,
        num_batch: this.settings.numBatch,
        num_ctx: this.settings.numCtx,
        num_gpu: this.settings.numGpu,
        num_keep: this.settings.numKeep,
        num_predict: maxTokens,
        num_thread: this.settings.numThread,
        numa: this.settings.numa,
        penalize_newline: this.settings.penalizeNewline,
        presence_penalty: presencePenalty,
        repeat_last_n: this.settings.repeatLastN,
        repeat_penalty: this.settings.repeatPenalty,
@@ -92,6 +102,10 @@ export class OllamaChatLanguageModel implements LanguageModelV1 {
        tfs_z: this.settings.tfsZ,
        top_k: this.settings.topK ?? topK,
        top_p: topP,
        typical_p: this.settings.typicalP,
        use_mlock: this.settings.useMlock,
        use_mmap: this.settings.useMmap,
        vocab_only: this.settings.vocabOnly,
      }),
    }

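Every new option is passed through `removeUndefined`, so only settings the caller explicitly sets end up in the request body, and Ollama's own defaults apply to the rest. The helper's implementation is not part of this diff; below is a minimal sketch of what such a utility plausibly looks like (an assumption, not the package's actual code):

```ts
// Hypothetical sketch of removeUndefined: drop entries whose value is
// undefined so they are omitted from the serialized JSON request body.
function removeUndefined<T extends Record<string, unknown>>(
  object: T,
): Partial<T> {
  return Object.fromEntries(
    Object.entries(object).filter(([, value]) => value !== undefined),
  ) as Partial<T>
}
```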
75 changes: 75 additions & 0 deletions packages/ollama/src/ollama-chat-settings.ts
@@ -94,6 +94,26 @@ export interface OllamaChatSettings {
   */
  experimentalStreamTools?: boolean

  /**
   * Use half-precision (f16) floating-point values for the key/value cache to
   * reduce memory usage. (Default: true)
   */
  f16Kv?: boolean

  /**
   * If set to true, reduces VRAM usage by trading speed for memory. (Default: false)
   */
  lowVram?: boolean

  /**
   * Sets which GPU is the main one.
   */
  mainGpu?: number

  /**
   * Minimum probability for a token to be considered, relative to the
   * probability of the most likely token. (Default: 0.0)
   */
  minP?: number

  /**
   * Enable Mirostat sampling for controlling perplexity.
   * (Default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
   */
@@ -111,11 +131,46 @@ export interface OllamaChatSettings {
   */
  mirostatTau?: number

  /**
   * Controls whether to use Non-Uniform Memory Access (NUMA) for more
   * efficient memory management. (Default: false)
   */
  numa?: boolean

  /**
   * Sets the batch size for prompt processing, i.e. how many tokens are
   * processed in parallel. (Default: 512)
   */
  numBatch?: number

  /**
   * Sets the size of the context window used to generate the next token.
   * (Default: 2048)
   */
  numCtx?: number

  /**
   * Controls the number of model layers to offload to the GPU. (Default: -1,
   * lets the runtime decide dynamically)
   */
  numGpu?: number

  /**
   * Number of tokens from the beginning of the context to retain when the
   * context window overflows. (Default: 4)
   */
  numKeep?: number

  /**
   * Controls the number of tokens to predict in a single generation. (Default: -1)
   */
  numPredict?: number

  /**
   * Sets the number of CPU threads to use. (Default: 0, lets the runtime decide)
   */
  numThread?: number

  /**
   * Penalizes the model for generating newline characters: if set to true, it
   * discourages the model from generating too many newlines. (Default: true)
   */
  penalizeNewline?: boolean

  /**
   * Sets how far back the model looks to prevent repetition.
   * (Default: 64, 0 = disabled, -1 = num_ctx)
   */
@@ -148,4 +203,24 @@ export interface OllamaChatSettings {
   * @deprecated Use `topK` from AI SDK functions.
   */
  topK?: number

  /**
   * Typical-p sampling: controls how "typical" sampled tokens must be relative
   * to the rest of the distribution. (Default: 1.0, 1.0 = disabled)
   */
  typicalP?: number

  /**
   * Locks the model in memory to prevent it from being swapped out, which can
   * help performance. (Default: false)
   */
  useMlock?: boolean

  /**
   * Memory-maps the model file instead of loading it all into RAM, reducing
   * memory usage. (Default: false)
   */
  useMmap?: boolean

  /**
   * If true, only loads the model vocabulary without performing any further
   * computation. (Default: false)
   */
  vocabOnly?: boolean
}
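Taken together, each camelCase setting is forwarded to the snake_case `options` field of the Ollama API request (`numCtx` becomes `num_ctx`, `useMlock` becomes `use_mlock`, and so on). A usage sketch follows; the model id and option values are placeholders, and it assumes the package's `ollama(modelId, settings)` factory together with `generateText` from the Vercel AI SDK:

```ts
import { generateText } from 'ai'
import { ollama } from 'ollama-ai-provider'

// Placeholder model id and illustrative option values.
const model = ollama('llama3.1', {
  f16Kv: true, // half-precision key/value cache
  numBatch: 512, // prompt-processing batch size
  numCtx: 4096, // context window in tokens
  useMlock: true, // keep the model resident in memory
})

const { text } = await generateText({
  model,
  prompt: 'Why is the sky blue?',
})

console.log(text)
```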
