diff --git a/lib/shared/src/experimentation/FeatureFlagProvider.ts b/lib/shared/src/experimentation/FeatureFlagProvider.ts index 99183dfc891b..eea7e0aa67d9 100644 --- a/lib/shared/src/experimentation/FeatureFlagProvider.ts +++ b/lib/shared/src/experimentation/FeatureFlagProvider.ts @@ -22,13 +22,13 @@ export enum FeatureFlag { CodyAutocompleteDeepseekV2LiteBase = 'cody-autocomplete-deepseek-v2-lite-base', // Enable various feature flags to experiment with FIM trained fine-tuned models via Fireworks - CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag-v1', - CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control-v1', - CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best-v1', - CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1-v1', - CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2-v1', - CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3-v1', - CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4-v1', + CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag-v2', + CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control-v2', + CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best-v2', + CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1-v2', + CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2-v2', + CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3-v2', + CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4-v2', // Enables Claude 3 if the user is in our holdout group 
CodyAutocompleteClaude3 = 'cody-autocomplete-claude-3', diff --git a/vscode/src/completions/context/context-mixer.test.ts b/vscode/src/completions/context/context-mixer.test.ts index b63623243f4f..92a1cb582479 100644 --- a/vscode/src/completions/context/context-mixer.test.ts +++ b/vscode/src/completions/context/context-mixer.test.ts @@ -72,7 +72,9 @@ describe('ContextMixer', () => { duration: 0, retrieverStats: {}, strategy: 'none', - totalChars: 0, + totalChars: 8, + prefixChars: 8, + suffixChars: 0, }) }) }) @@ -120,10 +122,13 @@ describe('ContextMixer', () => { positionBitmap: 3, retrievedItems: 2, suggestedItems: 2, + retrieverChars: 34, }, }, strategy: 'jaccard-similarity', totalChars: 42, + prefixChars: 8, + suffixChars: 0, }) }) }) @@ -218,16 +223,20 @@ describe('ContextMixer', () => { positionBitmap: 0b00101, retrievedItems: 2, suggestedItems: 2, + retrieverChars: 36, }, retriever2: { duration: expect.any(Number), positionBitmap: 0b11010, retrievedItems: 3, suggestedItems: 3, + retrieverChars: 92, }, }, strategy: 'jaccard-similarity', totalChars: 136, + prefixChars: 8, + suffixChars: 0, }) }) diff --git a/vscode/src/completions/context/context-mixer.ts b/vscode/src/completions/context/context-mixer.ts index 270e1f0b55ca..94b721972bc3 100644 --- a/vscode/src/completions/context/context-mixer.ts +++ b/vscode/src/completions/context/context-mixer.ts @@ -28,6 +28,10 @@ export interface ContextSummary { duration: number /** Total characters of combined context snippets */ totalChars: number + /** The number of characters in the prompt used from the document prefix. */ + prefixChars: number + /** The number of characters in the prompt used from the document suffix. 
*/ + suffixChars: number /** Detailed information for each retriever that has run */ retrieverStats: { [identifier: string]: { @@ -35,6 +39,8 @@ export interface ContextSummary { suggestedItems: number /** Number of total snippets */ retrievedItems: number + /** Number of characters in the suggested Items from the retriever */ + retrieverChars: number /** Duration of the individual retriever */ duration: number /** @@ -75,7 +81,9 @@ export class ContextMixer implements vscode.Disposable { context: [], logSummary: { strategy: 'none', - totalChars: 0, + totalChars: options.docContext.prefix.length + options.docContext.suffix.length, + prefixChars: options.docContext.prefix.length, + suffixChars: options.docContext.suffix.length, duration: 0, retrieverStats: {}, }, @@ -147,11 +155,13 @@ export class ContextMixer implements vscode.Disposable { retrieverStats[retrieverId] = { suggestedItems: 0, positionBitmap: 0, + retrieverChars: 0, retrievedItems: results.find(r => r.identifier === retrieverId)?.snippets.size ?? 0, duration: results.find(r => r.identifier === retrieverId)?.duration ?? 
0, } } + retrieverStats[retrieverId].retrieverChars += snippet.content.length retrieverStats[retrieverId].suggestedItems++ // Only log the position for the first 32 results to avoid overflowing the bitmap if (position < 32) { @@ -166,6 +176,8 @@ export class ContextMixer implements vscode.Disposable { strategy, duration: performance.now() - start, totalChars, + prefixChars: options.docContext.prefix.length, + suffixChars: options.docContext.suffix.length, retrieverStats, } diff --git a/vscode/src/completions/fast-path-client.ts b/vscode/src/completions/fast-path-client.ts index 44df250458f4..ba74412a318e 100644 --- a/vscode/src/completions/fast-path-client.ts +++ b/vscode/src/completions/fast-path-client.ts @@ -90,7 +90,7 @@ export function createFastPathClient( stop: [...(requestParams.stopSequences || []), ...(fireworksConfig?.parameters?.stop || [])], stream: true, languageId: providerOptions.document.languageId, - anonymousUserID, + user: anonymousUserID, } const headers = new Headers(customHeaders) // Force HTTP connection reuse to reduce latency. 
diff --git a/vscode/src/completions/get-inline-completions-tests/analytics.test.ts b/vscode/src/completions/get-inline-completions-tests/analytics.test.ts index 50c71640790a..41febba7ab12 100644 --- a/vscode/src/completions/get-inline-completions-tests/analytics.test.ts +++ b/vscode/src/completions/get-inline-completions-tests/analytics.test.ts @@ -126,9 +126,11 @@ describe('[getInlineCompletions] completion event', () => { "artificialDelay": undefined, "completionIntent": "function.body", "contextSummary": { + "prefixChars": 16, "retrieverStats": {}, "strategy": "none", - "totalChars": 0, + "suffixChars": 1, + "totalChars": 17, }, "id": "stable-uuid", "isFuzzyMatch": false, @@ -196,9 +198,11 @@ describe('[getInlineCompletions] completion event', () => { "artificialDelay": undefined, "completionIntent": "return_statement", "contextSummary": { + "prefixChars": 25, "retrieverStats": {}, "strategy": "none", - "totalChars": 0, + "suffixChars": 1, + "totalChars": 26, }, "id": "stable-uuid", "isFuzzyMatch": false, diff --git a/vscode/src/completions/logger.test.ts b/vscode/src/completions/logger.test.ts index 5b8d96f21cae..6da2bdc1978e 100644 --- a/vscode/src/completions/logger.test.ts +++ b/vscode/src/completions/logger.test.ts @@ -25,6 +25,8 @@ const defaultContextSummary = { strategy: 'none', duration: 0.1337, totalChars: 3, + prefixChars: 0, + suffixChars: 3, retrieverStats: {}, } satisfies ContextSummary diff --git a/vscode/src/completions/providers/create-provider.ts b/vscode/src/completions/providers/create-provider.ts index d17f042ec892..266264034122 100644 --- a/vscode/src/completions/providers/create-provider.ts +++ b/vscode/src/completions/providers/create-provider.ts @@ -21,10 +21,10 @@ import { createProviderConfig as createExperimentalOllamaProviderConfig } from ' import { createProviderConfig as createExperimentalOpenAICompatibleProviderConfig } from './expopenaicompatible' import { DEEPSEEK_CODER_V2_LITE_BASE, - DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096, - 
DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192, - DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384, - DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768, + DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE, + FIREWORKS_DEEPSEEK_7B_LANG_ALL, + FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0, + FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1, type FireworksOptions, createProviderConfig as createFireworksProviderConfig, } from './fireworks' @@ -167,17 +167,16 @@ async function resolveFIMModelExperimentFromFeatureFlags(): ReturnType< ), ]) if (fimModelVariant1) { - // Variant 1: Current production model with +200msec latency to quantity the effect of latency increase while keeping same quality - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 } + return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE } } if (fimModelVariant2) { - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192 } + return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0 } } if (fimModelVariant3) { - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384 } + return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1 } } if (fimModelVariant4) { - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768 } + return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_ALL } } if (fimModelCurrentBest) { return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE } diff --git a/vscode/src/completions/providers/fireworks.ts b/vscode/src/completions/providers/fireworks.ts index 0e8af85f1b8d..1426bdb1d6ad 100644 --- a/vscode/src/completions/providers/fireworks.ts +++ b/vscode/src/completions/providers/fireworks.ts @@ -48,10 +48,11 @@ export interface FireworksOptions { const PROVIDER_IDENTIFIER = 'fireworks' -export const FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED = - 'fim-lang-specific-model-deepseek-stack-trained' -export const FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED = 
'fim-lang-specific-model-deepseek-logs-trained' +export const FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0 = 'deepseek-finetuned-lang-specific-v0' +export const FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1 = 'deepseek-finetuned-lang-specific-v1' +export const FIREWORKS_DEEPSEEK_7B_LANG_ALL = 'deepseek-finetuned-lang-all-v0' +export const DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE = 'deepseek-coder-v2-lite-base-direct-route' export const DEEPSEEK_CODER_V2_LITE_BASE = 'deepseek-coder-v2-lite-base' // Context window experiments with DeepSeek Model @@ -71,9 +72,12 @@ const MODEL_MAP = { // Fireworks model identifiers 'llama-code-13b': 'fireworks/accounts/fireworks/models/llama-v2-13b-code', - [FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED]: FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED, - [FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED]: FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED, + [FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0]: 'finetuned-fim-lang-specific-model-ds2-v0', + [FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1]: 'finetuned-fim-lang-specific-model-ds2-v1', + [FIREWORKS_DEEPSEEK_7B_LANG_ALL]: 'accounts/sourcegraph/models/finetuned-fim-lang-all-model-ds2-v0', [DEEPSEEK_CODER_V2_LITE_BASE]: 'fireworks/deepseek-coder-v2-lite-base', + [DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE]: + 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384]: @@ -101,9 +105,11 @@ function getMaxContextTokens(model: FireworksModel): number { // Llama 2 on Fireworks supports up to 4k tokens. 
We're constraining it here to better // compare the results return 2048 - case FIREWORKS_DEEPSEEK_7B_LANG_STACK_FINETUNED: - case FIREWORKS_DEEPSEEK_7B_LANG_LOG_FINETUNED: - case DEEPSEEK_CODER_V2_LITE_BASE: { + case FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0: + case FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1: + case FIREWORKS_DEEPSEEK_7B_LANG_ALL: + case DEEPSEEK_CODER_V2_LITE_BASE: + case DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE: { return 2048 } case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096: @@ -137,6 +143,7 @@ class FireworksProvider extends Provider { private fireworksConfig?: ClientConfiguration['autocompleteExperimentalFireworksOptions'] private modelHelper: DefaultModel private anonymousUserID: string | undefined + private shouldEnableDirectRoute = false constructor( options: ProviderOptions, @@ -156,6 +163,7 @@ class FireworksProvider extends Provider { this.client = client this.authStatus = authStatus this.anonymousUserID = anonymousUserID + this.shouldEnableDirectRoute = this.checkIfDirectRouteShouldBeEnabled() this.isLocalInstance = Boolean( this.authStatus.endpoint?.includes('sourcegraph.test') || this.authStatus.endpoint?.includes('localhost') @@ -181,6 +189,10 @@ class FireworksProvider extends Provider { } } + private checkIfDirectRouteShouldBeEnabled(): boolean { + return this.model === DEEPSEEK_CODER_V2_LITE_BASE_DIRECT_ROUTE + } + public generateCompletions( abortSignal: AbortSignal, snippets: AutocompleteContextSnippet[], @@ -254,7 +266,14 @@ class FireworksProvider extends Provider { private getCustomHeaders = (): Record<string, string> => { // Enabled Fireworks tracing for Sourcegraph teammates. // https://readme.fireworks.ai/docs/enabling-tracing - return this.authStatus.isFireworksTracingEnabled ? 
{ 'X-Fireworks-Genie': 'true' } : {} + const customHeader: Record<string, string> = {} + if (this.authStatus.isFireworksTracingEnabled) { + customHeader['X-Fireworks-Genie'] = 'true' + } + if (this.shouldEnableDirectRoute) { + customHeader['X-Sourcegraph-Use-Direct-Route'] = 'true' + } + return customHeader } private createClient(