From ebc875dc5a2176647b7c44b42ae315b5cc846bc6 Mon Sep 17 00:00:00 2001 From: toptobes <96998732+toptobes@users.noreply.github.com> Date: Wed, 22 May 2024 00:47:31 -0500 Subject: [PATCH] Kg vectorize support (#38) * embeddingApiKey support * added new vectroize parameters support * updated api report * added a couple small tests * added some vectorize tests * updated env.example * linting fixes * majorly updated vectorize tests * fixed couple breaking tests * updated build report --- .env.example | 8 +- .gitignore | 2 + DEVGUIDE.md | 27 ++ etc/astra-db-ts.api.md | 12 +- src/api/clients/data-api-http-client.ts | 53 ++-- src/api/clients/devops-api-http-client.ts | 4 +- src/api/clients/http-client.ts | 4 +- src/api/clients/types.ts | 2 +- src/api/constants.ts | 5 + src/data-api/collection.ts | 7 +- src/data-api/db.ts | 5 +- .../types/collections/collections-common.ts | 12 + src/data-api/types/collections/command.ts | 9 +- .../types/collections/create-collection.ts | 5 +- .../types/collections/spawn-collection.ts | 19 ++ tests/fixtures.ts | 5 - .../api/data-api-http-client.test.ts | 2 +- .../data-api/collection/insert-many.test.ts | 38 +-- .../data-api/collection/insert-one.test.ts | 20 +- .../data-api/collection/misc.test.ts | 2 +- .../data-api/collection/options.test.ts | 5 + tests/integration/data-api/db.test.ts | 4 +- tests/integration/data-api/vectorize.test.ts | 277 ++++++++++++++++++ tests/integration/misc/code-samples.test.ts | 16 - tests/unit/client/data-api-client.test.ts | 12 + 25 files changed, 439 insertions(+), 116 deletions(-) create mode 100644 src/data-api/types/collections/spawn-collection.ts create mode 100644 tests/integration/data-api/vectorize.test.ts diff --git a/.env.example b/.env.example index c7a625dc..d0919bb8 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,7 @@ +################################################################################ +# READ THE DEVGUIDE.MD FILE FOR MORE INFORMATION ON HOW TO CONFIGURE THIS FILE # 
+################################################################################ + # Astra API endpoint ASTRA_URI=https://-.apps.astra.datastax.com @@ -5,10 +9,10 @@ ASTRA_URI=https://-.apps.astra.datastax.com APPLICATION_TOKEN=AstraCS: # Set this to some value to enable running tests that require a $vectorize enabled environment -ASTRA_RUN_VECTORIZE_TESTS= +ASTRA_RUN_VECTORIZE_TESTS=1 # Set this to some value to enable running long-running tests -ASTRA_RUN_LONG_TESTS= +ASTRA_RUN_LONG_TESTS=1 # Set this to some value to enable running admin tests ASTRA_RUN_ADMIN_TESTS= diff --git a/.gitignore b/.gitignore index 230e44fd..3e14009a 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,5 @@ build.zip temp tsdoc-metadata.json + +tests/vectorize_credentials.json diff --git a/DEVGUIDE.md b/DEVGUIDE.md index cf75fda7..91d5c84b 100644 --- a/DEVGUIDE.md +++ b/DEVGUIDE.md @@ -72,6 +72,33 @@ If a new tag really, really, needs to be added, it can be done by adding a new e format, and updating the `assertTestsEnabled` function. However, this should be done sparingly, as it can make the test suite harder to manage. +### Running vectorize tests +To run vectorize tests, you need to have a vectorize-enabled kube running, with the correct tags enabled. +You must create a file, `tests/vectorize_credentials.json`, with the following format: + +```ts +interface Config { + [providerName: string]: { + apiKey?: string, + providerKey?: string, + parameters?: { + [modelName: string]: Record + }, + } +} +``` + +where: +- `providerName` is the name of the provider (e.g. `nvidia`, `openai`, etc.) 
as found in `findEmbeddingProviders` +- `apiKey` is the API key for the provider (which will be passed in through the header) + - optional if no header auth test wanted +- `providerKey` is the provider key for the provider (which will be passed in @ collection creation) + - optional if no KMS auth test wanted +- `parameters` is a mapping of model names to their corresponding parameters + - optional if not required. `azureOpenAI`, for example, will need this. + +This file is gitignored by default and will not be checked into VCS. + ### Coverage testing To run coverage testing, run the following command: diff --git a/etc/astra-db-ts.api.md b/etc/astra-db-ts.api.md index 65686689..54af8626 100644 --- a/etc/astra-db-ts.api.md +++ b/etc/astra-db-ts.api.md @@ -175,9 +175,10 @@ export type Caller = [name: string, version?: string]; // @public export class Collection { // Warning: (ae-forgotten-export) The symbol "DataAPIHttpClient" needs to be exported by the entry point index.d.ts + // Warning: (ae-forgotten-export) The symbol "CollectionSpawnOptions" needs to be exported by the entry point index.d.ts // // @internal - constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined); + constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined); bulkWrite(operations: AnyBulkWriteOperation[], options?: BulkWriteOptions): Promise>; readonly collectionName: string; countDocuments(filter: Filter, upperBound: number, options?: WithTimeout): Promise; @@ -287,7 +288,7 @@ export interface CostInfo { } // @public -export interface CreateCollectionOptions extends WithTimeout, CollectionOptions, WithNamespace { +export interface CreateCollectionOptions extends WithTimeout, CollectionOptions, CollectionSpawnOptions { checkExists?: boolean; } @@ -482,7 +483,7 @@ export class Db { // @internal constructor(endpoint: string, options: InternalRootClientOpts); admin(options?: AdminSpawnOptions): AstraDbAdmin; - 
collection(name: string, options?: WithNamespace): Collection; + collection(name: string, options?: CollectionSpawnOptions): Collection; collections(options?: WithNamespace & WithTimeout): Promise; command(command: Record, options?: RunCommandOptions): Promise; createCollection(collectionName: string, options?: CreateCollectionOptions): Promise>; @@ -962,8 +963,9 @@ export interface ReplaceOneOptions extends WithTimeout { export type ReplaceOneResult = InternalUpdateResult; // @public -export interface RunCommandOptions extends WithNamespace, WithTimeout { +export interface RunCommandOptions extends WithTimeout { collection?: string; + namespace?: string | null; } // @public @@ -1170,7 +1172,9 @@ export interface VectorDoc { // @alpha export interface VectorizeServiceOptions { + authentication?: Record; modelName: string; + parameters?: Record; provider: string; } diff --git a/src/api/clients/data-api-http-client.ts b/src/api/clients/data-api-http-client.ts index e3b16a2d..1bc69ab6 100644 --- a/src/api/clients/data-api-http-client.ts +++ b/src/api/clients/data-api-http-client.ts @@ -15,6 +15,7 @@ import { DEFAULT_DATA_API_AUTH_HEADER, + DEFAULT_EMBEDDING_API_KEY_HEADER, DEFAULT_NAMESPACE, DEFAULT_TIMEOUT, hrTimeMs, @@ -23,7 +24,7 @@ import { HttpMethods, RawDataAPIResponse, } from '@/src/api'; -import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID, WithNamespace } from '@/src/data-api'; +import { DataAPIResponseError, DataAPITimeoutError, ObjectId, UUID } from '@/src/data-api'; import { TimeoutManager, TimeoutOptions } from '@/src/api/timeout-managers'; import { CommandFailedEvent, CommandStartedEvent, CommandSucceededEvent } from '@/src/data-api/events'; import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } from '@/src/data-api/errors'; @@ -32,16 +33,20 @@ import { CollectionNotFoundError, DataAPIHttpError, mkRespErrorFromResponse } fr * @internal */ export interface DataAPIRequestInfo { - url: string; - collection?: string; - 
namespace?: string; - command: Record; - timeoutManager: TimeoutManager; + url: string, + collection?: string, + namespace?: string | null, + command: Record, + timeoutManager: TimeoutManager, } interface ExecuteCommandOptions { - collection?: string; - namespace?: string; + namespace?: string | null, + collection?: string, +} + +interface DataAPIHttpClientOptions extends HTTPClientOptions { + namespace: string | undefined, } /** @@ -50,16 +55,16 @@ interface ExecuteCommandOptions { export class DataAPIHttpClient extends HttpClient { public collection?: string; public namespace?: string; - readonly #props: HTTPClientOptions & WithNamespace; + readonly #props: DataAPIHttpClientOptions; - constructor(props: HTTPClientOptions & WithNamespace) { - super(props, mkAuthHeader); + constructor(props: DataAPIHttpClientOptions, embeddingApiKey?: string) { + super(props, mkHeaders(embeddingApiKey)); this.namespace = props.namespace; this.#props = props; } - public withCollection(namespace: string, collection: string): DataAPIHttpClient { - const clone = new DataAPIHttpClient(this.#props); + public forCollection(namespace: string, collection: string, embeddingApiKey: string | undefined): DataAPIHttpClient { + const clone = new DataAPIHttpClient(this.#props, embeddingApiKey); clone.collection = collection; clone.namespace = namespace; return clone; @@ -86,9 +91,12 @@ export class DataAPIHttpClient extends HttpClient { try { info.collection ||= this.collection; - info.namespace ||= this.namespace || DEFAULT_NAMESPACE; - const keyspacePath = `/${info.namespace}`; + if (info.namespace !== null) { + info.namespace ||= this.namespace || DEFAULT_NAMESPACE; + } + + const keyspacePath = info.namespace ? `/${info.namespace}` : ''; const collectionPath = info.collection ? 
`/${info.collection}` : ''; info.url += keyspacePath + collectionPath; @@ -117,7 +125,7 @@ export class DataAPIHttpClient extends HttpClient { if (data.errors && data?.errors?.length > 0 && data?.errors[0]?.errorCode === 'COLLECTION_NOT_EXIST') { const name = data?.errors[0]?.message.split(': ')[1]; - throw new CollectionNotFoundError(info.namespace, name); + throw new CollectionNotFoundError(info.namespace!, name); } if (data?.errors && data?.errors.length > 0) { @@ -193,6 +201,15 @@ export function reviver(_: string, value: any): any { return value; } -function mkAuthHeader(token: string): Record { - return { [DEFAULT_DATA_API_AUTH_HEADER]: token }; +function mkHeaders(embeddingApiKey: string | undefined) { + if (embeddingApiKey) { + return (token: string) => ({ + [DEFAULT_EMBEDDING_API_KEY_HEADER]: embeddingApiKey, + [DEFAULT_DATA_API_AUTH_HEADER]: token, + }); + } else { + return (token: string) => ({ + [DEFAULT_DATA_API_AUTH_HEADER]: token, + }); + } } diff --git a/src/api/clients/devops-api-http-client.ts b/src/api/clients/devops-api-http-client.ts index b343b21f..ec641f0a 100644 --- a/src/api/clients/devops-api-http-client.ts +++ b/src/api/clients/devops-api-http-client.ts @@ -55,7 +55,7 @@ interface DevopsAPIResponse { */ export class DevOpsAPIHttpClient extends HttpClient { constructor(opts: HTTPClientOptions) { - super(opts, mkAuthHeader); + super(opts, mkHeaders); } public async request(req: DevOpsAPIRequestInfo, options: TimeoutOptions | undefined, started: number = 0): Promise { @@ -187,6 +187,6 @@ export class DevOpsAPIHttpClient extends HttpClient { } } -function mkAuthHeader(token: string) { +function mkHeaders(token: string) { return { [DEFAULT_DEVOPS_API_AUTH_HEADER]: `Bearer ${token}` }; } diff --git a/src/api/clients/http-client.ts b/src/api/clients/http-client.ts index 59bb00ba..607e743d 100644 --- a/src/api/clients/http-client.ts +++ b/src/api/clients/http-client.ts @@ -16,7 +16,7 @@ import { CLIENT_USER_AGENT, RAGSTACK_REQUESTED_WITH } from 
'@/src/api/constants' import { Caller, DataAPIClientEvents } from '@/src/client'; import TypedEmitter from 'typed-emitter'; import { FetchCtx, ResponseInfo } from '@/src/api/fetch/types'; -import { AuthHeaderFactory, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types'; +import { MkBaseHeaders, HTTPClientOptions, HTTPRequestInfo } from '@/src/api/clients/types'; /** * @internal @@ -29,7 +29,7 @@ export abstract class HttpClient { readonly #applicationToken: string; readonly baseHeaders: Record; - protected constructor(options: HTTPClientOptions, mkAuthHeader: AuthHeaderFactory) { + protected constructor(options: HTTPClientOptions, mkAuthHeader: MkBaseHeaders) { this.#applicationToken = options.applicationToken; this.baseUrl = options.baseUrl; this.emitter = options.emitter; diff --git a/src/api/clients/types.ts b/src/api/clients/types.ts index d40341d6..60a7fc26 100644 --- a/src/api/clients/types.ts +++ b/src/api/clients/types.ts @@ -18,7 +18,7 @@ export interface HTTPClientOptions { /** * @internal */ -export type AuthHeaderFactory = (token: string) => Record; +export type MkBaseHeaders = (token: string) => Record; /** * @internal diff --git a/src/api/constants.ts b/src/api/constants.ts index b25135a1..dc42065e 100644 --- a/src/api/constants.ts +++ b/src/api/constants.ts @@ -53,6 +53,11 @@ export const DEFAULT_NAMESPACE = 'default_keyspace'; */ export const DEFAULT_TIMEOUT = 30000; +/** + * @internal + */ +export const DEFAULT_EMBEDDING_API_KEY_HEADER = 'x-embedding-api-key'; + /** * @internal */ diff --git a/src/data-api/collection.ts b/src/data-api/collection.ts index 44c6b350..1b409fa3 100644 --- a/src/data-api/collection.ts +++ b/src/data-api/collection.ts @@ -75,6 +75,7 @@ import { FindOneAndDeleteCommand } from '@/src/data-api/types/find/find-one-dele import { FindOneAndUpdateCommand } from '@/src/data-api/types/find/find-one-update'; import { InsertManyCommand } from '@/src/data-api/types/insert/insert-many'; import { Mutable } from 
'@/src/data-api/types/utils'; +import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection'; /** * Represents the interface to a collection in the database. @@ -118,19 +119,19 @@ export class Collection { * * @internal */ - constructor(db: Db, httpClient: DataAPIHttpClient, name: string, namespace: string | undefined) { + constructor(db: Db, httpClient: DataAPIHttpClient, name: string, opts: CollectionSpawnOptions | undefined) { Object.defineProperty(this, 'collectionName', { value: name, writable: false, }); Object.defineProperty(this, 'namespace', { - value: namespace ?? db.namespace, + value: opts?.namespace ?? db.namespace, writable: false, }); Object.defineProperty(this, '_httpClient', { - value: httpClient.withCollection(this.namespace, this.collectionName), + value: httpClient.forCollection(this.namespace, this.collectionName, opts?.embeddingApiKey), enumerable: false, }); diff --git a/src/data-api/db.ts b/src/data-api/db.ts index 98fe804e..60b731f1 100644 --- a/src/data-api/db.ts +++ b/src/data-api/db.ts @@ -30,6 +30,7 @@ import { extractDbIdFromUrl, validateOption } from '@/src/data-api/utils'; import { CreateCollectionCommand } from '@/src/data-api/types/collections/create-collection'; import { ListCollectionsCommand } from '@/src/data-api/types/collections/list-collection'; import { InternalRootClientOpts } from '@/src/client/types'; +import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection'; /** * Represents an interface to some Astra database instance. 
This is the entrypoint for database-level DML, such as @@ -244,8 +245,8 @@ export class Db { * @see SomeDoc * @see VectorDoc */ - public collection(name: string, options?: WithNamespace): Collection { - return new Collection(this, this._httpClient, name, options?.namespace); + public collection(name: string, options?: CollectionSpawnOptions): Collection { + return new Collection(this, this._httpClient, name, options); } /** diff --git a/src/data-api/types/collections/collections-common.ts b/src/data-api/types/collections/collections-common.ts index e4a292f3..54359348 100644 --- a/src/data-api/types/collections/collections-common.ts +++ b/src/data-api/types/collections/collections-common.ts @@ -62,6 +62,18 @@ export interface VectorizeServiceOptions { * @alpha */ modelName: string, + /** + * NOTE: This feature is under current development. + * + * @alpha + */ + authentication?: Record, + /** + * NOTE: This feature is under current development. + * + * @alpha + */ + parameters?: Record, } /** diff --git a/src/data-api/types/collections/command.ts b/src/data-api/types/collections/command.ts index e79cb583..426a1695 100644 --- a/src/data-api/types/collections/command.ts +++ b/src/data-api/types/collections/command.ts @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -import { WithNamespace } from '@/src/data-api'; import { WithTimeout } from '@/src/common'; /** @@ -25,9 +24,13 @@ import { WithTimeout } from '@/src/common'; * * @public */ -export interface RunCommandOptions extends WithNamespace, WithTimeout { +export interface RunCommandOptions extends WithTimeout { /** * The collection to run the command on. If not provided, the command is run on the database. */ - collection?: string + collection?: string, + /** + * The namespace (aka keyspace) to use for the db operation. 
+ */ + namespace?: string | null, } diff --git a/src/data-api/types/collections/create-collection.ts b/src/data-api/types/collections/create-collection.ts index 006f500b..b40d98c8 100644 --- a/src/data-api/types/collections/create-collection.ts +++ b/src/data-api/types/collections/create-collection.ts @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -import { SomeDoc, WithNamespace } from '@/src/data-api'; +import { SomeDoc } from '@/src/data-api'; import { CollectionOptions } from '@/src/data-api/types'; import { WithTimeout } from '@/src/common/types'; +import { CollectionSpawnOptions } from '@/src/data-api/types/collections/spawn-collection'; /** @internal */ export interface CreateCollectionCommand { @@ -38,7 +39,7 @@ export interface CreateCollectionCommand { * * @public */ -export interface CreateCollectionOptions extends WithTimeout, CollectionOptions, WithNamespace { +export interface CreateCollectionOptions extends WithTimeout, CollectionOptions, CollectionSpawnOptions { /** * If `true` or unset, runs an additional existence check before creating the collection, failing if the collection * with the same name already exists, raising a {@link CollectionAlreadyExistsError}. diff --git a/src/data-api/types/collections/spawn-collection.ts b/src/data-api/types/collections/spawn-collection.ts new file mode 100644 index 00000000..f65d1a0b --- /dev/null +++ b/src/data-api/types/collections/spawn-collection.ts @@ -0,0 +1,19 @@ +// Copyright DataStax, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { WithNamespace } from '@/src/data-api'; + +export interface CollectionSpawnOptions extends WithNamespace { + embeddingApiKey?: string; +} diff --git a/tests/fixtures.ts b/tests/fixtures.ts index b24bf19a..50d44b55 100644 --- a/tests/fixtures.ts +++ b/tests/fixtures.ts @@ -22,7 +22,6 @@ import { DataAPIClient } from '@/src/client'; import { Context } from 'mocha'; export const DEFAULT_COLLECTION_NAME = 'test_coll'; -export const VECTORIZE_COLLECTION_NAME = 'vectorize_coll'; export const EPHEMERAL_COLLECTION_NAME = 'temp_coll'; export const OTHER_NAMESPACE = 'other_keyspace'; export const TEMP_DB_NAME = 'astra-test-db-plus-random-name-1284' @@ -41,10 +40,6 @@ export const initTestObjects = async (ctx: Context, preferHttp2 = USE_HTTP2, cli const db = client.db(process.env.ASTRA_URI!); if (!collsSetup) { - if (process.env.ASTRA_RUN_VECTORIZE_TESTS) { - await db.createCollection(VECTORIZE_COLLECTION_NAME, { vector: { service: { modelName: 'NV-Embed-QA', provider: 'nvidia' } }, checkExists: false }); - } - await db.dropCollection(EPHEMERAL_COLLECTION_NAME); await db.dropCollection(EPHEMERAL_COLLECTION_NAME, { namespace: OTHER_NAMESPACE }); await db.createCollection(DEFAULT_COLLECTION_NAME, { vector: { dimension: 5, metric: 'cosine' }, checkExists: false, namespace: OTHER_NAMESPACE }); diff --git a/tests/integration/api/data-api-http-client.test.ts b/tests/integration/api/data-api-http-client.test.ts index ef52c7c1..51e6fb17 100644 --- a/tests/integration/api/data-api-http-client.test.ts +++ b/tests/integration/api/data-api-http-client.test.ts 
@@ -37,7 +37,7 @@ describe('integration.api.data-api-http-client', () => { const resp = await httpClient.executeCommand({ findCollections: {}, }, {}); - assert.strictEqual(resp.status?.collections.length, 1); + assert.strictEqual(typeof resp.status?.collections.length, 'number'); }); it('should execute a db-level command in another namespace', async () => { diff --git a/tests/integration/data-api/collection/insert-many.test.ts b/tests/integration/data-api/collection/insert-many.test.ts index 13e75cde..522a53da 100644 --- a/tests/integration/data-api/collection/insert-many.test.ts +++ b/tests/integration/data-api/collection/insert-many.test.ts @@ -9,21 +9,15 @@ // limitations under the License. // noinspection DuplicatedCode -import { Collection, DataAPIError, DataAPITimeoutError, Db, InsertManyError, ObjectId, UUID } from '@/src/data-api'; -import { - assertTestsEnabled, - initCollectionWithFailingClient, - initTestObjects, - VECTORIZE_COLLECTION_NAME, -} from '@/tests/fixtures'; +import { Collection, DataAPIError, DataAPITimeoutError, InsertManyError, ObjectId, UUID } from '@/src/data-api'; +import { initCollectionWithFailingClient, initTestObjects } from '@/tests/fixtures'; import assert from 'assert'; describe('integration.data-api.collection.insert-many', () => { let collection: Collection; - let db: Db; before(async function () { - [, db, collection] = await initTestObjects(this); + [, , collection] = await initTestObjects(this); }); beforeEach(async () => { @@ -228,30 +222,4 @@ describe('integration.data-api.collection.insert-many', () => { const docs = Array.from({ length: 100 }, (_, i) => ({ _id: i })); await collection.insertMany(docs, { ordered: true, maxTimeMS: 500000, chunkSize: 10 }); }); - - it('[vectorize] should insertMany with vectorize', async function () { - assertTestsEnabled(this, 'VECTORIZE'); - - const collection = db.collection(VECTORIZE_COLLECTION_NAME); - await collection.deleteAll(); - - const res = await collection.insertMany([ - { name: 
'Arch Enemy' }, - { name: 'Equilibrium' }, - { name: 'AC/DC' }, - ], { - vectorize: [ - 'Arch Enemy is a Swedish melodic death metal band, originally a supergroup from Halmstad, formed in 1995.', - 'Equilibrium is a German symphonic metal band', - 'AC/DC are an Australian rock band formed in Sydney in 1973 by Scottish-born brothers Malcolm and Angus Young', - ], - }); - assert.ok(res); - const archEnemy = await collection.findOne({ name: 'Arch Enemy' }); - assert.strictEqual(archEnemy?.$vectorize, 'Arch Enemy is a Swedish melodic death metal band, originally a supergroup from Halmstad, formed in 1995.'); - const equilibrium = await collection.findOne({ name: 'Equilibrium' }); - assert.strictEqual(equilibrium?.$vectorize, 'Equilibrium is a German symphonic metal band'); - const acdc = await collection.findOne({ name: 'AC/DC' }); - assert.strictEqual(acdc?.$vectorize, 'AC/DC are an Australian rock band formed in Sydney in 1973 by Scottish-born brothers Malcolm and Angus Young'); - }); }); diff --git a/tests/integration/data-api/collection/insert-one.test.ts b/tests/integration/data-api/collection/insert-one.test.ts index c164c229..18d2e2ae 100644 --- a/tests/integration/data-api/collection/insert-one.test.ts +++ b/tests/integration/data-api/collection/insert-one.test.ts @@ -9,16 +9,15 @@ // limitations under the License. 
// noinspection DuplicatedCode -import { Collection, DataAPIResponseError, Db, ObjectId, UUID } from '@/src/data-api'; -import { assertTestsEnabled, initTestObjects, VECTORIZE_COLLECTION_NAME } from '@/tests/fixtures'; +import { Collection, DataAPIResponseError, ObjectId, UUID } from '@/src/data-api'; +import { initTestObjects } from '@/tests/fixtures'; import assert from 'assert'; describe('integration.data-api.collection.insert-one', () => { let collection: Collection; - let db: Db; before(async function () { - [, db, collection] = await initTestObjects(this); + [, , collection] = await initTestObjects(this); }); beforeEach(async () => { @@ -145,17 +144,4 @@ describe('integration.data-api.collection.insert-one', () => { } await assert.rejects(() => collection.insertOne(docToInsert), DataAPIResponseError); }); - - it('[vectorize] should insertOne with vectorize', async function () { - assertTestsEnabled(this, 'VECTORIZE'); - - const collection = db.collection(VECTORIZE_COLLECTION_NAME); - await collection.deleteAll(); - - const res = await collection.insertOne({ name: 'Arch Enemy' }, { vectorize: 'Arch Enemy is a Swedish melodic death metal band, originally a supergroup from Halmstad, formed in 1995.' 
}); - assert.ok(res); - - const found = await collection.findOne({ name: 'Arch Enemy' }); - assert.deepStrictEqual(found?.$vectorize, 'Arch Enemy is a Swedish melodic death metal band, originally a supergroup from Halmstad, formed in 1995.'); - }); }); diff --git a/tests/integration/data-api/collection/misc.test.ts b/tests/integration/data-api/collection/misc.test.ts index d969b602..f25c3207 100644 --- a/tests/integration/data-api/collection/misc.test.ts +++ b/tests/integration/data-api/collection/misc.test.ts @@ -24,7 +24,7 @@ describe('integration.data-api.collection.misc', () => { describe('initialization', () => { it('should initialize a Collection', () => { - const collection = new Collection(db, db['_httpClient'], 'new_collection', DEFAULT_NAMESPACE); + const collection = new Collection(db, db['_httpClient'], 'new_collection', undefined); assert.ok(collection); }); }); diff --git a/tests/integration/data-api/collection/options.test.ts b/tests/integration/data-api/collection/options.test.ts index 33a33df4..19f33706 100644 --- a/tests/integration/data-api/collection/options.test.ts +++ b/tests/integration/data-api/collection/options.test.ts @@ -17,6 +17,11 @@ describe('integration.data-api.collection.options', () => { before(async function () { [, db] = await initTestObjects(this); + await db.dropCollection('test_db_collection_empty_opts'); + }); + + after(async () => { + await db.dropCollection('test_db_collection_empty_opts'); }); it('lists its own options', async () => { diff --git a/tests/integration/data-api/db.test.ts b/tests/integration/data-api/db.test.ts index 4544b678..de0ffc26 100644 --- a/tests/integration/data-api/db.test.ts +++ b/tests/integration/data-api/db.test.ts @@ -191,8 +191,8 @@ describe('integration.data-api.db', async () => { it('should return the collections in the db', async () => { const collections = await db.collections(); assert.ok(collections instanceof Array); - assert.deepStrictEqual(collections.map(c => c.collectionName), 
[DEFAULT_COLLECTION_NAME]); - assert.deepStrictEqual(collections.map(c => c.namespace), [DEFAULT_NAMESPACE]); + assert.ok(collections.some(c => c.collectionName === DEFAULT_COLLECTION_NAME)); + assert.ok(collections.map(c => c.namespace).every(ns => ns === DEFAULT_NAMESPACE)); }); it('should return the collections in the db in another namespace', async () => { diff --git a/tests/integration/data-api/vectorize.test.ts b/tests/integration/data-api/vectorize.test.ts new file mode 100644 index 00000000..04a140c9 --- /dev/null +++ b/tests/integration/data-api/vectorize.test.ts @@ -0,0 +1,277 @@ +// Copyright DataStax, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// noinspection DuplicatedCode
+
+import assert from 'assert';
+import { Db } from '@/src/data-api';
+import { assertTestsEnabled, initTestObjects } from '@/tests/fixtures';
+import * as fs from 'fs';
+import { fetch } from 'fetch-h2';
+import { DEFAULT_DATA_API_AUTH_HEADER, DEFAULT_DATA_API_PATH } from '@/src/api';
+import { after } from 'mocha';
+
+/**
+ * One generated test case: a provider/model pair exercised with a single
+ * authentication mode.
+ */
+interface VectorizeTest {
+  /** Embedding provider name, as keyed in the `findEmbeddingProviders` response. */
+  provider: string,
+  /** Model name, taken from the provider's `models` list. */
+  modelName: string,
+  /** Credential passed as `embeddingApiKey` when the collection is created. */
+  header?: string,
+  /** Credential passed as `authentication.providerKey` in the vector service options. */
+  providerKey?: string,
+  /** Which key produced this case: `'header'`, `'providerKey'` or `'none'`. */
+  authType: string,
+  /** Model-specific parameters read from the credentials file, if any. */
+  parameters: Record<string, unknown> | undefined,
+}
+
+describe('integration.data-api.vectorize', () => {
+  let db: Db;
+
+  before(async function () {
+    assertTestsEnabled(this, 'VECTORIZE', 'LONG');
+
+    [, db] = await initTestObjects(this);
+
+    // The list of cases is only known after the asynchronous provider lookup, so
+    // the generated suites are registered from inside this hook. A failed lookup
+    // is logged and degrades to "no generated tests" instead of failing the run.
+    const tests: VectorizeTest[] = await initVectorTests().catch((e) => {
+      console.error('Failed to initialize vectorize tests', e);
+      return [];
+    });
+
+    describe('[vectorize] [long] generated tests', () => {
+      // One collection per case; non-word characters are stripped from the model name.
+      const names = tests.map((test) => `${test.provider}_${test.modelName.replace(/\W/g, '')}_${test.authType}`);
+
+      before(async () => {
+        // Best-effort cleanup of collections left over from a previous run.
+        for (const name of names) {
+          try { await db.dropCollection(name); } catch (_) { /* empty */ }
+        }
+      });
+
+      tests.forEach((test, i) => {
+        const name = names[i];
+
+        describe('generated test', () => {
+          createVectorizeProvidersTest(db, test, name);
+
+          // The parameter tests are registered only for the first generated case.
+          if (i === 0) {
+            createVectorizeParamTests(db, name);
+          }
+
+          after(async () => {
+            await db.dropCollection(name);
+          });
+        });
+      });
+    });
+  });
+
+  it('dummy test so before is executed', () => {});
+});
+
+/**
+ * Builds the list of vectorize test cases.
+ *
+ * Reads `tests/vectorize_credentials.json`, asks the Data API for its embedding
+ * providers (`findEmbeddingProviders`), and emits one case per
+ * (provider, model, authentication mode) for which the mode is enabled and a
+ * truthy credential is available. Providers without an entry in the credentials
+ * file are skipped with a warning.
+ *
+ * @returns The generated test cases.
+ * @throws If the credentials file cannot be read or parsed, or the request fails.
+ */
+async function initVectorTests() {
+  const credentials = JSON.parse(fs.readFileSync('tests/vectorize_credentials.json', 'utf8'));
+
+  const embeddingProviders = await fetch(`${process.env.ASTRA_URI!}/${DEFAULT_DATA_API_PATH}`, {
+    body: JSON.stringify({ findEmbeddingProviders: {} }),
+    headers: {
+      [DEFAULT_DATA_API_AUTH_HEADER]: process.env.APPLICATION_TOKEN,
+    },
+    method: 'POST',
+  })
+    .then(r => r.json())
+    .then(r => r.status['embeddingProviders']);
+
+  return Object.entries(embeddingProviders)
+    .flatMap(([provider, info]) => {
+      const providerCredentials = credentials[provider];
+
+      if (!providerCredentials) {
+        console.warn(`No credentials found for provider ${provider}; skipping models `);
+        return [];
+      }
+
+      // A provider may not list every authentication mode; a missing entry is
+      // treated as "not enabled" rather than throwing and discarding every case.
+      const auth = info['supportedAuthentication'];
+
+      return info['models'].map((model: any) => ({
+        provider,
+        modelName: model.name,
+        header: auth?.['HEADER']?.enabled ? providerCredentials['apiKey'] : undefined,
+        providerKey: auth?.['SHARED_SECRET']?.enabled ? providerCredentials['providerKey'] : undefined,
+        none: auth?.['NONE']?.enabled,
+        parameters: providerCredentials['parameters']?.[model.name],
+      }));
+    })
+    .flatMap((test) => {
+      const tests: VectorizeTest[] = [];
+
+      // Fan out into one case per authentication mode that has a truthy value.
+      for (const key of ['header', 'providerKey', 'none']) {
+        if (test[key as keyof typeof test]) {
+          tests.push({
+            provider: test.provider,
+            modelName: test.modelName,
+            [key]: test[key as keyof typeof test],
+            authType: key,
+            parameters: test.parameters,
+          });
+        }
+      }
+
+      return tests;
+    });
+}
+
+/**
+ * Registers the tests covering the `vectorize` option against the collection
+ * called `name`.
+ *
+ * @param db - Database handle used to look up and access the collection.
+ * @param name - Name of the collection the tests operate on.
+ */
+function createVectorizeParamTests(db: Db, name: string) {
+  describe('[vectorize] [dev] $vectorize/vectorize params', () => {
+    const collection = db.collection(name);
+
+    before(async function () {
+      // These tests need the collection to exist already; skip them when it does
+      // not. The previous check was inverted and skipped exactly when it existed.
+      // NOTE(review): the element shape returned for `nameOnly: true` is not
+      // visible here, so both bare names and `{ name }` records are accepted;
+      // confirm against `Db.listCollections` and simplify.
+      const collections: unknown[] = await db.listCollections({ nameOnly: true });
+      const exists = collections.some((c) => c === name || (c as { name?: unknown } | null)?.name === name);
+
+      if (!exists) {
+        this.skip();
+      }
+    });
+
+    beforeEach(async () => {
+      await collection.deleteAll();
+    });
+
+    it('should override $vectorize if both are set in insertOne', async () => {
+      await collection.insertOne({
+        _id: '1',
+        $vectorize: 'The grass was as green as it always was that sinister day',
+      }, {
+        vectorize: 'The blackbirds sang their song as they always did that black-letter day',
+      });
+
+      const result = await collection.findOne({ _id: '1' }, { projection: { '*': 1 } });
+      assert.strictEqual(result?.$vectorize, 'The blackbirds sang their song as they always did that black-letter day');
+    });
+
+    it('should override $vectorize if both are set in insertMany', async () => {
+      await collection.insertMany([
+        {
+          _id: '1',
+          $vectorize: 'The grass was as green as it always was that sinister day',
+        },
+        {
+          _id: '2',
+          $vectorize: 'The grass was as green as it always was that sinister day',
+        },
+      ], {
+        // A `null` entry leaves that document's own `$vectorize` in place (asserted below).
+        vectorize: [
+          'The blackbirds sang their song as they always did that black-letter day',
+          null,
+        ],
+      });
+
+      const result1 = await collection.findOne({ _id: '1' }, { projection: { '*': 1 } });
+      assert.strictEqual(result1?.$vectorize, 'The blackbirds sang their song as they always did that black-letter day');
+      const result2 = await collection.findOne({ _id: '2' }, { projection: { '*': 1 } });
+      assert.strictEqual(result2?.$vectorize, 'The grass was as green as it always was that sinister day');
+    });
+
+    it('should throw an error if vectorize and sort are both set', async () => {
+      await assert.rejects(async () => {
+        await collection.findOne({}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+      await assert.rejects(async () => {
+        // Not awaited: the rejection can only come from `find` throwing synchronously.
+        collection.find({}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+      await assert.rejects(async () => {
+        await collection.updateOne({}, {}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+      await assert.rejects(async () => {
+        await collection.findOneAndUpdate({}, {}, {
+          sort: { name: 1 },
+          vectorize: 'some text',
+          returnDocument: 'before',
+        });
+      });
+      await assert.rejects(async () => {
+        await collection.replaceOne({}, {}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+      await assert.rejects(async () => {
+        await collection.findOneAndReplace({}, {}, {
+          sort: { name: 1 },
+          vectorize: 'some text',
+          returnDocument: 'before',
+        });
+      });
+      await assert.rejects(async () => {
+        await collection.deleteOne({}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+      await assert.rejects(async () => {
+        await collection.findOneAndDelete({}, { sort: { name: 1 }, vectorize: 'some text' });
+      });
+    });
+  });
+}
+
+/**
+ * Registers the end-to-end lifecycle test for one provider/model/auth case:
+ * create the collection, insert one and many documents with `vectorize`, then
+ * find, delete and find again using `vectorize`.
+ *
+ * @param db - Database handle used to create the collection.
+ * @param test - The provider/model/auth case to exercise.
+ * @param name - Name of the collection to create for this case.
+ */
+function createVectorizeProvidersTest(db: Db, test: VectorizeTest, name: string) {
+  it(`[vectorize] [dev] has a working lifecycle (${test.provider}/${test.modelName}) (${test.authType})`, async () => {
+    const collection = await db.createCollection(name, {
+      vector: {
+        service: {
+          provider: test.provider,
+          modelName: test.modelName,
+          authentication: {
+            providerKey: test.providerKey,
+          },
+          parameters: test.parameters,
+        },
+      },
+      embeddingApiKey: test.header,
+    });
+
+    const insertOneResult = await collection.insertOne({
+      name: 'Alice',
+      age: 30,
+    }, {
+      vectorize: 'Alice likes big red cars',
+    });
+
+    assert.ok(insertOneResult);
+
+    const insertManyResult = await collection.insertMany([
+      {
+        name: 'Bob',
+        age: 40,
+      },
+      {
+        name: 'Charlie',
+        age: 50,
+      },
+    ], {
+      vectorize: [
+        'Cause maybe, you\'re gonna be the one that saves me... and after all, you\'re my wonderwall...',
+        'The water bottle was small',
+      ],
+    });
+
+    assert.ok(insertManyResult);
+    assert.strictEqual(insertManyResult.insertedCount, 2);
+
+    // Searching with the exact text inserted for Alice must return her document.
+    const findOneResult = await collection.findOne({}, {
+      vectorize: 'Alice likes big red cars',
+      includeSimilarity: true,
+    });
+
+    assert.ok(findOneResult);
+    assert.strictEqual(findOneResult._id, insertOneResult.insertedId);
+    assert.ok(findOneResult.$similarity > 0.8);
+
+    const deleteResult = await collection.deleteOne({}, {
+      vectorize: 'Alice likes big red cars',
+    });
+
+    assert.ok(deleteResult);
+    assert.strictEqual(deleteResult.deletedCount, 1);
+
+    // One of the three documents was deleted above, so two must remain.
+    const findResult = await collection.find({}, {
+      vectorize: 'Cause maybe, you\'re gonna be the one that saves me... and after all, you\'re my wonderwall...',
+      includeSimilarity: true,
+    }).toArray();
+
+    assert.strictEqual(findResult.length, 2);
+  }).timeout(90000);
+}
diff --git a/tests/integration/misc/code-samples.test.ts b/tests/integration/misc/code-samples.test.ts index 0df34f71..07f7aa39 100644 --- a/tests/integration/misc/code-samples.test.ts +++ b/tests/integration/misc/code-samples.test.ts @@ -270,7 +270,6 @@ describe('integration.misc.code-samples', () => { { 'Marco': 'Polo' }, { $set: { title: 'Mr.' } }, ); - // console.log(updated1?.modifiedCount); assert.strictEqual(updated1?.matchedCount, 1); assert.strictEqual(updated1?.modifiedCount, 1); assert.strictEqual(updated1?.upsertedCount, 0); @@ -280,7 +279,6 @@ describe('integration.misc.code-samples', () => { { name: 'Johnny' }, { $set: { rank: 0 } }, ); - // console.log(updated2.matchedCount, updated2?.upsertedCount); assert.strictEqual(updated2?.matchedCount, 0); assert.strictEqual(updated2?.modifiedCount, 0); assert.strictEqual(updated2?.upsertedCount, 0); @@ -291,23 +289,9 @@ describe('integration.misc.code-samples', () => { { $set: { rank: 0 } }, { upsert: true }, ); - // console.log(updated3.matchedCount, updated3?.upsertedCount); assert.strictEqual(updated3?.matchedCount, 0); assert.strictEqual(updated3?.modifiedCount, 0); assert.strictEqual(updated3?.upsertedCount, 1); }); }); - - // describe('[vectorize] vectorize', () => { - // let db: Db; - // - // before(async function () { - // assertTestsEnabled(this, 'VECTORIZE'); - // [, db] = await initTestObjects(this); - // }); - // - // it('works for collection creation', async () => { - // - // }); - // }); }); diff --git a/tests/unit/client/data-api-client.test.ts b/tests/unit/client/data-api-client.test.ts index a07c64c5..d60cb8bf 100644 --- a/tests/unit/client/data-api-client.test.ts +++ b/tests/unit/client/data-api-client.test.ts @@ -73,11 +73,23 @@ describe('unit.client.data-api-client', () => { // @ts-expect-error - testing invalid input assert.throws(() => 
new DataAPIClient('dummy-token', { caller: [[]] })); // @ts-expect-error - testing invalid input + assert.throws(() => new DataAPIClient('dummy-token', { caller: [['a', 'b'], 3] })); + // @ts-expect-error - testing invalid input assert.throws(() => new DataAPIClient('dummy-token', { caller: [{}] })); // @ts-expect-error - testing invalid input assert.throws(() => new DataAPIClient('dummy-token', { caller: { 0: ['name', 'version'] } })); }); + it('should only accept valid http client types', () => { + assert.doesNotThrow(() => new DataAPIClient('dummy-token', { httpOptions: {} })); + assert.doesNotThrow(() => new DataAPIClient('dummy-token', { httpOptions: { client: 'default' } })); + assert.doesNotThrow(() => new DataAPIClient('dummy-token', { httpOptions: { client: 'fetch' } })); + // @ts-expect-error - testing invalid input + assert.throws(() => new DataAPIClient('dummy-token', { httpOptions: { client: 'archspire' } })); + // @ts-expect-error - testing invalid input + assert.throws(() => new DataAPIClient('dummy-token', { httpOptions: { client: 12312312312 } })); + }); + describe('using fetch-h2', () => { it('uses http2 by default', function () { const client = new DataAPIClient('dummy-token', { httpOptions: {} });