Skip to content

Commit

Permalink
Merge pull request #154 from llm-tools/loaders
Browse files Browse the repository at this point in the history
Added xml loader
  • Loading branch information
adhityan authored Nov 4, 2024
2 parents 959e184 + 3bf6b07 commit 73cd1e7
Show file tree
Hide file tree
Showing 78 changed files with 1,301 additions and 710 deletions.
6 changes: 6 additions & 0 deletions core/embedjs-interfaces/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs-interfaces/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-interfaces",
"version": "0.1.15",
"version": "0.1.16",
"description": "Interfaces for extending the embedjs ecosystem",
"dependencies": {
"@langchain/core": "^0.3.16",
"@langchain/core": "^0.3.17",
"debug": "^4.3.7",
"md5": "^2.3.0",
"uuid": "^11.0.2"
Expand Down
6 changes: 6 additions & 0 deletions core/embedjs-utils/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs-utils/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-utils",
"version": "0.1.15",
"version": "0.1.16",
"description": "Useful util functions when extending the embedjs ecosystem",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15"
"@llm-tools/embedjs-interfaces": "0.1.16"
},
"type": "module",
"main": "./src/index.js",
Expand Down
1 change: 1 addition & 0 deletions core/embedjs-utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ export * from './util/arrays.js';
export * from './util/log.js';
export * from './util/stream.js';
export * from './util/strings.js';
export * from './util/web.js';
45 changes: 45 additions & 0 deletions core/embedjs-utils/src/util/web.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import createDebugMessages from 'debug';

/**
 * User-Agent header value that `getSafe` sends when the caller does not supply one.
 * The string identifies as desktop Chrome 130 on 64-bit Windows 10.
 */
const DEFAULT_USER_AGENT =
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36';

/**
 * Fields that every `getSafe` result carries in addition to `body`,
 * regardless of the requested body format.
 */
type getSafeResponsePartial = {
/** Headers taken from the fetch response. */
headers: Headers;
/** HTTP status code taken from the fetch response. */
statusCode: number;
};

/**
 * Performs a GET request with `fetch` and returns the response body in the requested format
 * together with the response status code and headers.
 *
 * A `User-Agent` header is added (see `DEFAULT_USER_AGENT`) unless the caller already supplied
 * one under any letter casing. The caller's `headers` object is never modified.
 *
 * @param url - Address to request.
 * @param options - Optional request settings.
 *   `headers` are extra request headers.
 *   `format` selects how the body is returned: `'text'` (string), `'buffer'` (Buffer) or
 *   `'stream'` (the unread response body stream); defaults to `'stream'`.
 * @returns The body in the requested format plus `statusCode` and `headers` of the response.
 * @throws Error when the response status code is anything other than 200.
 */
export async function getSafe(
    url: string,
    options: { headers?: Record<string, string>; format: 'text' },
): Promise<{ body: string } & getSafeResponsePartial>;
export async function getSafe(
    url: string,
    options: { headers?: Record<string, string>; format: 'buffer' },
): Promise<{ body: Buffer } & getSafeResponsePartial>;
export async function getSafe(
    url: string,
    options?: { headers?: Record<string, string>; format?: 'stream' },
): Promise<{ body: NodeJS.ReadableStream } & getSafeResponsePartial>;
export async function getSafe(
    url: string,
    options?: { headers?: Record<string, string>; format?: 'text' | 'stream' | 'buffer' },
) {
    // Copy into a Headers instance: this keeps the caller's object untouched and makes the
    // User-Agent lookup case-insensitive, so a caller-provided 'user-agent' is respected
    // instead of being sent alongside a second, default value.
    const headers = new Headers(options?.headers);
    if (!headers.has('User-Agent')) headers.set('User-Agent', DEFAULT_USER_AGENT);

    const format = options?.format ?? 'stream';
    const response = await fetch(url, { headers });
    createDebugMessages('embedjs:util:getSafe')(`URL '${url}' returned status code ${response.status}`);

    if (response.status !== 200) {
        // The body is never read on this path; cancel it so the connection is not left
        // waiting on unread data. A failure to cancel must not mask the real error below.
        await response.body?.cancel().catch(() => undefined);
        throw new Error(`Failed to fetch URL '${url}'. Got status code ${response.status}.`);
    }

    let body: string | Buffer | NodeJS.ReadableStream;
    switch (format) {
        case 'text':
            body = await response.text();
            break;
        case 'buffer':
            body = Buffer.from(await response.arrayBuffer());
            break;
        default:
            // NOTE(review): `response.body` is a web ReadableStream (and may be null); the cast
            // only changes the static type — confirm consumers do not rely on Node-only stream APIs.
            body = response.body as unknown as NodeJS.ReadableStream;
            break;
    }

    return {
        body,
        statusCode: response.status,
        headers: response.headers,
    };
}
10 changes: 10 additions & 0 deletions core/embedjs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

### 🩹 Fixes

- renamed remaining instances of vectorDb to vectorDatabase ([ca79586](https://github.com/llm-tools/embedJs/commit/ca79586))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
8 changes: 4 additions & 4 deletions core/embedjs/package.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "module",
"name": "@llm-tools/embedjs",
"version": "0.1.15",
"version": "0.1.16",
"description": "A NodeJS RAG framework to easily work with LLMs and custom datasets",
"dependencies": {
"@langchain/textsplitters": "^0.1.0",
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-utils": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"@llm-tools/embedjs-utils": "0.1.16",
"debug": "^4.3.7",
"langchain": "^0.3.5",
"md5": "^2.3.0",
Expand All @@ -16,7 +16,7 @@
"devDependencies": {
"@types/debug": "^4.1.12",
"@types/md5": "^2.3.5",
"@types/node": "^22.8.6"
"@types/node": "^22.8.7"
},
"main": "./src/index.js",
"license": "Apache-2.0",
Expand Down
1 change: 0 additions & 1 deletion core/embedjs/src/loaders/json-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ export class JsonLoader extends BaseLoader<{ type: 'JsonLoader' }> {
metadata: {
type: 'JsonLoader' as const,
source: tuncatedObjectString,
...entry,
},
};
}
Expand Down
4 changes: 2 additions & 2 deletions core/embedjs/src/loaders/url-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { getMimeType } from 'stream-mime-type';
import createDebugMessages from 'debug';
import md5 from 'md5';

import { contentTypeToMimeType, truncateCenterString } from '@llm-tools/embedjs-utils';
import { contentTypeToMimeType, getSafe, truncateCenterString } from '@llm-tools/embedjs-utils';
import { BaseLoader } from '@llm-tools/embedjs-interfaces';
import { createLoaderFromMimeType } from '../util/mime.js';

Expand All @@ -17,7 +17,7 @@ export class UrlLoader extends BaseLoader<{ type: 'UrlLoader' }> {
}

override async *getUnfilteredChunks() {
const response = await fetch(this.url, { headers: { 'Accept-Encoding': '' } });
const response = await getSafe(this.url.toString(), { headers: { 'Accept-Encoding': '' } });
const stream = response.body as unknown as NodeJS.ReadableStream;
let { mime } = await getMimeType(stream, { strict: true });
this.debug(`Loader stream detected type '${mime}'`);
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-astra/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-astra/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-astradb",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add AstraDB support to embedjs",
"dependencies": {
"@datastax/astra-db-ts": "^1.5.0",
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-cosmos/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-cosmos/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-cosmos",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add CosmosDB support to embedjs",
"dependencies": {
"@azure/cosmos": "^4.1.1",
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"debug": "^4.3.7"
},
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-hnswlib/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-hnswlib/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-hnswlib",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add HNSWLib support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"debug": "^4.3.7",
"hnswlib-node": "^3.0.0"
},
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-lancedb/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lancedb/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "@llm-tools/embedjs-lancedb",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add LanceDb support to embedjs",
"dependencies": {
"@lancedb/lancedb": "^0.12.0",
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"compute-cosine-similarity": "^1.1.0"
},
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-lmdb/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-lmdb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-lmdb",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add LMDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"lmdb": "^3.1.4"
},
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-mongodb/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-mongodb/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-mongodb",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add MongoDB support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"debug": "^4.3.7",
"mongodb": "^6.10.0"
},
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-pinecone/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-pinecone/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-pinecone",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add Pinecone support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"@pinecone-database/pinecone": "^4.0.0",
"debug": "^4.3.7"
},
Expand Down
2 changes: 1 addition & 1 deletion databases/embedjs-pinecone/src/pinecone-db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export class PineconeDb implements BaseVectorDatabase {
} catch {
this.debug(
`Failed to delete keys for loader '${uniqueLoaderId}'.
Pinecone does not allow deleting by metadata filtering in serverless and free (what they call starter) instances`,
Pinecone does not allow deleting by metadata filtering in serverless and free instances`,
);
return false;
}
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-qdrant/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-qdrant/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-qdrant",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add Qdrant support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"@qdrant/js-client-rest": "^1.12.0",
"debug": "^4.3.7",
"uuid": "^11.0.2"
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-redis/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-redis/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-redis",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add Redis support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"ioredis": "^5.4.1"
},
"type": "module",
Expand Down
6 changes: 6 additions & 0 deletions databases/embedjs-weaviate/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.1.16 (2024-11-04)

### 🚀 Features

- added xml loader ([9172511](https://github.com/llm-tools/embedJs/commit/9172511))

## 0.1.15 and 0.1.14 (2024-11-01)

### 🚀 Features
Expand Down
4 changes: 2 additions & 2 deletions databases/embedjs-weaviate/package.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"name": "@llm-tools/embedjs-weaviate",
"version": "0.1.15",
"version": "0.1.16",
"description": "Add Weaviate support to embedjs",
"dependencies": {
"@llm-tools/embedjs-interfaces": "0.1.15",
"@llm-tools/embedjs-interfaces": "0.1.16",
"compute-cosine-similarity": "^1.1.0",
"debug": "^4.3.7",
"weaviate-ts-client": "^2.2.0"
Expand Down
Loading

0 comments on commit 73cd1e7

Please sign in to comment.