Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

switch photography app to use Nomic for embeddings instead of OpenAI+MediaPipe #488

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions photography-site-demo.js/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@
# false - if connecting to a local/remote Data API instance instead of Astra DB
IS_ASTRA=

#OPENAI api key
OPENAI_API_KEY=
#Nomic API key for generating embeddings
NOMIC_API_KEY=

#Fill the Local JSON API related details only when IS_ASTRA is set to 'false'
#Local JSON API URL for example: http://127.0.0.1:8181/v1/photography where 'photography' is the keyspace name
JSON_API_URL=
#Auth URL for example: http://127.0.0.1:8081/v1/auth
JSON_API_AUTH_URL=
#Fill the Local Data API related details only when IS_ASTRA is set to 'false'
#Local Data API URL for example: http://127.0.0.1:8181/v1/photography where 'photography' is the keyspace name
DATA_API_URL=
#Auth username and password
JSON_API_AUTH_USERNAME=
JSON_API_AUTH_PASSWORD=
DATA_API_AUTH_USERNAME=
DATA_API_AUTH_PASSWORD=

#Fill the ASTRA DB related details only when IS_ASTRA is set to 'true'
#Astra DB API URL
ASTRA_API_ENDPOINT=
#Astra DB namespace
ASTRA_NAMESPACE=
#Astra DB Application Token
ASTRA_APPLICATION_TOKEN=
ASTRA_APPLICATION_TOKEN=
7 changes: 3 additions & 4 deletions photography-site-demo.js/.env.test
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
JSON_API_URL=http://127.0.0.1:8181/v1/photography
JSON_API_AUTH_URL=http://127.0.0.1:8081/v1/auth
JSON_API_AUTH_USERNAME=cassandra
JSON_API_AUTH_PASSWORD=cassandra
DATA_API_URL=http://127.0.0.1:8181/v1/photography
DATA_API_AUTH_USERNAME=cassandra
DATA_API_AUTH_PASSWORD=cassandra
10 changes: 4 additions & 6 deletions photography-site-demo.js/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# photography-site

This sample app demonstrates a photography site by using [`express`](https://www.npmjs.com/package/express), [`mongoose`](https://github.com/Automattic/mongoose) and [`stargate-mongoose`](https://github.com/stargate/stargate-mongoose).
This sample app demonstrates a photography site by using [`express`](https://www.npmjs.com/package/express), [`mongoose`](https://github.com/Automattic/mongoose) and [`stargate-mongoose`](https://github.com/stargate/stargate-mongoose).

## Environment

Expand Down Expand Up @@ -38,10 +38,9 @@ Make sure you have Node.js 17 or higher
1. Copy the `.env.example` file to `.env` and fill in the values for the environment variables.
2. Set `IS_ASTRA` to `false`
3. Set `NOMIC_API_KEY` to your Nomic API key
4. Set `JSON_API_URL` to `http://127.0.0.1:8181/v1/photography`
5. Set `JSON_API_AUTH_URL` to `http://127.0.0.1:8181/v1/auth`
6. Set `JSON_API_AUTH_USERNAME` to `cassandra`
7. Set `JSON_API_AUTH_PASSWORD` to `cassandra`
4. Set `DATA_API_URL` to `http://127.0.0.1:8181/v1/photography`
5. Set `DATA_API_AUTH_USERNAME` to `cassandra`
6. Set `DATA_API_AUTH_PASSWORD` to `cassandra`



Expand All @@ -51,4 +50,3 @@ Make sure you have Node.js 17 or higher
2. Run `npm run seed`
3. Run `npm start`
4. Visit `http://127.0.0.1:3000` to see the UI

6 changes: 3 additions & 3 deletions photography-site-demo.js/server/models/Photo.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const mongoose = require('./mongoose');
const options = {
collectionOptions: {
vector: {
size: 1536, //embedding array size for openAI embedding api, text->vector
size: 768, //embedding array size for Nomic embedding api, text->vector
function: 'cosine'
}
}
Expand All @@ -30,10 +30,10 @@ const photoSchema = new mongoose.Schema({
},
$vector: {
type: [Number],
validate: v => v == null || v.length === 1536
validate: [v => v == null || v.length === 768, 'Invalid vector length, must be 768']
}
}, options);



module.exports = mongoose.model('photo', photoSchema);
module.exports = mongoose.model('photo', photoSchema);
6 changes: 3 additions & 3 deletions photography-site-demo.js/server/models/PhotoEmbedding.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const mongoose = require('./mongoose');
const options = {
collectionOptions: {
vector: {
size: 1280, //embedding array size for google embedding support, image->vector
size: 768, //embedding array size for Nomic embedding support, image->vector
function: 'cosine'
}
}
Expand All @@ -30,10 +30,10 @@ const photoEmbeddingSchema = new mongoose.Schema({
},
$vector: {
type: [Number],
validate: v => v == null || v.length === 1280
validate: [v => v == null || v.length === 768, 'Invalid vector length, must be 768']
}
}, options);



module.exports = mongoose.model('photoembedding', photoEmbeddingSchema);
module.exports = mongoose.model('photoembedding', photoEmbeddingSchema);
9 changes: 4 additions & 5 deletions photography-site-demo.js/server/models/connect.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@ module.exports = async function connect() {
isAstra: true
};
} else {
uri = process.env.JSON_API_URL;
uri = process.env.DATA_API_URL;
jsonApiConnectOptions = {
username: process.env.JSON_API_AUTH_USERNAME,
password: process.env.JSON_API_AUTH_PASSWORD,
authUrl: process.env.JSON_API_AUTH_URL
username: process.env.DATA_API_AUTH_USERNAME,
password: process.env.DATA_API_AUTH_PASSWORD
};
}
await mongoose.connect(uri, jsonApiConnectOptions);
await Promise.all(Object.values(mongoose.connection.models).map(Model => Model.init()));
return conn;
};
};
29 changes: 17 additions & 12 deletions photography-site-demo.js/server/utils/imageEmbeddingGenerator.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
'use strict';
const { PythonShell } = require('python-shell');

const axios = require('axios');
const fs = require('fs');

module.exports = async function getPhotoEmbedding(fileName) {
const filePath = './public/uploads/' + fileName;
const options = {
args: [filePath]
};

return new Promise((resolve, reject) => {
PythonShell.run('./server/utils/imageEmbeddingGenerator.py', options).then(messages => {
const vector = JSON.parse(messages[0]);
resolve(vector);
}).catch(error => {
reject(error);
});
const response = await axios.post(
'https://api-atlas.nomic.ai/v1/embedding/image',
{
model: 'nomic-embed-vision-v1.5',
images: fs.createReadStream(filePath)
},
{
headers: {
'Authorization': `Bearer ${process.env.NOMIC_API_KEY}`,
'Content-Type': 'multipart/form-data'
}
});
};

return response.data.embeddings[0];
};

This file was deleted.

Binary file not shown.
34 changes: 16 additions & 18 deletions photography-site-demo.js/server/utils/textEmbeddingGenerator.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
'use strict';
// openAI embedding
// https://platform.openai.com/docs/api-reference/embeddings/create
const axios = require('axios');
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const endpoint = 'https://api.openai.com/v1/embeddings';
const modelType = 'text-embedding-ada-002';

const config = {
headers: {
Authorization: `Bearer ${OPENAI_API_KEY}`,
'Content-Type': 'application/json'
}
};
const axios = require('axios');

module.exports = async function createPhotoEmbedding(photoDescription) {
const requestData = {
input: photoDescription,
model: modelType
};
const response = await axios.post(endpoint, requestData, config);
return response.data.data[0].embedding;
const response = await axios.post(
'https://api-atlas.nomic.ai/v1/embedding/text',
{
model: 'nomic-embed-text-v1',
texts: [photoDescription],
task_type: 'search_document',
dimensionality: 768
},
{
headers: {
'Authorization': `Bearer ${process.env.NOMIC_API_KEY}`
}
}
);
return response.data.embeddings[0];
};
6 changes: 3 additions & 3 deletions photography-site-demo.js/tests/vectorSearchTest.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const Photo = require('../server/models/Photo');
describe('Vector Search Tests', function() {
it('Photo Text Embedding Test', async function() {

// Generate 1536 random float numbers between 0 and 1
const targetVector = Array.from({ length: 1536 }, () => Math.random());
// Generate 768 random float numbers between 0 and 1
const targetVector = Array.from({ length: 768 }, () => Math.random());

const photo1 = new Photo({
name: 'testName1',
Expand All @@ -25,7 +25,7 @@ describe('Vector Search Tests', function() {
description: 'These violent delights have violent ends',
category: 'landscape',
image: 'testImage2',
$vector: Array.from({ length: 1536 }, () => Math.random())
$vector: Array.from({ length: 768 }, () => Math.random())
});
await photo2.save();

Expand Down
Loading