Skip to content

Commit

Permalink
Merge pull request #85 from r-world-devs/maciekbanas/69/add-vectordat…
Browse files Browse the repository at this point in the history
…abase-method-purge_records

Add VectorDatabase method to purge records
  • Loading branch information
kalimu authored Feb 4, 2025
2 parents ef68b52 + ec757eb commit 3f8c005
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 63 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GitAI
Title: Extracts Knowledge From Git Repositories
Version: 0.0.0.9013
Version: 0.0.0.9015
Authors@R: c(
person("Kamil", "Wais", , "[email protected]", role = c("aut", "cre")),
person("Krystian", "Igras", , "[email protected]", role = "aut"),
Expand Down
175 changes: 116 additions & 59 deletions R/Pinecone.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@ Pinecone <- R6::R6Class(
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

url <- paste0("https://api.pinecone.io/indexes/", private$.index)
httr2::request(url) |>
httr2::req_headers("Api-Key" = pinecone_api_key) |>
httr2::req_perform() |>

httr2::request(url) |>
httr2::req_headers("Api-Key" = pinecone_api_key) |>
httr2::req_perform() |>
httr2::resp_body_json()
},
},

write_record = function(id, text, metadata = list()) {
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")
url <- paste0("https://", private$.index_host)

pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

url <- paste0("https://", private$.index_host)

embeddings <- private$.get_embeddings(text = text)

metadata$text <- text

body <- list(
namespace = private$.namespace,
vectors = list(
Expand All @@ -33,28 +33,28 @@ Pinecone <- R6::R6Class(
metadata = metadata
)
)
request <- httr2::request(url) |>
httr2::req_url_path_append("vectors/upsert") |>

request <- httr2::request(url) |>
httr2::req_url_path_append("vectors/upsert") |>
httr2::req_headers(
"Api-Key" = pinecone_api_key,
"X-Pinecone-API-Version" = "2024-10"
) |>
httr2::req_body_json(body)
response <- request |>
) |>
httr2::req_body_json(body)

response <- request |>
httr2::req_perform()

response_body <- httr2::resp_body_json(response)
response_body
},

read_record = function(id) {

pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

url <- paste0("https://", private$.index_host)

request <- httr2::request(url) |>
httr2::req_url_path_append("vectors") |>
httr2::req_url_path_append("fetch") |>
Expand All @@ -65,58 +65,115 @@ Pinecone <- R6::R6Class(
httr2::req_headers(
"Api-Key" = pinecone_api_key,
"X-Pinecone-API-Version" = "2024-10"
)
response <- request |>
)

response <- request |>
httr2::req_perform()

response_body <- httr2::resp_body_json(response)
results <- response_body$vectors
results

results
},


find_records = function(query, top_k = 1) {

embeddings <- private$.get_embeddings(query)

pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

url <- paste0("https://", private$.index_host)

body <- list(
namespace = private$.namespace,
vector = embeddings,
topK = top_k,
includeValues = FALSE,
includeMetadata = TRUE
)

request <- httr2::request(url) |>
httr2::req_url_path_append("query") |>
httr2::req_headers(
"Api-Key" = pinecone_api_key,
"X-Pinecone-API-Version" = "2024-10"
) |>
httr2::req_body_json(body)
response <- request |>

response <- request |>
httr2::req_perform()

response_body <- httr2::resp_body_json(response)
results <- response_body$matches
results |>

results |>
purrr::map(function(result) {
result$values <- NULL
result
})
},

# List the IDs of all records stored in the current namespace.
#
# Requests the index's `vectors/list` endpoint page by page, passing the
# `pagination$next` token of each response on to the following request.
#
# Returns a character vector of record IDs; `character(0)` when the
# namespace holds no records.
list_record_IDs = function() {

  pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

  url <- paste0("https://", private$.index_host)

  # One character vector of IDs per fetched page; flattened at the end.
  id_pages <- list()
  next_token <- NULL

  repeat {

    response_body <- httr2::request(url) |>
      httr2::req_url_path_append("vectors") |>
      httr2::req_url_path_append("list") |>
      httr2::req_url_query(
        namespace = private$.namespace,
        # NULL on the first request, so the parameter is omitted.
        paginationToken = next_token
      ) |>
      httr2::req_headers(
        "Api-Key" = pinecone_api_key,
        "X-Pinecone-API-Version" = "2024-10"
      ) |>
      httr2::req_perform() |>
      httr2::resp_body_json()

    # map_chr() yields character(0) for an empty page, never NULL.
    id_pages[[length(id_pages) + 1L]] <-
      purrr::map_chr(response_body$vectors, "id")

    # Stop on a missing or empty token rather than on the mere presence of
    # a `pagination` element; otherwise a token-less `pagination` object
    # would make the loop re-request the first page forever.
    next_token <- response_body$pagination$`next`
    if (is.null(next_token) || !nzchar(next_token)) {
      break
    }
  }

  as.character(unlist(id_pages))
},

# Delete records from the current namespace by ID.
#
# `ids` is a character vector (or list) of record IDs to delete.
#
# Returns the httr2 response of the delete request. When `ids` is empty no
# request is sent and `NULL` is returned invisibly.
# NOTE(review): Pinecone may cap the number of IDs per delete request;
# confirm the limit before purging very large namespaces in one call.
purge_records = function(ids) {

  # Nothing to delete: skip the request instead of posting an empty or
  # malformed `ids` field.
  if (length(ids) == 0) {
    return(invisible(NULL))
  }

  pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

  url <- paste0("https://", private$.index_host)

  body <- list(
    # httr2::req_body_json() auto-unboxes length-one vectors, which would
    # turn a single ID into a JSON string. An unnamed list is always
    # serialized as a JSON array.
    ids = as.list(unname(ids)),
    namespace = private$.namespace
  )

  httr2::request(url) |>
    httr2::req_url_path_append("vectors") |>
    httr2::req_url_path_append("delete") |>
    httr2::req_headers(
      "Api-Key" = pinecone_api_key,
      "X-Pinecone-API-Version" = "2024-10"
    ) |>
    httr2::req_body_json(body) |>
    httr2::req_perform()
}
),

active = list(

namespace = function(value) {
if (missing(value)) return(private$.namespace)
private$.namespace <- value
Expand All @@ -127,14 +184,14 @@ Pinecone <- R6::R6Class(
private$.index <- value
}
),

private = list(

.project_id = NULL,
.index = NULL,
.namespace = NULL,
.index_host = NULL,

.initialize = function(index, namespace) {

private$.index <- index
Expand All @@ -143,37 +200,37 @@ Pinecone <- R6::R6Class(
},

.get_embeddings = function(text) {
pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

pinecone_api_key <- Sys.getenv("PINECONE_API_KEY")

url <- "https://api.pinecone.io"

body <- list(
model = "multilingual-e5-large",
parameters = list(
input_type = "passage",
truncate = "END"
),
),
inputs = list(
list(text = text)
)
)
)

request <- httr2::request(url) |>
httr2::req_url_path_append("embed") |>
request <- httr2::request(url) |>
httr2::req_url_path_append("embed") |>
httr2::req_headers(
"Api-Key" = pinecone_api_key,
"X-Pinecone-API-Version" = "2024-10"
) |>
httr2::req_body_json(body)
response <- request |>
) |>
httr2::req_body_json(body)

response <- request |>
httr2::req_perform()

response_body <- httr2::resp_body_json(response)

response_body$data[[1]]$values |> unlist()

}
)
)
47 changes: 47 additions & 0 deletions R/test-helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,31 @@ PineconeMocked <- R6::R6Class(
result$values <- NULL
result
})
},

# Mocked ID listing: the request is assembled but never performed, and the
# response body is taken from the `list_record_IDs` test fixture.
#
# Returns the `id` of every entry in the fixture's `vectors` element.
list_record_IDs = function() {

  api_key <- Sys.getenv("PINECONE_API_KEY")
  base_url <- paste0("https://", private$.index_host)

  list_endpoint <- httr2::request(base_url) |>
    httr2::req_url_path_append("vectors") |>
    httr2::req_url_path_append("list")

  # Assembled like the real call, but not sent anywhere.
  request <- list_endpoint |>
    httr2::req_url_query(
      namespace = private$.namespace
    ) |>
    httr2::req_headers(
      "Api-Key" = api_key,
      "X-Pinecone-API-Version" = "2024-10"
    )

  fixture <- test_fixtures[["list_record_IDs"]]

  parsed_body <- httr2::response_json(body = fixture) |>
    httr2::resp_body_json()

  purrr::map_vec(parsed_body$vectors, function(vector) vector$id)
}
),

Expand Down Expand Up @@ -235,3 +260,25 @@ test_fixtures[["read_record"]] <- list(
"namespace" = "gitai-tests",
"usage" = list("readUnits" = 1L)
)

# Canned response body served by the mocked `list_record_IDs()`: five
# records with IDs "project_1" to "project_5".
test_fixtures[["list_record_IDs"]] <- list(
  "vectors" = lapply(
    paste0("project_", 1:5),
    function(record_id) list("id" = record_id)
  ),
  "namespace" = "gitai-tests",
  "usage" = list("readUnits" = 1L)
)
9 changes: 6 additions & 3 deletions inst/example_workflow.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
gitai_demo <- initialize_project("gitai-tests") |>
set_database(index = "gitai-mb",
namespace = "gitai-demo-2") |>
gitai_demo <- initialize_project("gitai-demo-2") |>
set_database(index = "gitai-mb") |>
set_github_repos(
orgs = "r-world-devs"
) |>
Expand All @@ -13,3 +12,7 @@ process_repos(gitai_demo)
gitai_demo$db$find_records("Find package with which I can plot data.")

gitai_demo$db$read_record("GitStats")

record_ids <- gitai_demo$db$list_record_IDs()

gitai_demo$db$purge_records(record_ids)
13 changes: 13 additions & 0 deletions tests/testthat/test-Pinecone.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,16 @@ test_that("reading records", {
is.character() |>
expect_true()
})

test_that("listing all records IDs", {

  # The mocked database answers from a fixture, so no network is needed.
  mocked_db <- PineconeMocked$new(
    index = "gitai",
    namespace = "test_project_id"
  )

  record_ids <- mocked_db$list_record_IDs()

  # Only the shape of the result is checked: a character vector holding
  # more than one ID.
  expect_type(record_ids, "character")
  expect_gt(length(record_ids), 1)
})

0 comments on commit 3f8c005

Please sign in to comment.