From 1532e95c5c320d3827c422f3a8b3436c06b943ac Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 14:38:26 +0000 Subject: [PATCH 1/3] Add new method to list record IDs. --- R/Pinecone.R | 154 ++++++++++++++++++++------------- R/test-helpers.R | 47 ++++++++++ tests/testthat/test-Pinecone.R | 13 +++ 3 files changed, 155 insertions(+), 59 deletions(-) diff --git a/R/Pinecone.R b/R/Pinecone.R index 13c86c4..003f369 100644 --- a/R/Pinecone.R +++ b/R/Pinecone.R @@ -8,23 +8,23 @@ Pinecone <- R6::R6Class( pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") url <- paste0("https://api.pinecone.io/indexes/", private$.index) - - httr2::request(url) |> - httr2::req_headers("Api-Key" = pinecone_api_key) |> - httr2::req_perform() |> + + httr2::request(url) |> + httr2::req_headers("Api-Key" = pinecone_api_key) |> + httr2::req_perform() |> httr2::resp_body_json() - }, - + }, + write_record = function(id, text, metadata = list()) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - - url <- paste0("https://", private$.index_host) - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + embeddings <- private$.get_embeddings(text = text) - + metadata$text <- text - + body <- list( namespace = private$.namespace, vectors = list( @@ -33,18 +33,18 @@ Pinecone <- R6::R6Class( metadata = metadata ) ) - - request <- httr2::request(url) |> - httr2::req_url_path_append("vectors/upsert") |> + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors/upsert") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) response_body }, @@ -52,9 +52,9 @@ Pinecone <- R6::R6Class( read_record = function(id) { pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + request <- httr2::request(url) |> httr2::req_url_path_append("vectors") |> httr2::req_url_path_append("fetch") |> @@ -65,26 +65,26 @@ Pinecone <- R6::R6Class( httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) - - response <- request |> + ) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$vectors - - results + + results }, - + find_records = function(query, top_k = 1) { - + embeddings <- private$.get_embeddings(query) - + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + body <- list( namespace = private$.namespace, vector = embeddings, @@ -92,7 +92,7 @@ Pinecone <- R6::R6Class( includeValues = FALSE, includeMetadata = TRUE ) - + request <- httr2::request(url) |> httr2::req_url_path_append("query") |> httr2::req_headers( @@ -100,23 +100,59 @@ Pinecone <- R6::R6Class( "X-Pinecone-API-Version" = "2024-10" ) |> httr2::req_body_json(body) - - response <- request |> + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$matches - - results |> + + results |> purrr::map(function(result) { result$values <- NULL result }) + }, + + list_record_IDs = function() { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + response_body <- NULL + has_next_page <- TRUE + record_ids <- c() + + while (has_next_page) { + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace, + paginationToken = response_body$pagination$`next` + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- request |> + httr2::req_perform() + + response_body <- httr2::resp_body_json(response) + record_ids <- c(record_ids, + purrr::map_vec(response_body$vectors, ~ .$id)) + has_next_page <- "pagination" %in% names(response_body) + } + + return(record_ids) } ), active = list( - + namespace = function(value) { if (missing(value)) return(private$.namespace) private$.namespace <- value @@ -127,14 +163,14 @@ Pinecone <- R6::R6Class( private$.index <- value } ), - + private = list( - + .project_id = NULL, .index = NULL, .namespace = NULL, .index_host = NULL, - + .initialize = function(index, namespace) { private$.index <- index @@ -143,37 +179,37 @@ Pinecone <- R6::R6Class( }, .get_embeddings = function(text) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + url <- "https://api.pinecone.io" - + body <- list( model = "multilingual-e5-large", parameters = list( input_type = "passage", truncate = "END" - ), + ), inputs = list( list(text = text) - ) + ) ) - request <- httr2::request(url) |> - httr2::req_url_path_append("embed") |> + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) - + response_body$data[[1]]$values |> unlist() - + } ) ) diff --git a/R/test-helpers.R b/R/test-helpers.R index 59a94b8..3cd5c56 100644 --- a/R/test-helpers.R +++ b/R/test-helpers.R @@ -134,6 +134,31 @@ PineconeMocked <- R6::R6Class( result$values <- NULL result }) + }, + + list_record_IDs = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- httr2::response_json( + body = test_fixtures[["list_record_IDs"]] + ) + + response_body <- httr2::resp_body_json(response) + + purrr::map_vec(response_body$vectors, ~ .$id) } ), @@ -235,3 +260,25 @@ test_fixtures[["read_record"]] <- list( "namespace" = "gitai-tests", "usage" = list("readUnits" = 1L) ) + +test_fixtures[["list_record_IDs"]] <- list( + "vectors" = list( + list( + "id" = "project_1" + ), + list( + "id" = "project_2" + ), + list( + "id" = "project_3" + ), + list( + "id" = "project_4" + ), + list( + "id" = "project_5" + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 1L) +) diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index 7350ff1..0fd4296 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -81,3 +81,16 @@ test_that("reading records", { is.character() |> expect_true() }) + +test_that("listing all records IDs", { + + db <- PineconeMocked$new( + namespace = "test_project_id", + index = "gitai" + ) + + result <- db$list_record_IDs() + + expect_type(result, "character") + expect_gt(length(result), 1) +}) From 0186226318838150b8c0fe358977f2027d0705ad Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 14:44:27 +0000 Subject: [PATCH 2/3] Bump version. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 905d8ba..1d98d80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9013 +Version: 0.0.0.9014 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), From ec757eba6d12bfe67c1fe36d5636edf139217b6e Mon Sep 17 00:00:00 2001 From: Maciej Banas Date: Fri, 27 Dec 2024 15:21:17 +0000 Subject: [PATCH 3/3] Add method to purge records from namespace. --- DESCRIPTION | 2 +- R/Pinecone.R | 21 +++++++++++++++++++++ inst/example_workflow.R | 9 ++++++--- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1d98d80..c78c2b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9014 +Version: 0.0.0.9015 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), diff --git a/R/Pinecone.R b/R/Pinecone.R index 003f369..e0e4fbc 100644 --- a/R/Pinecone.R +++ b/R/Pinecone.R @@ -148,6 +148,27 @@ Pinecone <- R6::R6Class( } return(record_ids) + }, + + purge_records = function(ids) { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + body <- list( + ids = ids, + namespace = private$.namespace + ) + + httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("delete") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) |> + httr2::req_perform() } ), diff --git a/inst/example_workflow.R b/inst/example_workflow.R index d90acec..bc5ae28 100644 --- a/inst/example_workflow.R +++ b/inst/example_workflow.R @@ -1,6 +1,5 @@ -gitai_demo <- initialize_project("gitai-tests") |> - set_database(index = "gitai-mb", - namespace = "gitai-demo-2") |> +gitai_demo <- initialize_project("gitai-demo-2") |> + set_database(index = "gitai-mb") |> set_github_repos( orgs = "r-world-devs" ) |> @@ -13,3 +12,7 @@ process_repos(gitai_demo) gitai_demo$db$find_records("Find package with which I can plot data.") gitai_demo$db$read_record("GitStats") + +record_ids <- gitai_demo$db$list_record_IDs() + +gitai_demo$db$purge_records(record_ids)