diff --git a/DESCRIPTION b/DESCRIPTION index 64f261e..1d98d80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GitAI Title: Extracts Knowledge From Git Repositories -Version: 0.0.0.9012 +Version: 0.0.0.9014 Authors@R: c( person("Kamil", "Wais", , "kamil.wais@gmail.com", role = c("aut", "cre")), person("Krystian", "Igras", , "krystian8207@gmail.com", role = "aut"), @@ -30,4 +30,3 @@ Suggests: shiny, withr Config/testthat/edition: 3 -Config/testthat/parallel: true diff --git a/R/Pinecone.R b/R/Pinecone.R index 13c86c4..003f369 100644 --- a/R/Pinecone.R +++ b/R/Pinecone.R @@ -8,23 +8,23 @@ Pinecone <- R6::R6Class( pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") url <- paste0("https://api.pinecone.io/indexes/", private$.index) - - httr2::request(url) |> - httr2::req_headers("Api-Key" = pinecone_api_key) |> - httr2::req_perform() |> + + httr2::request(url) |> + httr2::req_headers("Api-Key" = pinecone_api_key) |> + httr2::req_perform() |> httr2::resp_body_json() - }, - + }, + write_record = function(id, text, metadata = list()) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - - url <- paste0("https://", private$.index_host) - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + embeddings <- private$.get_embeddings(text = text) - + metadata$text <- text - + body <- list( namespace = private$.namespace, vectors = list( @@ -33,18 +33,18 @@ Pinecone <- R6::R6Class( metadata = metadata ) ) - - request <- httr2::request(url) |> - httr2::req_url_path_append("vectors/upsert") |> + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors/upsert") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) response_body }, @@ -52,9 +52,9 @@ Pinecone <- R6::R6Class( read_record = function(id) { pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + request <- httr2::request(url) |> httr2::req_url_path_append("vectors") |> httr2::req_url_path_append("fetch") |> @@ -65,26 +65,26 @@ Pinecone <- R6::R6Class( httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) - - response <- request |> + ) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$vectors - - results + + results }, - + find_records = function(query, top_k = 1) { - + embeddings <- private$.get_embeddings(query) - + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + url <- paste0("https://", private$.index_host) - + body <- list( namespace = private$.namespace, vector = embeddings, @@ -92,7 +92,7 @@ Pinecone <- R6::R6Class( includeValues = FALSE, includeMetadata = TRUE ) - + request <- httr2::request(url) |> httr2::req_url_path_append("query") |> httr2::req_headers( @@ -100,23 +100,59 @@ Pinecone <- R6::R6Class( "X-Pinecone-API-Version" = "2024-10" ) |> httr2::req_body_json(body) - - response <- request |> + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) results <- response_body$matches - - results |> + + results |> purrr::map(function(result) { result$values <- NULL result }) + }, + + list_record_IDs = function() { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + response_body <- NULL + has_next_page <- TRUE + record_ids <- c() + + while (has_next_page) { + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace, + paginationToken = response_body$pagination$`next` + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- request |> + httr2::req_perform() + + response_body <- httr2::resp_body_json(response) + record_ids <- c(record_ids, + purrr::map_vec(response_body$vectors, ~ .$id)) + has_next_page <- "pagination" %in% names(response_body) + } + + return(record_ids) } ), active = list( - + namespace = function(value) { if (missing(value)) return(private$.namespace) private$.namespace <- value @@ -127,14 +163,14 @@ Pinecone <- R6::R6Class( private$.index <- value } ), - + private = list( - + .project_id = NULL, .index = NULL, .namespace = NULL, .index_host = NULL, - + .initialize = function(index, namespace) { private$.index <- index @@ -143,37 +179,37 @@ Pinecone <- R6::R6Class( }, .get_embeddings = function(text) { - - pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") - + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + url <- "https://api.pinecone.io" - + body <- list( model = "multilingual-e5-large", parameters = list( input_type = "passage", truncate = "END" - ), + ), inputs = list( list(text = text) - ) + ) ) - request <- httr2::request(url) |> - httr2::req_url_path_append("embed") |> + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> httr2::req_headers( "Api-Key" = pinecone_api_key, "X-Pinecone-API-Version" = "2024-10" - ) |> - httr2::req_body_json(body) - - response <- request |> + ) |> + httr2::req_body_json(body) + + response <- request |> httr2::req_perform() - + response_body <- httr2::resp_body_json(response) - + response_body$data[[1]]$values |> unlist() - + } ) ) diff --git a/R/test-helpers.R b/R/test-helpers.R index b1d3010..3cd5c56 100644 --- a/R/test-helpers.R +++ b/R/test-helpers.R @@ -19,3 +19,266 @@ Mocker <- R6::R6Class( } ) ) + +PineconeMocked <- R6::R6Class( + "PineconeMocked", + inherit = Pinecone, + public = list( + get_index_metadata = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://api.pinecone.io/indexes/", private$.index) + + response <- httr2::response_json( + body = test_fixtures[["pinecone_index_response"]] + ) + httr2::resp_body_json(response) + }, + + write_record = function(id, text, metadata = list()) { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + embeddings <- private$.get_embeddings(text = text) + + metadata$text <- text + + body <- list( + namespace = private$.namespace, + vectors = list( + id = id, + values = embeddings, + metadata = metadata + ) + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors/upsert") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- httr2::response_json( + body = list("upsertedCount" = 1) + ) + + response_body <- httr2::resp_body_json(response) + response_body + }, + + read_record = function(id) { + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("fetch") |> + httr2::req_url_query( + ids = id, + namespace = private$.namespace + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- httr2::response_json( + body = test_fixtures[["read_record"]] + ) + + response_body <- httr2::resp_body_json(response) + results <- response_body$vectors + + results + }, + + find_records = function(query, top_k = 1) { + + embeddings <- private$.get_embeddings(query) + + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + body <- list( + namespace = private$.namespace, + vector = embeddings, + topK = top_k, + includeValues = FALSE, + includeMetadata = TRUE + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("query") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- httr2::response_json( + body = test_fixtures[["matched_records"]] + ) + + response_body <- httr2::resp_body_json(response) + results <- response_body$matches + + results |> + purrr::map(function(result) { + result$values <- NULL + result + }) + }, + + list_record_IDs = function() { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- paste0("https://", private$.index_host) + + request <- httr2::request(url) |> + httr2::req_url_path_append("vectors") |> + httr2::req_url_path_append("list") |> + httr2::req_url_query( + namespace = private$.namespace + ) |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) + + response <- httr2::response_json( + body = test_fixtures[["list_record_IDs"]] + ) + + response_body <- httr2::resp_body_json(response) + + purrr::map_vec(response_body$vectors, ~ .$id) + } + ), + + private = list( + .get_embeddings = function(text) { + pinecone_api_key <- Sys.getenv("PINECONE_API_KEY") + + url <- "https://api.pinecone.io" + + body <- list( + model = "multilingual-e5-large", + parameters = list( + input_type = "passage", + truncate = "END" + ), + inputs = list( + list(text = text) + ) + ) + + request <- httr2::request(url) |> + httr2::req_url_path_append("embed") |> + httr2::req_headers( + "Api-Key" = pinecone_api_key, + "X-Pinecone-API-Version" = "2024-10" + ) |> + httr2::req_body_json(body) + + response <- httr2::response_json( + body = test_fixtures[["embeddings"]] + ) + + response_body <- httr2::resp_body_json(response) + + response_body$data[[1]]$values |> unlist() + } + ) +) + +test_fixtures <- list() + +test_fixtures[["pinecone_index_response"]] <- list( + "name" = "gitai", + "metric" = "cosine", + "dimension" = 1024L, + "status" = list( + "ready" = TRUE, + "state" = "Ready" + ), + "host" = "gitai-test-host", + "spec" = list( + "serverless" = list( + "region" = "us-east-1", + "cloud" = "aws" + ) + ) +) + +test_fixtures[["embeddings"]] <- list( + "model" = "multilingual-e5-large", + "data" = list( + list( + "values" = list( + runif(1024L, -1, 1) |> as.list() + ) + ) + ), + "usage" = list( + "total_tokens" = 78L + ) +) + +test_fixtures[["matched_records"]] <- list( + "results" = list(), + "matches" = list( + list( + "id" = "id_2", + "score" = 0.820673, + "values" = list(), + "metadata" = list( + "files" = c("test_file1", "test_file2"), + "repo_url" = "test_url", + "text" = "This package will best suite you.", + "timestamp" = Sys.Date() + ) + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 10L) +) + +test_fixtures[["read_record"]] <- list( + "vectors" = list( + "TestProject" = list( + "values" = test_fixtures[["embeddings"]][["data"]][[1]]["values"], + "metadata" = test_fixtures[["matched_records"]][["matches"]][[1]][["metadata"]] + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 1L) +) + +test_fixtures[["list_record_IDs"]] <- list( + "vectors" = list( + list( + "id" = "project_1" + ), + list( + "id" = "project_2" + ), + list( + "id" = "project_3" + ), + list( + "id" = "project_4" + ), + list( + "id" = "project_5" + ) + ), + "namespace" = "gitai-tests", + "usage" = list("readUnits" = 1L) +) diff --git a/inst/example_workflow.R b/inst/example_workflow.R new file mode 100644 index 0000000..d90acec --- /dev/null +++ b/inst/example_workflow.R @@ -0,0 +1,15 @@ +gitai_demo <- initialize_project("gitai-tests") |> + set_database(index = "gitai-mb", + namespace = "gitai-demo-2") |> + set_github_repos( + orgs = "r-world-devs" + ) |> + add_files(files = "\\.md") |> + set_llm() |> + set_prompt("Provide a one-two sentence description of the product based on input.") + +process_repos(gitai_demo) + +gitai_demo$db$find_records("Find package with which I can plot data.") + +gitai_demo$db$read_record("GitStats") diff --git a/tests/testthat/test-Pinecone.R b/tests/testthat/test-Pinecone.R index f18b741..0fd4296 100644 --- a/tests/testthat/test-Pinecone.R +++ b/tests/testthat/test-Pinecone.R @@ -1,6 +1,6 @@ test_that("getting index metadata", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -11,7 +11,7 @@ test_that("getting index metadata", { test_that("getting embeddings", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -24,7 +24,7 @@ test_that("getting embeddings", { test_that("writting records", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -51,9 +51,7 @@ test_that("writting records", { test_that("finding records", { - Sys.sleep(3) - - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -68,17 +66,11 @@ test_that("finding records", { result[[1]]$metadata$text |> is.character() |> expect_true() result[[1]]$score |> is.numeric() |> expect_true() - result_2 <- db$find_records( - query = "Tell me about apple fruit.", - top_k = 1 - ) - - expect_false(result_2[[1]]$id == result[[1]]$id) }) test_that("reading records", { - db <- Pinecone$new( + db <- PineconeMocked$new( namespace = "test_project_id", index = "gitai" ) @@ -89,3 +81,16 @@ test_that("reading records", { is.character() |> expect_true() }) + +test_that("listing all records IDs", { + + db <- PineconeMocked$new( + namespace = "test_project_id", + index = "gitai" + ) + + result <- db$list_record_IDs() + + expect_type(result, "character") + expect_gt(length(result), 1) +}) diff --git a/tests/testthat/test-set_database.R b/tests/testthat/test-set_database.R index f3dee7a..202c244 100644 --- a/tests/testthat/test-set_database.R +++ b/tests/testthat/test-set_database.R @@ -1,11 +1,11 @@ test_that("setting database provider with default namespace", { - + gitai <- initialize_project("gitai-demo") |> set_database( - provider = "Pinecone", + provider = "PineconeMocked", index = "gitai" - ) - + ) + gitai$db$index |> expect_equal("gitai") gitai$db$namespace |> expect_equal("gitai-demo") }) @@ -14,11 +14,11 @@ test_that("setting database provider with custom namepsace", { gitai <- initialize_project("gitai-demo") |> set_database( - provider = "Pinecone", + provider = "PineconeMocked", index = "gitai", namespace = "test_namespace" - ) - + ) + gitai$db$index |> expect_equal("gitai") gitai$db$namespace |> expect_equal("test_namespace") })