-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #35 from JuliaGenAI/juliadocs_index
Adding index + query examples for Julia documentation, and some evals.
- Loading branch information
Showing
13 changed files
with
585 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,8 +4,6 @@ authors = ["Siddhant Chaudhary <[email protected]> and contributors"] | |
version = "0.1.0" | ||
|
||
[deps] | ||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | ||
Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" | ||
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" | ||
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" | ||
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" | ||
|
@@ -19,4 +17,11 @@ TextEncodeBase = "f92c20c0-9f2a-4705-8116-881385faba05" | |
Transformers = "21ca0261-441d-5938-ace7-c90938fde4d4" | ||
|
||
[compat] | ||
Flux = "0.14" | ||
JLD2 = "0.4" | ||
JSON = "0.21" | ||
NeuralAttentionlib = "0.3" | ||
StatsBase = "0.34" | ||
TextEncodeBase = "0.8" | ||
Transformers = "0.3" | ||
julia = "1.10" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,18 +6,20 @@ DocMeta.setdocmeta!(ColBERT, :DocTestSetup, :(using ColBERT); recursive = true) | |
makedocs(; | ||
modules = [ColBERT], | ||
authors = "Siddhant Chaudhary <[email protected]> and contributors", | ||
sitename = "ColBERT.jl", | ||
sitename = "ColBERT", | ||
format = Documenter.HTML(; | ||
canonical = "https://codetalker7.github.io/ColBERT.jl", | ||
edit_link = "main", | ||
assets = String[] | ||
assets = String[], | ||
sidebar_sitename = false | ||
), | ||
pages = [ | ||
"Home" => "index.md" | ||
"Home" => "index.md", | ||
"Reference" => "api.md" | ||
] | ||
) | ||
|
||
deploydocs(; | ||
repo = "github.com/codetalker7/ColBERT.jl", | ||
devbranch = "main" | ||
target = "build", | ||
devbranch = "main", | ||
push_preview = true | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
```@index | ||
``` | ||
|
||
```@autodocs | ||
Modules = [ColBERT] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# the knowledge packs
#
# Every URL is quoted because it contains `?` (from `?download=true`), which an
# unquoted shell word treats as a filename-expansion pattern. Unquoted, zsh
# aborts with "no matches found" and other shells may silently rewrite the URL
# if a matching path happens to exist.
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/genie__v20240818__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O genie_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/julia__v1.10.2__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O julia_v1.10.2_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/juliadata__v20240716__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O juliadata_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/julialang__v20240819__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O julialang_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/makie__v20240716__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O makie_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/plots__v20240716__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O plots_knowledge_pack.tar.gz
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/sciml__v20240716__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O sciml_knowledge_pack.tar.gz
# NOTE(review): the output name below is spelled "tider" (not "tidier"); it is
# kept as-is because the matching `tar` line uses the same spelling.
wget "https://media.githubusercontent.com/media/svilupp/AIHelpMeArtifacts/main/artifacts/tidier__v20240716__textembedding3large-1024-Bool__v1.0.tar.gz?download=true" \
    -O tider_knowledge_pack.tar.gz

# unpack all the packs
tar -xvzf genie_knowledge_pack.tar.gz
tar -xvzf julia_v1.10.2_knowledge_pack.tar.gz
tar -xvzf juliadata_knowledge_pack.tar.gz
tar -xvzf julialang_knowledge_pack.tar.gz
tar -xvzf makie_knowledge_pack.tar.gz
tar -xvzf plots_knowledge_pack.tar.gz
tar -xvzf sciml_knowledge_pack.tar.gz
tar -xvzf tider_knowledge_pack.tar.gz

# eval pack
wget "https://raw.githubusercontent.com/svilupp/AIHelpMe.jl/main/evaluations/JuliaData/dataframe_combined_filtered-qa-evals.json" -O qa_evals.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# loading the docs
using HDF5

# Accumulators filled from every knowledge-pack file listed below; the i-th
# entry of `doc_sources` is read from the same file as the i-th entry of
# `doc_passages` (assumes "chunks" and "sources" have equal length per file —
# TODO confirm).
doc_passages = String[]
doc_sources = String[]

# HDF5 files to read, in the order their contents are appended.
pack_files = [
    "genie__v20240818__textembedding3large-1024-Bool__v1.0.hdf5",
    "JuliaData-text-embedding-3-large-1-Bool__v1.0.hdf5",
    "julialang__v20240819__textembedding3large-1024-Bool__v1.0.hdf5",
    "Makie-text-embedding-3-large-1-Bool__v1.0.hdf5",
    "pack.hdf5",
    "Plots-text-embedding-3-large-1-Bool__v1.0.hdf5",
    "sciml__v20240716__textembedding3large-1024-Bool__v1.0.hdf5",
    "tidier__v20240716__textembedding3large-1024-Bool__v1.0.hdf5"
]

for file in pack_files
    # The do-block form closes the file handle when the block exits, including
    # when a read throws; the plain `h5open` call left every handle open.
    h5open(file, "r") do fid
        append!(doc_passages, read(fid["chunks"]))
        append!(doc_sources, read(fid["sources"]))
    end
end
|
||
# evals
using ColBERT, CUDA, Random, JSON
using PromptingTools: distance_longest_common_subsequence
# CUDA.devices()
# device!(5)
Random.seed!(0)

## load the evaluation qa
eval_qa = JSON.parsefile("qa_evals.json")

## get the searcher
searcher = Searcher("./juliadocsindex/");

"""
    count_hits(searcher, eval_qa, passages, k; threshold = 0.33)

Count the entries of `eval_qa` for which at least one of the `k` passages
returned by `search(searcher, entry["question"], k)` has a
`distance_longest_common_subsequence` to `entry["context"]` strictly below
`threshold`.

# Arguments
- `searcher`: object passed as the first argument to `search`.
- `eval_qa`: iterable of entries indexable by `"question"` and `"context"`.
- `passages`: collection indexed by the passage ids that `search` returns.
- `k`: number of results requested from `search` per question.

# Keywords
- `threshold`: distance below which a returned passage counts as a hit.

# Returns
The number of entries with at least one hit, as an `Int`.
"""
function count_hits(searcher, eval_qa, passages, k; threshold = 0.33)
    hits = 0
    for query in eval_qa
        @time pids, _ = search(searcher, query["question"], k)
        # `minimum` throws on an empty collection; no results means no hit.
        isempty(pids) && continue
        distances = distance_longest_common_subsequence(
            query["context"], passages[pids])
        if minimum(distances) < threshold
            hits += 1
        end
    end
    return hits
end

## for each qs, see if the context is returned
k = 5
# The counting loop lives in a function: updating a global with `+=` inside a
# top-level `for` loop fails with `UndefVarError` when this file is run
# non-interactively, because the loop body treats the name as a new local.
num_hits = count_hits(searcher, eval_qa, doc_passages, k)
# The value printed is the fraction of questions with a hit, not a raw count.
print("Number of hits: ", num_hits / length(eval_qa))
Oops, something went wrong.