fix: bundle CUDA DLL into the release #62

Open · wants to merge 28 commits into main from 61-bug-cuda-dlls

Changes from 9 commits

Commits (28)
52a48da  fix: bundle CUDA DLL into the release (louisgv, Jul 2, 2023)
43330d4  Merge branch 'main' into 61-bug-cuda-dlls (louisgv, Jul 3, 2023)
4d94a47  Merge branch 'main' into 61-bug-cuda-dlls (louisgv, Jul 14, 2023)
953af83  Merge branch 'main' into 61-bug-cuda-dlls (LLukas22, Jul 18, 2023)
4b71716  Update `rustformers` + check gpu (LLukas22, Jul 18, 2023)
4b8fe59  Set `n_batch` correctly (LLukas22, Jul 18, 2023)
187b135  Copy cuda libraries (LLukas22, Jul 20, 2023)
9343897  reduce feeding delay if gpu is enabled (LLukas22, Jul 21, 2023)
a2a3dbf  Copy `opencl` dlls (LLukas22, Jul 21, 2023)
a8b3bbf  create linux ci (LLukas22, Jul 21, 2023)
21ae9e1  defaults for release infos (LLukas22, Jul 21, 2023)
286574d  Fail if files aren't found (LLukas22, Jul 21, 2023)
86cc051  Add windows build (LLukas22, Jul 21, 2023)
47f9dfc  Macos build (LLukas22, Jul 21, 2023)
7c1f25a  ci bugfixes (LLukas22, Jul 22, 2023)
36e050b  More bugfixes and absolute paths (LLukas22, Jul 22, 2023)
0b26205  Paths .... again (LLukas22, Jul 22, 2023)
cc786f0  Make mac artifacts unique (LLukas22, Jul 22, 2023)
89eb1fa  renable build for windows-cublas (LLukas22, Jul 22, 2023)
0761d79  update character (louisgv, Jul 30, 2023)
7481edf  Slight refactor (louisgv, Aug 1, 2023)
9d23cfd  update character (louisgv, Aug 2, 2023)
5b51725  update llm (louisgv, Aug 2, 2023)
006cd5a  Merge branch 'main' into 61-bug-cuda-dlls (louisgv, Sep 16, 2023)
9b8d16d  fix build script (louisgv, Sep 16, 2023)
bc5edf6  use self-hosted runner for metal (louisgv, Sep 16, 2023)
18f04ed  remove build on push (consume too much compute atm) (louisgv, Sep 16, 2023)
1211cc2  Add todo (louisgv, Sep 16, 2023)
4 changes: 4 additions & 0 deletions .github/workflows/tauri.yml
@@ -13,6 +13,9 @@ jobs:
fail-fast: false
matrix:
platform: [macos-latest, ubuntu-latest, windows-latest, self-hosted]
# include:
# - platform: windows-latest
# args: --config tauri.windows.conf.json

runs-on: ${{ matrix.platform }}

@@ -113,3 +116,4 @@ jobs:
releaseBody: ${{ github.event.release.body }}
releaseId: ${{ github.event.release.id }}
tagName: ${{ github.event.release.tag_name }}
# args: "${{ matrix.args }}"
18 changes: 8 additions & 10 deletions apps/desktop/src-tauri/Cargo.toml
@@ -11,15 +11,14 @@ edition = "2021"

[build-dependencies]
tauri-build = { version = "1.4.0", features = [] }
glob = "0.3"

[dependencies]
llm = { git = "https://github.com/rustformers/llm", branch = "main", package = "llm", features = [
# Peg the llm version here to prevent unwanted breaking changes
llm = { git = "https://github.com/rustformers/llm", rev = "645093e", package = "llm", features = [
"default",
# "cublas",
] }

# llm = { git = "https://github.com/RedBoxing/llm.git", branch = "hf-tokenizer", package = "llm" }

tauri = { version = "1.4.0", features = [
"reqwest-client",
"dialog-confirm",
@@ -76,15 +75,10 @@ blake3 = "1.3.3"
cocoa = "0.24.1"
objc = "0.2.7"

[target.aarch64-apple-darwin.dependencies]
llm = { git = "https://github.com/rustformers/llm", branch = "main", package = "llm", features = [
"default",
"metal",
] }

[target."cfg(target_os = \"linux\")".dependencies]
webkit2gtk = "0.18.2"


[target."cfg(target_os = \"windows\")".dependencies]
webview2-com = "0.19.1"
windows = "0.39.0"
@@ -102,6 +96,10 @@ default = ["custom-protocol"]
# DO NOT remove this
custom-protocol = ["tauri/custom-protocol"]

cublas = ["llm/cublas"]
clblast = ["llm/clblast"]
metal = ["llm/metal"]

[profile.dev.package."*"]
opt-level = 3

132 changes: 109 additions & 23 deletions apps/desktop/src-tauri/build.rs
@@ -1,25 +1,111 @@
// #[cfg(any(target_os = "macos", target_os = "linux"))]
fn main() {
tauri_build::build()
use std::{env, path::{Path, PathBuf}, fs};
extern crate glob;
use glob::glob;

fn main() {
#[cfg(feature = "cublas")]
copy_cuda_dlls();
#[cfg(feature = "clblast")]
copy_opencl_dlls();

tauri_build::build();
}

#[allow(dead_code)]
fn get_build_dir() -> PathBuf {
let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
let mut build_dir = Path::new(&manifest_dir).join("target");
build_dir.push(env::var("PROFILE").unwrap());
build_dir
}

// #[cfg(target_os = "windows")]
// fn main() {
// let mut windows = tauri_build::WindowsAttributes::new();
// windows = windows.app_manifest(
// r#"
// <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
// <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
// <security>
// <requestedPrivileges>
// <requestedExecutionLevel level="requireAdministrator" uiAccess="false" />
// </requestedPrivileges>
// </security>
// </trustInfo>
// </assembly>
// "#,
// );

// tauri_build::try_build(tauri_build::Attributes::new().windows_attributes(windows))
// .expect("failed to run build script")
// }
#[allow(dead_code)]
fn copy_cuda_dlls() {
// Get the directory of the output executable.
let out_dir = get_build_dir();

// Get the CUDA path from the environment variable.
let cuda_env = env::var("CUDA_PATH").expect("CUDA_PATH not found");
let cuda_path = Path::new(&cuda_env);

// Patterns to search for the DLL files.
#[cfg(target_os = "windows")]
let patterns = [
"cublas64_*.dll",
"cublasLt64_*.dll",
"cudart64_*.dll"
];
#[cfg(target_os = "windows")]
let binary_path = cuda_path.join("bin");

#[cfg(target_os = "linux")]
let patterns = [
"libcudart.so",
"libcublasLt.so",
"libcublas.so"
];
#[cfg(target_os = "linux")]
let binary_path = cuda_path.join("lib64");


for pattern in &patterns {
// Construct the full glob pattern.
let full_pattern = format!("{}/{}", binary_path.to_str().unwrap(), pattern);

// Use glob to find the DLL files.
for entry in glob(&full_pattern).expect("Failed to read glob pattern") {
match entry {
Ok(dll_path) => {
// Copy the DLL file to the output directory.
let dll_file_name = dll_path.file_name().unwrap();
let destination = Path::new(&out_dir).join(dll_file_name);
if !destination.exists() {
fs::copy(&dll_path, &destination)
.expect("Failed to copy DLL");
println!("Moved {} to {}", dll_file_name.to_string_lossy(), destination.to_string_lossy());
}

},
Err(e) => panic!("{}", e),
}

}
}
}

#[allow(dead_code)]
fn copy_opencl_dlls() {
// Get the directory of the output executable.
let out_dir = get_build_dir();

let copy_dll = |source: PathBuf| {
let dll_file_name = source.file_name().unwrap();
let destination = Path::new(&out_dir).join(dll_file_name);
if !destination.exists() {
fs::copy(&source, &destination)
.unwrap_or_else(|e| panic!("Failed to copy DLL {}: {}", dll_file_name.to_string_lossy(), e));
println!("Copied {} to {}", dll_file_name.to_string_lossy(), destination.to_string_lossy());
}
};

let clblast_dll;
let opencl_dll;
#[cfg(target_os = "windows")]
{
let clblast_dir = env::var("CLBLAST_PATH").expect("CLBLAST_PATH not found!");
clblast_dll = Path::new(&clblast_dir).join("bin").join("clblast.dll");

let opencl_dir = env::var("OPENCL_PATH").expect("OPENCL_PATH not found!");
opencl_dll = Path::new(&opencl_dir).join("bin").join("OpenCL.dll");
}

#[cfg(target_os = "linux")]
{
let lib_path = Path::new("/usr/lib/x86_64-linux-gnu");
clblast_dll = lib_path.join("libclblast.so");
opencl_dll = lib_path.join("libOpenCL.so");
}

copy_dll(clblast_dll);
copy_dll(opencl_dll);
}
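Note that `glob` returning zero matches is not an error, so a bad pattern or a missing CUDA install would silently copy nothing. A stricter variant in the spirit of commit 286574d ("Fail if files aren't found"); a sketch, not the code this PR ships:

// Sketch: resolve a glob pattern the way copy_cuda_dlls does, but fail the
// build when nothing matches instead of silently copying zero files.
fn require_matches(full_pattern: &str) -> Vec<std::path::PathBuf> {
    let matches: Vec<_> = glob::glob(full_pattern)
        .expect("Failed to read glob pattern")
        .filter_map(Result::ok)
        .collect();
    if matches.is_empty() {
        panic!("No files found for pattern {}", full_pattern);
    }
    matches
}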
3 changes: 1 addition & 2 deletions apps/desktop/src-tauri/src/inference/gpu.rs
@@ -1,7 +1,6 @@
#[tauri::command]
pub async fn check_gpu() -> Result<bool, String> {
// TODO: actually check if Metal is available in the future (?)
if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
if llm::ggml_get_accelerator() != llm::GgmlAccelerator::None {
Ok(true)
} else {
Ok(false)
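Both branches of the rewritten `check_gpu` merely wrap the comparison, so the command reduces to a one-liner; an equivalent sketch (assuming `GgmlAccelerator` implements `PartialEq`, which the `!=` above already requires):

#[tauri::command]
pub async fn check_gpu() -> Result<bool, String> {
    // Any accelerator other than None means GPU inference is available.
    Ok(llm::ggml_get_accelerator() != llm::GgmlAccelerator::None)
}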
25 changes: 17 additions & 8 deletions apps/desktop/src-tauri/src/inference/process.rs
@@ -65,13 +65,7 @@ impl InferenceThreadRequest {
fn get_inference_params(
completion_request: &CompletionRequest,
) -> InferenceParameters {
let n_threads = model::pool::get_n_threads();

let n_batch = if get_use_gpu() { 240 } else { n_threads };

InferenceParameters {
n_threads,
n_batch,
sampler: Arc::new(completion_request.to_top_p_top_k()),
}
}
@@ -95,7 +89,23 @@ pub fn start(req: InferenceThreadRequest) -> JoinHandle<()> {
}
};

let mut session = model.start_session(Default::default());
let n_threads = model::pool::get_n_threads();

// Set the batch size according to the active accelerator.
let backend = llm::ggml_get_accelerator();
let n_batch = match backend {
llm::GgmlAccelerator::Metal => if get_use_gpu() { 1 } else { n_threads }, // 1 is the only supported batch size for Metal
llm::GgmlAccelerator::None => n_threads,
_ => if get_use_gpu() { 512 } else { n_threads },
};

let session_config = llm::InferenceSessionConfig {
n_batch,
n_threads,
..Default::default()
};

let mut session = model.start_session(session_config);

let mut output_request = OutputRequest::default();

Expand All @@ -109,7 +119,6 @@ pub fn start(req: InferenceThreadRequest) -> JoinHandle<()> {

match session.feed_prompt::<Infallible, Prompt>(
model.as_ref(),
&inference_params,
req.completion_request.prompt.as_str().into(),
&mut output_request,
|t| {
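The batch-size policy introduced in this hunk can also be read as a standalone helper; a sketch of the same logic (hypothetical refactor, not part of the diff):

// Same policy as above: Metal only supports a batch size of 1, CPU-only
// builds batch by thread count, and CUDA/OpenCL use a large batch (512)
// when the GPU is enabled.
fn pick_n_batch(use_gpu: bool, n_threads: usize) -> usize {
    match llm::ggml_get_accelerator() {
        llm::GgmlAccelerator::Metal if use_gpu => 1,
        llm::GgmlAccelerator::None => n_threads,
        _ if use_gpu => 512,
        _ => n_threads,
    }
}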
2 changes: 1 addition & 1 deletion apps/desktop/src/providers/thread.ts
@@ -158,7 +158,7 @@ const useThreadProvider = ({ thread }: { thread: FileInfo }) => {
{
async onComment(comment) {
setStatusMessage(comment)
await wait(42)
await wait(serverConfig.data.useGpu ? 3 : 42)
},
async onData(data) {
const resp = JSON.parse(data) as StreamResponse