diff --git a/.gitignore b/.gitignore index 18148200b36e1..0ca05ee19df2c 100644 --- a/.gitignore +++ b/.gitignore @@ -110,6 +110,7 @@ tests/test-tokenizer-1-bpe /koboldcpp_noavx2.so /koboldcpp_clblast.so /koboldcpp_clblast_noavx2.so +/koboldcpp_clblast_failsafe.so /koboldcpp_cublas.so /koboldcpp_vulkan.so /koboldcpp_vulkan_noavx2.so @@ -119,6 +120,7 @@ tests/test-tokenizer-1-bpe /koboldcpp_clblast.dll /koboldcpp_clblast_noavx2.dll /koboldcpp_vulkan_noavx2.dll +/koboldcpp_clblast_failsafe.dll /koboldcpp_cublas.dll /koboldcpp_vulkan.dll /cublas64_11.dll diff --git a/Makefile b/Makefile index 41f0c4cabfb66..856c927b7be34 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ .PHONY: finishedmsg -default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 finishedmsg +default: koboldcpp_default koboldcpp_failsafe koboldcpp_noavx2 koboldcpp_clblast koboldcpp_clblast_noavx2 koboldcpp_clblast_failsafe koboldcpp_cublas koboldcpp_hipblas koboldcpp_vulkan koboldcpp_vulkan_noavx2 finishedmsg tools: quantize_gpt2 quantize_gptj quantize_gguf quantize_neox quantize_mpt quantize_clip ttsmain whispermain sdmain gguf-split ifndef UNAME_S @@ -140,24 +140,8 @@ CCV := $(shell $(CC) --version | head -n 1) CXXV := $(shell $(CXX) --version | head -n 1) # Architecture specific -# TODO: probably these flags need to be tweaked on some architectures -# feel free to update the Makefile for your architecture and send a pull request or issue +# For x86 based architectures ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) -# Use all CPU extensions that are available: -# old library NEEDS mf16c to work. so we must build with it. new one doesnt -ifeq ($(OS),Windows_NT) -ifdef LLAMA_PORTABLE - SIMPLECFLAGS += -mavx -msse3 -mssse3 - SIMPLERCFLAGS += -msse3 -mssse3 -ifdef LLAMA_NOAVX2 - FULLCFLAGS += -msse3 -mssse3 -mavx -else - FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx -endif -else - CFLAGS += -march=native -mtune=native -endif -else ifdef LLAMA_PORTABLE SIMPLECFLAGS += -mavx -msse3 -mssse3 SIMPLERCFLAGS += -msse3 -mssse3 @@ -165,12 +149,11 @@ ifdef LLAMA_NOAVX2 FULLCFLAGS += -msse3 -mssse3 -mavx else FULLCFLAGS += -mavx2 -msse3 -mssse3 -mfma -mf16c -mavx -endif +endif # LLAMA_NOAVX2 else CFLAGS += -march=native -mtune=native -endif -endif -endif +endif # LLAMA_PORTABLE +endif # if x86 ifndef LLAMA_NO_ACCELERATE # Mac M1 - include Accelerate framework. @@ -436,6 +419,8 @@ ggml_v4_clblast.o: ggml/src/ggml.c ggml/include/ggml.h ggml_v4_cublas.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v4_clblast_noavx2.o: ggml/src/ggml.c ggml/include/ggml.h + $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ +ggml_v4_clblast_failsafe.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml_v4_vulkan.o: ggml/src/ggml.c ggml/include/ggml.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(VULKAN_FLAGS) -c $< -o $@ @@ -452,6 +437,8 @@ ggml-cpu_v4_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h ggml-cpu_v4_clblast.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ ggml-cpu_v4_clblast_noavx2.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h + $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ +ggml-cpu_v4_clblast_failsafe.o: ggml/src/ggml-cpu/ggml-cpu.c ggml/include/ggml-cpu.h $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #quants @@ -548,6 +535,8 @@ ggml_v3_clblast.o: otherarch/ggml_v3.c otherarch/ggml_v3.h ggml_v3_cublas.o: otherarch/ggml_v3.c otherarch/ggml_v3.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v3_clblast_noavx2.o: otherarch/ggml_v3.c otherarch/ggml_v3.h + $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ +ggml_v3_clblast_failsafe.o: otherarch/ggml_v3.c otherarch/ggml_v3.h $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #version 2 libs @@ -562,6 +551,8 @@ ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(FASTCFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@ ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h + $(CC) $(FASTCFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ +ggml_v2_clblast_failsafe.o: otherarch/ggml_v2.c otherarch/ggml_v2.h $(CC) $(FASTCFLAGS) $(SIMPLERCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ #extreme old version compat @@ -633,7 +624,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER) $(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@ clean: - rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so + rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so rm -vrf ggml/src/ggml-cuda/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o @@ -679,17 +670,23 @@ ifdef CLBLAST_BUILD koboldcpp_clblast: ggml_v4_clblast.o ggml-cpu_v4_clblast.o ggml_v3_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS) $(CLBLAST_BUILD) ifdef NOAVX2_BUILD -koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS) +koboldcpp_clblast_noavx2: ggml_v4_clblast_noavx2.o ggml-cpu_v4_clblast_noavx2.o ggml_v3_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLE) $(OBJS) + $(CLBLAST_BUILD) +koboldcpp_clblast_failsafe: ggml_v4_clblast_failsafe.o ggml-cpu_v4_clblast_failsafe.o ggml_v3_clblast_failsafe.o ggml_v2_clblast_failsafe.o ggml_v1_failsafe.o expose.o gpttype_adapter_clblast_noavx2.o ggml-opencl.o ggml_v3-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o sdcpp_default.o whispercpp_default.o tts_default.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_SIMPLER) $(OBJS) $(CLBLAST_BUILD) else koboldcpp_clblast_noavx2: $(DONOTHING) +koboldcpp_clblast_failsafe: + $(DONOTHING) endif else koboldcpp_clblast: $(DONOTHING) koboldcpp_clblast_noavx2: $(DONOTHING) +koboldcpp_clblast_failsafe: + $(DONOTHING) endif ifdef CUBLAS_BUILD diff --git a/koboldcpp.py b/koboldcpp.py index 4b8a3952e18d9..e62251069d5e9 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -377,6 +377,7 @@ def pick_existant_file(ntoption,nonntoption): lib_noavx2 = pick_existant_file("koboldcpp_noavx2.dll","koboldcpp_noavx2.so") lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so") lib_clblast_noavx2 = pick_existant_file("koboldcpp_clblast_noavx2.dll","koboldcpp_clblast_noavx2.so") +lib_clblast_failsafe = pick_existant_file("koboldcpp_clblast_failsafe.dll","koboldcpp_clblast_failsafe.so") lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so") lib_hipblas = pick_existant_file("koboldcpp_hipblas.dll","koboldcpp_hipblas.so") lib_vulkan = pick_existant_file("koboldcpp_vulkan.dll","koboldcpp_vulkan.so") @@ -384,26 +385,30 @@ def pick_existant_file(ntoption,nonntoption): libname = "" lib_option_pairs = [ (lib_default, "Use CPU"), - (lib_clblast, "Use CLBlast"), (lib_cublas, "Use CuBLAS"), (lib_hipblas, "Use hipBLAS (ROCm)"), (lib_vulkan, "Use Vulkan"), + (lib_clblast, "Use CLBlast"), (lib_noavx2, "Use CPU (Old CPU)"), (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"), - (lib_clblast_noavx2, "Use CLBlast (Older CPU)"), + (lib_clblast_noavx2, "Use CLBlast (Old CPU)"), + (lib_clblast_failsafe, "Use CLBlast (Older CPU)"), (lib_failsafe, "Failsafe Mode (Older CPU)")] -default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) +default_option, cublas_option, hipblas_option, vulkan_option, clblast_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, clblast_failsafe_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs) runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)] def init_library(): global handle, args, libname - global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2 + global lib_default,lib_failsafe,lib_noavx2,lib_clblast,lib_clblast_noavx2,lib_clblast_failsafe,lib_cublas,lib_hipblas,lib_vulkan,lib_vulkan_noavx2 libname = lib_default if args.noavx2: - if args.useclblast and file_exists(lib_clblast_noavx2) and (os.name!='nt' or file_exists("clblast.dll")): - libname = lib_clblast_noavx2 + if args.useclblast and (os.name!='nt' or file_exists("clblast.dll")): + if (args.failsafe) and file_exists(lib_clblast_failsafe): + libname = lib_clblast_failsafe + elif file_exists(lib_clblast_noavx2): + libname = lib_clblast_noavx2 elif (args.usevulkan is not None) and file_exists(lib_vulkan_noavx2): libname = lib_vulkan_noavx2 elif (args.failsafe) and file_exists(lib_failsafe): @@ -3425,7 +3430,7 @@ def setup_backend_tooltip(parent): # backend count label with the tooltip function nl = '\n' tooltxt = "Number of backends you have built and available." + (f"\n\nMissing Backends: \n\n{nl.join(antirunopts)}" if len(runopts) < 8 else "") - num_backends_built = makelabel(parent, str(len(runopts)) + "/8", 5, 2,tooltxt) + num_backends_built = makelabel(parent, str(len(runopts)) + "/9", 5, 2,tooltxt) num_backends_built.grid(row=1, column=1, padx=205, pady=0) num_backends_built.configure(text_color="#00ff00") @@ -3446,7 +3451,7 @@ def changed_gpulayers_estimate(*args): predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())])) max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "") index = runopts_var.get() - gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") + gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)") layercounter_label.grid(row=6, column=1, padx=75, sticky="W") quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W") if sys.platform=="darwin" and gpulayers_var.get()=="-1": @@ -3477,7 +3482,7 @@ def changed_gpu_choice_var(*args): if v == "Use Vulkan" or v == "Use Vulkan (Old CPU)": quick_gpuname_label.configure(text=VKDevicesNames[s]) gpuname_label.configure(text=VKDevicesNames[s]) - elif v == "Use CLBlast" or v == "Use CLBlast (Older CPU)": + elif v == "Use CLBlast" or v == "Use CLBlast (Old CPU)" or v == "Use CLBlast (Older CPU)": quick_gpuname_label.configure(text=CLDevicesNames[s]) gpuname_label.configure(text=CLDevicesNames[s]) else: @@ -3534,12 +3539,12 @@ def changerunmode(a,b,c): global runmode_untouched runmode_untouched = False index = runopts_var.get() - if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpuname_label.grid(row=3, column=1, padx=75, sticky="W") gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") quick_gpu_selector_label.grid(row=3, column=0, padx = 8, pady=1, stick="nw") - if index == "Use CLBlast" or index == "Use CLBlast (Older CPU)": + if index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)": gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") quick_gpu_selector_box.grid(row=3, column=1, padx=8, pady=1, stick="nw") CUDA_gpu_selector_box.grid_remove() @@ -3583,7 +3588,7 @@ def changerunmode(a,b,c): else: quick_use_flashattn.grid(row=22, column=1, padx=8, pady=1, stick="nw") - if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": + if index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use CLBlast" or index == "Use CLBlast (Old CPU)" or index == "Use CLBlast (Older CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)": gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") gpu_layers_entry.grid(row=6, column=1, padx=8, pady=1, stick="nw") quick_gpu_layers_label.grid(row=6, column=0, padx = 8, pady=1, stick="nw") @@ -3954,10 +3959,13 @@ def export_vars(): args.noavx2 = False if gpu_choice_var.get()!="All": gpuchoiceidx = int(gpu_choice_var.get())-1 - if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Older CPU)": + if runopts_var.get() == "Use CLBlast" or runopts_var.get() == "Use CLBlast (Old CPU)" or runopts_var.get() == "Use CLBlast (Older CPU)": args.useclblast = [[0,0], [1,0], [0,1], [1,1]][gpuchoiceidx] - if runopts_var.get() == "Use CLBlast (Older CPU)": + if runopts_var.get() == "Use CLBlast (Old CPU)": args.noavx2 = True + elif runopts_var.get() == "Use CLBlast (Older CPU)": + args.noavx2 = True + args.failsafe = True if runopts_var.get() == "Use CuBLAS" or runopts_var.get() == "Use hipBLAS (ROCm)": if gpu_choice_var.get()=="All": args.usecublas = ["lowvram"] if lowvram_var.get() == 1 else ["normal"] @@ -4926,6 +4934,9 @@ def main(launch_args,start_server=True): if args.quantkv and args.quantkv>0 and not args.flashattention: exit_with_error(1, "Error: Using --quantkv requires --flashattention") + if args.failsafe: #failsafe implies noavx2 + args.noavx2 = True + if not args.model_param: args.model_param = args.model @@ -5596,7 +5607,7 @@ def range_checker(arg: str): compatgroup3.add_argument("--usemmap", help="If set, uses mmap to load model. This model will not be unloadable.", action='store_true') advparser.add_argument("--usemlock", help="Enables mlock, preventing the RAM used to load the model from being paged out. Not usually recommended.", action='store_true') advparser.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices.", action='store_true') - advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices.", action='store_true') + advparser.add_argument("--failsafe", help="Use failsafe mode, extremely slow CPU only compatibility mode that should work on all devices. Can be combined with useclblast if your device supports OpenCL.", action='store_true') advparser.add_argument("--debugmode", help="Shows additional debug info in the terminal.", nargs='?', const=1, type=int, default=0) advparser.add_argument("--onready", help="An optional shell command to execute after the model has been loaded.", metavar=('[shell command]'), type=str, default="",nargs=1) advparser.add_argument("--benchmark", help="Do not start server, instead run benchmarks. If filename is provided, appends results to provided file.", metavar=('[filename]'), nargs='?', const="stdout", type=str, default=None) diff --git a/koboldcpp.sh b/koboldcpp.sh index 5c39910045147..8c18ad237338d 100755 --- a/koboldcpp.sh +++ b/koboldcpp.sh @@ -38,10 +38,10 @@ if [[ $1 == "rebuild" ]]; then elif [[ $1 == "dist" ]]; then bin/micromamba remove --no-rc -r conda -p conda/envs/linux --force ocl-icd -y if [ -n "$NOAVX2" ]; then - bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" + bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" else - bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" - bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND" + bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_cublas.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64$KCPP_CUDAAPPEND" + bin/micromamba run -r conda -p conda/envs/linux pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --add-data './koboldcpp_default.so:.' --add-data './koboldcpp_vulkan.so:.' --add-data './koboldcpp_clblast.so:.' --add-data './koboldcpp_failsafe.so:.' --add-data './koboldcpp_noavx2.so:.' --add-data './koboldcpp_clblast_noavx2.so:.' --add-data './koboldcpp_clblast_failsafe.so:.' --add-data './koboldcpp_vulkan_noavx2.so:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-linux-x64-nocuda$KCPP_APPEND" fi bin/micromamba install --no-rc -r conda -p conda/envs/linux ocl-icd -c conda-forge -y else diff --git a/make_pyinstaller.bat b/make_pyinstaller.bat index 6961e58bb66af..699e05933fe40 100644 --- a/make_pyinstaller.bat +++ b/make_pyinstaller.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./niko.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_nocuda.exe" \ No newline at end of file diff --git a/make_pyinstaller.sh b/make_pyinstaller.sh index b8d4ed630728e..0114209a00aa3 100644 --- a/make_pyinstaller.sh +++ b/make_pyinstaller.sh @@ -16,6 +16,7 @@ pyinstaller --noconfirm --onefile --clean --console --collect-all customtkinter --add-data "./koboldcpp_noavx2.so:." \ --add-data "./koboldcpp_clblast.so:." \ --add-data "./koboldcpp_clblast_noavx2.so:." \ +--add-data "./koboldcpp_clblast_failsafe.so:." \ --add-data "./koboldcpp_vulkan_noavx2.so:." \ --add-data "./koboldcpp_vulkan.so:." \ --add-data "./rwkv_vocab.embd:." \ diff --git a/make_pyinstaller_cuda.bat b/make_pyinstaller_cuda.bat index e57727b0aa559..7bfd8a06210f4 100644 --- a/make_pyinstaller_cuda.bat +++ b/make_pyinstaller_cuda.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda12.bat b/make_pyinstaller_cuda12.bat index be555e628ad04..6c3d1d3d9ed5a 100644 --- a/make_pyinstaller_cuda12.bat +++ b/make_pyinstaller_cuda12.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_12.dll;." --add-data "./cublasLt64_12.dll;." --add-data "./cudart64_12.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_cu12.exe" \ No newline at end of file diff --git a/make_pyinstaller_cuda_oldcpu.bat b/make_pyinstaller_cuda_oldcpu.bat index df2095be7b6a9..af8d277e2db4c 100644 --- a/make_pyinstaller_cuda_oldcpu.bat +++ b/make_pyinstaller_cuda_oldcpu.bat @@ -1,2 +1,2 @@ call create_ver_file.bat -PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe" \ No newline at end of file +PyInstaller --noconfirm --onefile --clean --console --collect-all customtkinter --collect-all psutil --icon "./nikogreen.ico" --add-data "./winclinfo.exe;." --add-data "./OpenCL.dll;." --add-data "./kcpp_adapters;./kcpp_adapters" --add-data "./koboldcpp.py;." --add-data "./klite.embd;." --add-data "./kcpp_docs.embd;." --add-data "./kcpp_sdui.embd;." --add-data "./taesd.embd;." --add-data "./taesd_xl.embd;." --add-data "./taesd_f.embd;." --add-data "./taesd_3.embd;." --add-data "./koboldcpp_default.dll;." --add-data "./koboldcpp_failsafe.dll;." --add-data "./koboldcpp_noavx2.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./koboldcpp_clblast_noavx2.dll;." --add-data "./koboldcpp_clblast_failsafe.dll;." --add-data "./koboldcpp_vulkan_noavx2.dll;." --add-data "./clblast.dll;." --add-data "./koboldcpp_vulkan.dll;." --add-data "./vulkan-1.dll;." --add-data "./koboldcpp_cublas.dll;." --add-data "./cublas64_11.dll;." --add-data "./cublasLt64_11.dll;." --add-data "./cudart64_110.dll;." --add-data "./msvcp140.dll;." --add-data "./msvcp140_codecvt_ids.dll;." --add-data "./vcruntime140.dll;." --add-data "./vcruntime140_1.dll;." --add-data "./rwkv_vocab.embd;." --add-data "./rwkv_world_vocab.embd;." --version-file "./version.txt" "./koboldcpp.py" -n "koboldcpp_oldcpu.exe" \ No newline at end of file diff --git a/otherarch/tools/unused/class.py b/otherarch/tools/unused/class.py deleted file mode 100644 index 6e1f0d70cdf96..0000000000000 --- a/otherarch/tools/unused/class.py +++ /dev/null @@ -1,336 +0,0 @@ -## KoboldCpp based GGML Backend by Concedo -## For use as a custom backend in KoboldAI United -## Not intended for general use. - -from __future__ import annotations - -import time, json -import torch -import requests -import numpy as np -from typing import List, Optional, Union -import os, time -from . import koboldcpp - -import utils -from logger import logger -from modeling.inference_model import ( - GenerationResult, - GenerationSettings, - InferenceModel, -) - -model_backend_name = "KoboldCPP" #specific instead of ggml -model_backend_type = "ggml" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face) - -class KoboldCppException(Exception): - """To be used for errors on cpp side of KoboldCpp.""" - -class KcppArgsObject: - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - -class model_backend(InferenceModel): - def __init__(self) -> None: - super().__init__() - self.kcpp_backend_loaded = False - - def is_valid(self, model_name, model_path, menu_path): - - foundfile = False - try: - files = os.listdir(model_path) - foundfile = len([filename for filename in files if (("ggml" in filename.lower() and ".bin" in filename.lower()) or ".gguf" in filename.lower())])>0 - except: - pass - return foundfile - - def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): - - self.kcpp_threads = 5 - self.model_name = "GGML_Model" - self.kcpp_ctxsize = 2048 - self.kcpp_blasbatchsize = 512 - self.kcpp_gpulayers = 0 - self.kcpp_smartcontext = False - self.kcpp_ropescale = 0.0 - self.kcpp_ropebase = 10000.0 - self.kcpp_useclblast = None - self.kcpp_usecublas = None - self.kcpp_usecpu = False - self.kcpp_noavx2 = False - self.kcpp_nommap = False - self.kcpp_usevulkan = None - self.kcpp_debugmode = 0 - self.kcpp_tensor_split_str = "" - self.kcpp_tensor_split = None - - files = os.listdir(model_path) - foundfiles = [filename for filename in files if (("ggml" in filename.lower() and ".bin" in filename.lower()) or ".gguf" in filename.lower())] - - requested_parameters = [] - foldermdls = [] - for ff in foundfiles: - foldermdls.append({'text': ff, 'value': os.path.join(model_path, ff)}) - requested_parameters.append({ - "uitype": "dropdown", - "unit": "string", - "label": "GGML DataFile Name", - "id": "kcpp_filename", - "default": os.path.join(model_path, foundfiles[0]) if len(foundfiles)>0 else model_name, - "check": {"value": "", 'check': "!="}, - "tooltip": "Actual GGML DataFile Name", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': foldermdls - }) - requested_parameters.append({ - "uitype": "dropdown", - "unit": "int", - "label": "KoboldCpp Accelerator", - "id": "kcpp_accelerator", - "default": 0, - "check": {"value": "", 'check': "!="}, - 'multiple': False, - "tooltip": "KoboldCpp Accelerator", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': [{'text': 'Use No BLAS', 'value': 0}, {'text': 'Use CuBLAS', 'value': 1}, - {'text': 'Use CLBLast GPU #1', 'value': 2},{'text': 'Use CLBLast GPU #2', 'value': 3},{'text': 'Use CLBLast GPU #3', 'value': 4} - ,{'text': 'NoAVX2 Mode (Old CPU)', 'value': 5},{'text': 'Failsafe Mode (Older CPU)', 'value': 6},{'text': 'Use Vulkan GPU #1', 'value': 7},{'text': 'Use Vulkan GPU #2', 'value': 8}], - }) - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "Threads", - "id": "kcpp_threads", - "default": self.kcpp_threads, - "check": {"value": "", 'check': "!="}, - "tooltip": "Thread Count", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "Max Context Size", - "id": "kcpp_ctxsize", - "default": self.kcpp_ctxsize, - "check": {"value": "", 'check': "!="}, - "tooltip": "Max Context Size", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "BLAS Batch Size", - "id": "kcpp_blasbatchsize", - "default": self.kcpp_blasbatchsize, - "check": {"value": "", 'check': "!="}, - "tooltip": "BLAS Batch Size", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "GPU Layers", - "id": "kcpp_gpulayers", - "default": self.kcpp_gpulayers, - "check": {"value": "", 'check': "!="}, - "tooltip": "GPU Layers", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "Rope Scale", - "id": "kcpp_ropescale", - "default": self.kcpp_ropescale, - "check": {"value": "", 'check': "!="}, - "tooltip": "Rope Scale", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "text", - "unit": "int", - "label": "Rope Base", - "id": "kcpp_ropebase", - "default": self.kcpp_ropebase, - "check": {"value": "", 'check': "!="}, - "tooltip": "Rope Base", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "dropdown", - "unit": "int", - "label": "Smart Context", - "id": "kcpp_smartcontext", - "default": self.kcpp_smartcontext, - "check": {"value": "", 'check': "!="}, - 'multiple': False, - "tooltip": "Smart Context", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': [{'text': 'False', 'value': False}, {'text': 'True', 'value': True}], - }) - requested_parameters.append({ - "uitype": "text", - "unit": "text", - "label": "GPU ID", - "id": "kcpp_tensor_split_str", - "default": "1", - "check": {"value": "", 'check': "!="}, - "tooltip": "Which GPU's do we use? For example:1 2", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "" - }) - requested_parameters.append({ - "uitype": "dropdown", - "unit": "int", - "label": "Debug Mode", - "id": "kcpp_debugmode", - "default": self.kcpp_debugmode, - "check": {"value": "", 'check': "!="}, - 'multiple': False, - "tooltip": "Debug Mode", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': [{'text': 'False', 'value': 0}, {'text': 'True', 'value': 1}], - }) - return requested_parameters - - def set_input_parameters(self, parameters): - self.kcpp_threads = parameters["kcpp_threads"] - self.kcpp_filename = parameters["kcpp_filename"] - self.kcpp_ctxsize = parameters["kcpp_ctxsize"] - self.kcpp_blasbatchsize = parameters["kcpp_blasbatchsize"] - self.kcpp_gpulayers = parameters["kcpp_gpulayers"] - self.kcpp_smartcontext = parameters["kcpp_smartcontext"] - self.kcpp_ropescale = parameters["kcpp_ropescale"] - self.kcpp_ropebase = parameters["kcpp_ropebase"] - self.kcpp_debugmode = parameters["kcpp_debugmode"] - self.kcpp_tensor_split_str = parameters["kcpp_tensor_split_str"] - if self.kcpp_tensor_split_str and self.kcpp_tensor_split_str!="": - splits = self.kcpp_tensor_split_str.split() - self.kcpp_tensor_split = [] - for s in splits: - self.kcpp_tensor_split.append(int(s)) - print(self.kcpp_tensor_split) - - accel = parameters["kcpp_accelerator"] - if accel==0: - self.kcpp_usecpu = True - elif accel==1: - self.kcpp_usecublas = ["normal"] - elif accel==2: - self.kcpp_useclblast = [0,0] - elif accel==3: - self.kcpp_useclblast = [1,0] - elif accel==4: - self.kcpp_useclblast = [0,1] - elif accel==5: - self.kcpp_noavx2 = True - elif accel==6: - self.kcpp_noavx2 = True - self.kcpp_usecpu = True - self.kcpp_nommap = True - elif accel==7: - self.kcpp_usevulkan = [0] - elif accel==8: - self.kcpp_usevulkan = [1] - pass - - def unload(self): - print("Attemping to unload library") - self.process.terminate() - - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - kcppargs = KcppArgsObject(model=self.kcpp_filename, model_param=self.kcpp_filename, - port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads, - psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize, blasbatchsize=self.kcpp_blasbatchsize, - ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext, forceversion=0, - nommap=self.kcpp_nommap, usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, usecpu=self.kcpp_usecpu, - useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, - tensor_split=self.kcpp_tensor_split, config=None, onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, - remotetunnel=False, ssl=False, benchmark=None, nocertify=False, mmproj=None, password=None, chatcompletionsadapter=None) - - - #koboldcpp.main(kcppargs,False) #initialize library without enabling Lite http server - (self.output_queue, self.input_queue, self.process) = koboldcpp.start_in_seperate_process(kcppargs) - while True: - data = self.output_queue.get() - if data['command'] == 'load status': - utils.koboldai_vars.total_layers = data['data']['total'] - utils.koboldai_vars.loaded_layers = data['data']['loaded'] - elif data['command'] == 'complete': - break - time.sleep(0.02) - - def _save_settings(self): - pass - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - self.input_queue.put({'command': 'generate', 'data': {'prompt':decoded_prompt, 'max_length': max_new, 'max_context_length': utils.koboldai_vars.max_length, - 'temperature': gen_settings.temp, 'top_k': int(gen_settings.top_k), 'top_a': gen_settings.top_a, 'top_p': gen_settings.top_p, - 'typical_p': gen_settings.typical, 'tfs': gen_settings.tfs, 'rep_pen': gen_settings.rep_pen, 'rep_pen_range': gen_settings.rep_pen_range, - "sampler_order": gen_settings.sampler_order, "use_default_badwordsids": utils.koboldai_vars.use_default_badwordsids} - }) - - #genresult = koboldcpp.generate(decoded_prompt,"",max_new,utils.koboldai_vars.max_length, - #gen_settings.temp,int(gen_settings.top_k),gen_settings.top_a,gen_settings.top_p, - #gen_settings.typical,gen_settings.tfs,gen_settings.rep_pen,gen_settings.rep_pen_range, - #sampler_order=gen_settings.sampler_order,use_default_badwordsids=utils.koboldai_vars.use_default_badwordsids) - - genresult = [] - while True: - data = self.output_queue.get() - print(data) - if data['command'] == 'generated text': - genresult.append(data['data']) - if self.output_queue.empty(): - break - time.sleep(0.02) - - return GenerationResult( - model=self, - out_batches=np.array( - [self.tokenizer.encode(x) for x in genresult] - ), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - )