diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9467b22a8210..16e54d885ae3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,31 @@
# Changelog
+### [Version 1.36.42](https://github.com/lobehub/lobe-chat/compare/v1.36.41...v1.36.42)
+
+Released on **2024-12-21**
+
+#### 🐛 Bug Fixes
+
+- **misc**: Fix HUGGINGFACE endpoint URL.
+
+
+
+
+Improvements and Fixes
+
+#### What's fixed
+
+- **misc**: Fix HUGGINGFACE endpoint URL, closes [#5099](https://github.com/lobehub/lobe-chat/issues/5099) ([abc80dc](https://github.com/lobehub/lobe-chat/commit/abc80dc))
+
+
+
+
+
+[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
+
+
+
### [Version 1.36.41](https://github.com/lobehub/lobe-chat/compare/v1.36.40...v1.36.41)
Released on **2024-12-21**
diff --git a/changelog/v1.json b/changelog/v1.json
index 26d6fa80e951..45535a76dcc3 100644
--- a/changelog/v1.json
+++ b/changelog/v1.json
@@ -1,4 +1,11 @@
[
+ {
+ "children": {
+ "fixes": ["Fix HUGGINGFACE endpoint URL."]
+ },
+ "date": "2024-12-21",
+ "version": "1.36.42"
+ },
{
"children": {
"improvements": ["Upgrade react scan."]
diff --git a/package.json b/package.json
index f1e2c16031a5..fd4dce7ed549 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@lobehub/chat",
- "version": "1.36.41",
+ "version": "1.36.42",
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
"keywords": [
"framework",
diff --git a/src/libs/agent-runtime/huggingface/index.ts b/src/libs/agent-runtime/huggingface/index.ts
index b31a12b1af27..552652f9470a 100644
--- a/src/libs/agent-runtime/huggingface/index.ts
+++ b/src/libs/agent-runtime/huggingface/index.ts
@@ -1,4 +1,5 @@
import { HfInference } from '@huggingface/inference';
+import urlJoin from 'url-join';
import { AgentRuntimeErrorType } from '../error';
import { ModelProvider } from '../types';
@@ -23,7 +24,9 @@ export const LobeHuggingFaceAI = LobeOpenAICompatibleFactory({
createChatCompletionStream: (client: HfInference, payload, instance) => {
const { max_tokens = 4096} = payload;
const hfRes = client.chatCompletionStream({
- endpointUrl: instance.baseURL,
+ endpointUrl: instance.baseURL
+ ? urlJoin(instance.baseURL, payload.model)
+ : instance.baseURL,
max_tokens: max_tokens,
messages: payload.messages,
model: payload.model,