diff --git a/.github/workflows/build-and-push-wasm-plugin-image.yaml b/.github/workflows/build-and-push-wasm-plugin-image.yaml
index 41935de210..2406582296 100644
--- a/.github/workflows/build-and-push-wasm-plugin-image.yaml
+++ b/.github/workflows/build-and-push-wasm-plugin-image.yaml
@@ -133,6 +133,11 @@ jobs:
           command="
           set -e
           cd /workspace/plugins/wasm-rust/extensions/${PLUGIN_NAME}
+          if [ -f ./.prebuild ]; then
+            echo 'Found .prebuild file, sourcing it...'
+            . ./.prebuild
+          fi
+          rustup target add wasm32-wasip1
           cargo build --target wasm32-wasip1 --release
           cp target/wasm32-wasip1/release/*.wasm plugin.wasm
           tar czvf plugin.tar.gz plugin.wasm
diff --git a/.github/workflows/build-image-and-push.yaml b/.github/workflows/build-image-and-push.yaml
index 938b041f30..4d789ddef2 100644
--- a/.github/workflows/build-image-and-push.yaml
+++ b/.github/workflows/build-image-and-push.yaml
@@ -1,229 +1,258 @@
-name: Build Docker Images and Push to Image Registry
-
-on:
-  push:
-    tags:
-    - "v*.*.*"
-  workflow_dispatch: ~
-
-jobs:
-  build-controller-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-controller
-    env:
-      CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}
-
-      - name: Build Docker Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make docker-buildx-push
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
-
-  build-pilot-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-pilot
-    env:
-      PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.PILOT_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}
-
-      - name: Build Pilot-Discovery Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make build-istio
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
-
-
-  build-gateway-image:
-    runs-on: ubuntu-latest
-    environment:
-      name: image-registry-pilot
-    env:
-      GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
-      GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
-    steps:
-      - name: "Checkout ${{ github.ref }}"
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 1
-
-      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
-        uses: jlumbroso/free-disk-space@main
-        with:
-          tool-cache: false
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          swap-storage: true
-
-      - name: "Setup Go"
-        uses: actions/setup-go@v5
-        with:
-          go-version: 1.21.5
-
-      - name: Setup Golang Caches
-        uses: actions/cache@v4
-        with:
-          path: |-
-            ~/.cache/go-build
-            ~/go/pkg/mod
-          key: ${{ runner.os }}-go-${{ github.run_id }}
-          restore-keys: ${{ runner.os }}-go
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Calculate Docker metadata
-        id: docker-meta
-        uses: docker/metadata-action@v5
-        with:
-          images: |
-            ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
-          tags: |
-            type=sha
-            type=ref,event=tag
-            type=semver,pattern={{version}}
-            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
-      - name: Login to Docker Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
-          username: ${{ secrets.REGISTRY_USERNAME }}
-          password: ${{ secrets.REGISTRY_PASSWORD }}            
-          
-      - name: Build Gateway Image and Push
-        run: |
-          GOPROXY="https://proxy.golang.org,direct" make build-gateway
-          BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/proxyv2"
-          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
-          for image in ${IMAGES[@]}; do
-            echo "Image: $image"
-            docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
-          done
+name: Build Docker Images and Push to Image Registry
+
+on:
+  push:
+    tags:
+    - "v*.*.*"
+  workflow_dispatch: ~
+
+jobs:
+  build-controller-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-controller
+    env:
+      CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Docker Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do  # left unquoted so an empty tag list yields no iterations
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$image" make docker-buildx-push
+              BUILT_IMAGE="$image"  # first tag is built and pushed; the rest are re-tags of it
+            else
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"
+            fi
+          done
+
+  build-pilot-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-pilot
+    env:
+      PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v2 is retired by GitHub; same major as the Go cache step
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PILOT_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Pilot-Discovery Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do  # left unquoted so an empty tag list yields no iterations
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST colon, so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # drop the ":tag" suffix
+              HUB=${HUB%/*}     # drop the trailing image-name path segment
+              BUILT_IMAGE="$HUB/pilot:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-istio
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"
+            fi
+          done
+
+  build-gateway-image:
+    runs-on: ubuntu-latest
+    environment:
+      name: image-registry-gateway
+    env:
+      GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+      GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
+    steps:
+      - name: "Checkout ${{ github.ref }}"
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+        uses: jlumbroso/free-disk-space@main
+        with:
+          tool-cache: false
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: "Setup Go"
+        uses: actions/setup-go@v5
+        with:
+          go-version: 1.21.5
+
+      - name: Setup Golang Caches
+        uses: actions/cache@v4
+        with:
+          path: |-
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ github.run_id }}
+          restore-keys: ${{ runner.os }}-go
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v2 is retired by GitHub; same major as the Go cache step
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Calculate Docker metadata
+        id: docker-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
+          tags: |
+            type=sha
+            type=ref,event=tag
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Gateway Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do  # left unquoted so an empty tag list yields no iterations
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST colon, so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # drop the ":tag" suffix
+              HUB=${HUB%/*}     # drop the trailing image-name path segment
+              BUILT_IMAGE="$HUB/proxyv2:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-gateway
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"
+            fi
+          done
diff --git a/.github/workflows/helm-docs.yaml b/.github/workflows/helm-docs.yaml
index d4637dbe1b..6ed5937fe3 100644
--- a/.github/workflows/helm-docs.yaml
+++ b/.github/workflows/helm-docs.yaml
@@ -10,7 +10,7 @@ on:
   push:
     branches: [ main ]
     paths:
-      - 'helm/**'    
+      - 'helm/**'
 
 jobs:
   helm:
@@ -39,6 +39,7 @@ jobs:
           rm -f ./helm-docs
 
   translate-readme:
+    if: false  # constant-false condition: this job never runs
     needs: helm
     runs-on: ubuntu-latest
 
diff --git a/Makefile.core.mk b/Makefile.core.mk
index 93aff0df81..2d84c0b118 100644
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -162,13 +162,13 @@ buildx-prepare:
 build-gateway: prebuild buildx-prepare
 	USE_REAL_USER=1 TARGET_ARCH=amd64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
 	USE_REAL_USER=1 TARGET_ARCH=arm64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
-	DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker.buildx
+	DOCKER_TARGETS="docker.proxyv2" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
 
 build-gateway-local: prebuild
 	TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker
 
 build-istio: prebuild buildx-prepare
-	DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker.buildx
+	DOCKER_TARGETS="docker.pilot" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
 
 build-istio-local: prebuild
 	TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker
diff --git a/docker/docker.mk b/docker/docker.mk
index b572176508..f9315a3271 100644
--- a/docker/docker.mk
+++ b/docker/docker.mk
@@ -35,6 +35,8 @@ DOCKER_ALL_VARIANTS ?= debug distroless
 INCLUDE_UNTAGGED_DEFAULT ?= false
 DEFAULT_DISTRIBUTION=debug
 
-HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push  ); )
-HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
+IMG ?= higress
+IMG_URL ?= $(HUB)/$(IMG):$(TAG)
 
+HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push  ); )
+HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
diff --git a/helm/core/README.md b/helm/core/README.md
index fdd3e61a79..0ccad6dfb4 100644
--- a/helm/core/README.md
+++ b/helm/core/README.md
@@ -2,4 +2,4 @@
 
 Installs the core components of cloud-native gateway [Higress](http://higress.io/)
 
-**Note:** It is highly recommended to install the whole package of Higress. Please visit https://higress.io/docs/user/quickstart/ for details.
\ No newline at end of file
+**Note:** It is highly recommended to install the whole package of Higress. Please visit https://higress.io/docs/user/quickstart/ for details.
diff --git a/helm/core/templates/_pod.tpl b/helm/core/templates/_pod.tpl
index 3e883d248b..c87e4d3eff 100644
--- a/helm/core/templates/_pod.tpl
+++ b/helm/core/templates/_pod.tpl
@@ -45,9 +45,9 @@ template:
           - router
           - --domain
           - $(POD_NAMESPACE).svc.cluster.local
-          - --proxyLogLevel=warning
-          - --proxyComponentLogLevel=misc:error
-          - --log_output_level=all:info
+          - --proxyLogLevel={{- default "warning" .Values.global.proxy.logLevel }}
+          - --proxyComponentLogLevel={{- default "misc:error" .Values.global.proxy.componentLogLevel }}
+          - --log_output_level={{- default "default:info" .Values.global.logging.level }}
           - --serviceCluster=higress-gateway
         securityContext:
         {{- if .Values.gateway.containerSecurityContext }}
diff --git a/helm/core/values.yaml b/helm/core/values.yaml
index 6186654a05..d4fdff6eb1 100644
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -491,6 +491,7 @@ gateway:
     externalTrafficPolicy: ""
 
   rollingMaxSurge: 100%
+  # -- If global.local is true, the default value is 100%, otherwise it is 25%
   rollingMaxUnavailable: 25%
 
   resources:
diff --git a/helm/higress/README.md b/helm/higress/README.md
index 3c23f42532..2cecaa91f4 100644
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -130,7 +130,7 @@ The command removes all the Kubernetes components associated with the chart and
 | gateway.resources.requests.memory | string | `"2048Mi"` |  |
 | gateway.revision | string | `""` | revision declares which revision this gateway is a part of |
 | gateway.rollingMaxSurge | string | `"100%"` |  |
-| gateway.rollingMaxUnavailable | string | `"25%"` |  |
+| gateway.rollingMaxUnavailable | string | `"25%"` | If global.local is true, the default value is 100%, otherwise it is 25% |
 | gateway.securityContext | string | `nil` | Define the security context for the pod. If unset, this will be automatically set to the minimum privileges required to bind to port 80 and 443. On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl. |
 | gateway.service.annotations | object | `{}` |  |
 | gateway.service.externalTrafficPolicy | string | `""` |  |
diff --git a/plugins/wasm-go/extensions/ai-cache/README.md b/plugins/wasm-go/extensions/ai-cache/README.md
index 999f472270..70f3e1b9d4 100644
--- a/plugins/wasm-go/extensions/ai-cache/README.md
+++ b/plugins/wasm-go/extensions/ai-cache/README.md
@@ -86,7 +86,8 @@ LLM 结果缓存插件，默认配置方式可以直接用于 openai 协议的
 | cache.password | string | optional | "" | 缓存服务密码 |
 | cache.timeout | uint32 | optional | 10000 | 缓存服务的超时时间，单位为毫秒。默认值是10000，即10秒 |
 | cache.cacheTTL | int | optional | 0 | 缓存过期时间，单位为秒。默认值是 0，即 永不过期|
-| cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀，默认值为 "higress-ai-cache:" |
+| cache.cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀，默认值为 "higress-ai-cache:" |
+| cache.database | int | optional | 0 | 使用的数据库id，仅限redis，例如配置为1，对应`SELECT 1` |
 
 
 ## 其他配置
@@ -168,6 +169,7 @@ redis:
   serviceName: my_redis.dns
   servicePort: 6379
   timeout: 100
+  database: 1
 ```
 
 ## 进阶用法
diff --git a/plugins/wasm-go/extensions/ai-cache/README_EN.md b/plugins/wasm-go/extensions/ai-cache/README_EN.md
index 7544995999..d48f9f71b9 100644
--- a/plugins/wasm-go/extensions/ai-cache/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-cache/README_EN.md
@@ -15,26 +15,29 @@ Plugin Execution Phase: `Authentication Phase`
 Plugin Execution Priority: `10`
 
 ## Configuration Description
-| Name                              | Type     | Requirement | Default                                                                                                                                                                                                                                                 | Description                                                                                                |
-| --------                          | -------- | --------    | --------                                                                                                                                                                                                                                                | --------                                                                                                   |
-| cacheKeyFrom.requestBody          | string   | optional    | "messages.@reverse.0.content"                                                                                                                                                                                                                           | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax     |
-| cacheValueFrom.responseBody       | string   | optional    | "choices.0.message.content"                                                                                                                                                                                                                             | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax     |
+| Name                              | Type     | Requirement | Default                                                                                                                                                                                                                                                 | Description                                                                                                                             |
+| --------                          | -------- | --------    | --------                                                                                                                                                                                                                                                | --------                                                                                                                                |
+| cacheKeyFrom.requestBody          | string   | optional    | "messages.@reverse.0.content"                                                                                                                                                                                                                           | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax            |
+| cacheValueFrom.responseBody       | string   | optional    | "choices.0.message.content"                                                                                                                                                                                                                             | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax           |
 | cacheStreamValueFrom.responseBody | string   | optional    | "choices.0.delta.content"                                                                                                                                                                                                                               | Extracts a string from the streaming response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
-| cacheKeyPrefix                    | string   | optional    | "higress-ai-cache:"                                                                                                                                                                                                                                     | Prefix for the Redis cache key                                                                                         |
-| cacheTTL                          | integer  | optional    | 0                                                                                                                                                                                                                                                       | Cache expiration time in seconds, default value is 0, which means never expire                                                            |
-| redis.serviceName                 | string   | required    | -                                                                                                                                                                                                                                                       | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local               |
-| redis.servicePort                 | integer  | optional    | 6379                                                                                                                                                                                                                                                    | Redis service port                                                                                             |
-| redis.timeout                     | integer  | optional    | 1000                                                                                                                                                                                                                                                    | Timeout for requests to Redis, in milliseconds                                                                          |
-| redis.username                    | string   | optional    | -                                                                                                                                                                                                                                                       | Username for logging into Redis                                                                                        |
-| redis.password                    | string   | optional    | -                                                                                                                                                                                                                                                       | Password for logging into Redis                                                                                          |
-| returnResponseTemplate            | string   | optional    | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`                                                                                                     | Template for returning HTTP response, with %s marking the part to be replaced by cache value                                              |
-| returnStreamResponseTemplate      | string   | optional    | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value                                          |
+| cacheKeyPrefix                    | string   | optional    | "higress-ai-cache:"                                                                                                                                                                                                                                     | Prefix for the Redis cache key                                                                                                          |
+| cacheTTL                          | integer  | optional    | 0                                                                                                                                                                                                                                                       | Cache expiration time in seconds, default value is 0, which means never expire                                                          |
+| redis.serviceName                 | string   | required    | -                                                                                                                                                                                                                                                       | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local              |
+| redis.servicePort                 | integer  | optional    | 6379                                                                                                                                                                                                                                                    | Redis service port                                                                                                                      |
+| redis.timeout                     | integer  | optional    | 1000                                                                                                                                                                                                                                                    | Timeout for requests to Redis, in milliseconds                                                                                          |
+| redis.username                    | string   | optional    | -                                                                                                                                                                                                                                                       | Username for logging into Redis                                                                                                         |
+| redis.database                    | int      | optional    | 0                                                                                                                                                                                                                                                       | The database ID used, limited to Redis, for example, configured as 1, corresponds to `SELECT 1`.                                        |
+| redis.password                    | string   | optional    | -                                                                                                                                                                                                                                                       | Password for logging into Redis                                                                                                         |
+| returnResponseTemplate            | string   | optional    | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}`                                                                                                     | Template for returning HTTP response, with %s marking the part to be replaced by cache value                                            |
+| returnStreamResponseTemplate      | string   | optional    | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value                                  |
 
 ## Configuration Example
 ```yaml  
 redis:  
   serviceName: my-redis.dns  
   timeout: 2000  
+  servicePort: 6379
+  database: 1
 ```  
 
 ## Advanced Usage
diff --git a/plugins/wasm-go/extensions/ai-cache/cache/provider.go b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
index d68acd5099..9afca2c12e 100644
--- a/plugins/wasm-go/extensions/ai-cache/cache/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
@@ -52,6 +52,9 @@ type ProviderConfig struct {
 	// @Title 缓存 Key 前缀
 	// @Description 缓存 Key 的前缀，默认值为 "higressAiCache:"
 	cacheKeyPrefix string
+	// @Title redis database
+	// @Description 指定 redis 的 database，默认使用0
+	database int
 }
 
 func (c *ProviderConfig) GetProviderType() string {
@@ -79,6 +82,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 	if !json.Get("password").Exists() {
 		c.password = ""
 	}
+	c.database = int(json.Get("database").Int())
 	c.timeout = uint32(json.Get("timeout").Int())
 	if !json.Get("timeout").Exists() {
 		c.timeout = 10000
diff --git a/plugins/wasm-go/extensions/ai-cache/cache/redis.go b/plugins/wasm-go/extensions/ai-cache/cache/redis.go
index 4cb69744e1..b4a116ab89 100644
--- a/plugins/wasm-go/extensions/ai-cache/cache/redis.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/redis.go
@@ -38,7 +38,7 @@ func (rp *redisProvider) GetProviderType() string {
 }
 
 func (rp *redisProvider) Init(username string, password string, timeout uint32) error {
-	return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout))
+	return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout), wrapper.WithDataBase(rp.config.database)) // every value comes from rp.config; the username/password/timeout parameters are not read
 }
 
 func (rp *redisProvider) Get(key string, cb wrapper.RedisResponseCallback) error {
diff --git a/plugins/wasm-go/extensions/ai-cache/config/config.go b/plugins/wasm-go/extensions/ai-cache/config/config.go
index 80c6147374..bc1093a567 100644
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -28,9 +28,9 @@ type PluginConfig struct {
 	embeddingProvider embedding.Provider
 	vectorProvider    vector.Provider
 
-	embeddingProviderConfig embedding.ProviderConfig
-	vectorProviderConfig    vector.ProviderConfig
-	cacheProviderConfig     cache.ProviderConfig
+	embeddingProviderConfig *embedding.ProviderConfig
+	vectorProviderConfig    *vector.ProviderConfig
+	cacheProviderConfig     *cache.ProviderConfig
 
 	CacheKeyFrom         string
 	CacheValueFrom       string
@@ -47,7 +47,9 @@ type PluginConfig struct {
 }
 
 func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {
-
+	c.embeddingProviderConfig = &embedding.ProviderConfig{}
+	c.vectorProviderConfig = &vector.ProviderConfig{}
+	c.cacheProviderConfig = &cache.ProviderConfig{}
 	c.vectorProviderConfig.FromJson(json.Get("vector"))
 	c.embeddingProviderConfig.FromJson(json.Get("embedding"))
 	c.cacheProviderConfig.FromJson(json.Get("cache"))
@@ -142,7 +144,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	var err error
 	if c.embeddingProviderConfig.GetProviderType() != "" {
 		log.Debugf("embedding provider is set to %s", c.embeddingProviderConfig.GetProviderType())
-		c.embeddingProvider, err = embedding.CreateProvider(c.embeddingProviderConfig)
+		c.embeddingProvider, err = embedding.CreateProvider(*c.embeddingProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -152,7 +154,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	}
 	if c.cacheProviderConfig.GetProviderType() != "" {
 		log.Debugf("cache provider is set to %s", c.cacheProviderConfig.GetProviderType())
-		c.cacheProvider, err = cache.CreateProvider(c.cacheProviderConfig)
+		c.cacheProvider, err = cache.CreateProvider(*c.cacheProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -162,7 +164,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 	}
 	if c.vectorProviderConfig.GetProviderType() != "" {
 		log.Debugf("vector provider is set to %s", c.vectorProviderConfig.GetProviderType())
-		c.vectorProvider, err = vector.CreateProvider(c.vectorProviderConfig)
+		c.vectorProvider, err = vector.CreateProvider(*c.vectorProviderConfig)
 		if err != nil {
 			return err
 		}
@@ -182,7 +184,7 @@ func (c *PluginConfig) GetVectorProvider() vector.Provider {
 }
 
 func (c *PluginConfig) GetVectorProviderConfig() vector.ProviderConfig {
-	return c.vectorProviderConfig
+	return *c.vectorProviderConfig // returns a copy of the pointed-to config; NOTE(review): the pointer is assigned in FromJson - confirm no caller reaches this earlier
 }
 
 func (c *PluginConfig) GetCacheProvider() cache.Provider {
diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
new file mode 100644
index 0000000000..a61bf77827
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
@@ -0,0 +1,151 @@
+package embedding
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+	"net/http"
+	"strconv"
+)
+
+const (
+	OLLAMA_DOMAIN             = "localhost"  // serviceHost used by CreateProvider when none is configured
+	OLLAMA_PORT               = 11434        // servicePort used by CreateProvider when it is 0
+	OLLAMA_DEFAULT_MODEL_NAME = "llama3.2"   // model used by CreateProvider when none is configured
+	OLLAMA_ENDPOINT           = "/api/embed" // request path returned by constructParameters
+)
+
+// ollamaProviderInitializer is the stateless initializer for the Ollama embedding provider.
+type ollamaProviderInitializer struct{}
+
+// InitConfig is a no-op: nothing is read from the supplied JSON.
+func (c *ollamaProviderInitializer) InitConfig(json gjson.Result) {}
+
+// ValidateConfig performs no checks and always returns nil.
+func (c *ollamaProviderInitializer) ValidateConfig() error { return nil }
+
+type ollamaProvider struct {
+	config ProviderConfig                              // provider settings after CreateProvider has filled in the Ollama defaults
+	client *wrapper.ClusterClient[wrapper.FQDNCluster] // cluster client that GetEmbedding posts requests through
+}
+
+// CreateProvider builds an ollamaProvider from c, substituting the Ollama
+// defaults for an unset port (0), host ("") or model ("").
+func (t *ollamaProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
+	if c.model == "" {
+		c.model = OLLAMA_DEFAULT_MODEL_NAME
+	}
+	if c.serviceHost == "" {
+		c.serviceHost = OLLAMA_DOMAIN
+	}
+	if c.servicePort == 0 {
+		c.servicePort = OLLAMA_PORT
+	}
+
+	cluster := wrapper.FQDNCluster{
+		FQDN: c.serviceName,
+		Host: c.serviceHost,
+		Port: c.servicePort,
+	}
+	return &ollamaProvider{config: c, client: wrapper.NewClusterClient(cluster)}, nil
+}
+
+// GetProviderType reports which embedding provider this is; the value is
+// always PROVIDER_TYPE_OLLAMA.
+func (t *ollamaProvider) GetProviderType() string { return PROVIDER_TYPE_OLLAMA }
+
+type ollamaResponse struct { // JSON reply decoded by parseTextEmbedding
+	Model           string      `json:"model"`
+	Embeddings      [][]float64 `json:"embeddings"` // GetEmbedding hands Embeddings[0] to its callback
+	TotalDuration   int64       `json:"total_duration"`
+	LoadDuration    int64       `json:"load_duration"`
+	PromptEvalCount int64       `json:"prompt_eval_count"`
+}
+
+type ollamaEmbeddingRequest struct { // JSON request body built by constructParameters
+	Input string `json:"input"` // text to embed
+	Model string `json:"model"` // filled from config.model
+}
+
+// constructParameters builds the request path, headers and JSON body for an
+// embed call on text; an empty text or a marshalling failure yields an error.
+func (t *ollamaProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
+	if text == "" {
+		return "", nil, nil, errors.New("queryString text cannot be empty")
+	}
+
+	payload, marshalErr := json.Marshal(ollamaEmbeddingRequest{
+		Input: text,
+		Model: t.config.model,
+	})
+	if marshalErr != nil {
+		log.Errorf("failed to marshal request data: %v", marshalErr)
+		return "", nil, nil, marshalErr
+	}
+
+	// The only header sent marks the body as JSON.
+	reqHeaders := [][2]string{
+		{"Content-Type", "application/json"},
+	}
+	log.Debugf("constructParameters: %s", string(payload))
+
+	return OLLAMA_ENDPOINT, reqHeaders, payload, nil
+}
+
+func (t *ollamaProvider) parseTextEmbedding(responseBody []byte) (*ollamaResponse, error) { // decodes responseBody as an ollamaResponse
+	parsed := new(ollamaResponse)
+	if decodeErr := json.Unmarshal(responseBody, parsed); decodeErr != nil {
+		return nil, fmt.Errorf("failed to parse response: %w", decodeErr) // %w keeps the JSON error unwrappable
+	}
+	return parsed, nil
+}
+
+// GetEmbedding posts queryString to the Ollama embed endpoint and passes the
+// first embedding in the reply, or an error, to callback. The returned error
+// is the one from constructParameters or client.Post; a Post error is also
+// delivered to callback by the deferred function below.
+func (t *ollamaProvider) GetEmbedding(
+	queryString string,
+	ctx wrapper.HttpContext,
+	log wrapper.Log,
+	callback func(emb []float64, err error)) error {
+	embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
+	if err != nil {
+		log.Errorf("failed to construct parameters: %v", err)
+		return err
+	}
+
+	defer func() {
+		if err != nil {
+			callback(nil, err)
+		}
+	}()
+	err = t.client.Post(embUrl, embHeaders, embRequestBody,
+		func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+			if statusCode != http.StatusOK {
+				callback(nil, errors.New("failed to get embedding due to status code: "+strconv.Itoa(statusCode)))
+				return
+			}
+
+			// Response-local variables keep the outer err for the Post result only.
+			// parseTextEmbedding already prefixes its error, so it is not wrapped again.
+			resp, parseErr := t.parseTextEmbedding(responseBody)
+			if parseErr != nil {
+				callback(nil, parseErr)
+				return
+			}
+
+			log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
+
+			// An empty embeddings array is reported as an error rather than indexed.
+			if len(resp.Embeddings) == 0 {
+				callback(nil, errors.New("no embedding found in response"))
+				return
+			}
+
+			callback(resp.Embeddings[0], nil)
+		}, t.config.timeout)
+	return err
+}
diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
index 608f50ad54..7f0e14b269 100644
--- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
@@ -12,6 +12,7 @@ const (
 	PROVIDER_TYPE_TEXTIN    = "textin"
 	PROVIDER_TYPE_COHERE    = "cohere"
 	PROVIDER_TYPE_OPENAI    = "openai"
+	PROVIDER_TYPE_OLLAMA    = "ollama"
 )
 
 type providerInitializer interface {
@@ -26,6 +27,7 @@ var (
 		PROVIDER_TYPE_TEXTIN:    &textInProviderInitializer{},
 		PROVIDER_TYPE_COHERE:    &cohereProviderInitializer{},
 		PROVIDER_TYPE_OPENAI:    &openAIProviderInitializer{},
+		PROVIDER_TYPE_OLLAMA:    &ollamaProviderInitializer{},
 	}
 )
 
diff --git a/plugins/wasm-go/extensions/ai-cache/main.go b/plugins/wasm-go/extensions/ai-cache/main.go
index 4bb3f2bad1..41014c5ebd 100644
--- a/plugins/wasm-go/extensions/ai-cache/main.go
+++ b/plugins/wasm-go/extensions/ai-cache/main.go
@@ -23,7 +23,7 @@ const (
 	SKIP_CACHE_HEADER           = "x-higress-skip-ai-cache"
 	ERROR_PARTIAL_MESSAGE_KEY   = "errorPartialMessage"
 
-	DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024
+	DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
 )
 
 func main() {
diff --git a/plugins/wasm-go/extensions/ai-history/README.md b/plugins/wasm-go/extensions/ai-history/README.md
index d4684d292d..b8462345c5 100644
--- a/plugins/wasm-go/extensions/ai-history/README.md
+++ b/plugins/wasm-go/extensions/ai-history/README.md
@@ -20,17 +20,18 @@ description: AI 历史对话插件配置参考
 
 ## 配置字段
 
-| 名称                | 数据类型    | 填写要求     | 默认值                   | Description                                                               |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader    | string  | optional | "Authorization"       | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等                               |
-| fillHistoryCnt    | integer | optional | 3                     | 默认填充历史对话轮次                                                                |
-| cacheKeyPrefix    | string  | optional | "higress-ai-history:" | Redis缓存Key的前缀                                                             |
-| cacheTTL          | integer | optional | 0                     | 缓存的过期时间，单位是秒，默认值为0，即永不过期                                                  |
-| redis.serviceName | string  | required | -                     | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379                  | redis 服务端口                                                                |
-| redis.timeout     | integer | optional | 1000                  | 请求 redis 的超时时间，单位为毫秒                                                      |
-| redis.username    | string  | optional | -                     | 登陆 redis 的用户名                                                             |
-| redis.password    | string  | optional | -                     | 登陆 redis 的密码                                                              |
+| 名称              | 数据类型 | 填写要求 | 默认值                | Description                                                                                  |
+|-------------------|----------|----------|-----------------------|----------------------------------------------------------------------------------------------|
+| identityHeader    | string   | optional | "Authorization"       | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等                                     |
+| fillHistoryCnt    | integer  | optional | 3                     | 默认填充历史对话轮次                                                                         |
+| cacheKeyPrefix    | string   | optional | "higress-ai-history:" | Redis缓存Key的前缀                                                                           |
+| cacheTTL          | integer  | optional | 0                     | 缓存的过期时间，单位是秒，默认值为0，即永不过期                                              |
+| redis.serviceName | string   | required | -                     | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer  | optional | 6379                  | redis 服务端口                                                                               |
+| redis.timeout     | integer  | optional | 1000                  | 请求 redis 的超时时间，单位为毫秒                                                            |
+| redis.username    | string   | optional | -                     | 登陆 redis 的用户名                                                                          |
+| redis.password    | string   | optional | -                     | 登陆 redis 的密码                                                                            |
+| redis.database    | int      | optional | 0                     | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |
 
 ## 用法示例
 
diff --git a/plugins/wasm-go/extensions/ai-history/README_EN.md b/plugins/wasm-go/extensions/ai-history/README_EN.md
index 1fc6144d40..7d0149a019 100644
--- a/plugins/wasm-go/extensions/ai-history/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-history/README_EN.md
@@ -15,17 +15,19 @@ Plugin Execution Phase: `Default Phase`
 Plugin Execution Priority: `650`
 
 ## Configuration Fields
-| Name                | Data Type  | Required   | Default Value                   | Description                                                               |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader    | string  | optional | "Authorization"       | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc.                               |
-| fillHistoryCnt    | integer | optional | 3                     | Default number of historical dialogues to be filled.                                                                |
-| cacheKeyPrefix    | string  | optional | "higress-ai-history:" | Prefix for Redis cache key.                                                             |
-| cacheTTL          | integer | optional | 0                     | Cache expiration time in seconds, default value is 0, meaning it never expires.                                                  |
-| redis.serviceName | string  | required | -                     | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379                  | Redis service port.                                                                |
-| redis.timeout     | integer | optional | 1000                  | Timeout for requests to Redis, in milliseconds.                                                      |
-| redis.username    | string  | optional | -                     | Username for logging into Redis.                                                             |
-| redis.password    | string  | optional | -                     | Password for logging into Redis.                                                              |
+| Name              | Data Type | Required | Default Value         | Description                                                                                             |
+|-------------------|-----------|----------|-----------------------|---------------------------------------------------------------------------------------------------------|
+| identityHeader    | string    | optional | "Authorization"       | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc.                  |
+| fillHistoryCnt    | integer   | optional | 3                     | Default number of historical dialogues to be filled.                                                    |
+| cacheKeyPrefix    | string    | optional | "higress-ai-history:" | Prefix for Redis cache key.                                                                             |
+| cacheTTL          | integer   | optional | 0                     | Cache expiration time in seconds, default value is 0, meaning it never expires.                         |
+| redis.serviceName | string    | required | -                     | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer   | optional | 6379                  | Redis service port.                                                                                     |
+| redis.timeout     | integer   | optional | 1000                  | Timeout for requests to Redis, in milliseconds.                                                         |
+| redis.username    | string    | optional | -                     | Username for logging into Redis.                                                                        |
+| redis.password    | string    | optional | -                     | Password for logging into Redis.                                                                        |
+| redis.database    | int       | optional | 0                     | ID of the Redis database to use; for example, 1 corresponds to `SELECT 1`.                              |
+
 
 ## Usage Example
 ### Configuration Information
diff --git a/plugins/wasm-go/extensions/ai-history/main.go b/plugins/wasm-go/extensions/ai-history/main.go
index 3f728dd96d..f0fabaaa4c 100644
--- a/plugins/wasm-go/extensions/ai-history/main.go
+++ b/plugins/wasm-go/extensions/ai-history/main.go
@@ -76,6 +76,9 @@ type RedisInfo struct {
 	// @Title zh-CN 请求超时
 	// @Description zh-CN 请求 redis 的超时时间，单位为毫秒。默认值是1000，即1秒
 	Timeout int `required:"false" yaml:"timeout" json:"timeout"`
+	// @Title zh-CN Database
+	// @Description zh-CN redis database
+	Database int `required:"false" yaml:"database" json:"database"`
 }
 
 type KVExtractor struct {
@@ -138,6 +141,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
 	if c.RedisInfo.Timeout == 0 {
 		c.RedisInfo.Timeout = 1000
 	}
+	c.RedisInfo.Database = int(json.Get("redis.database").Int())
 	c.QuestionFrom.RequestBody = "messages.@reverse.0.content"
 	c.AnswerValueFrom.ResponseBody = "choices.0.message.content"
 	c.AnswerStreamValueFrom.ResponseBody = "choices.0.delta.content"
@@ -159,7 +163,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
 		FQDN: c.RedisInfo.ServiceName,
 		Port: int64(c.RedisInfo.ServicePort),
 	})
-	return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout))
+	return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout), wrapper.WithDataBase(c.RedisInfo.Database))
 }
 
 func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {
diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md
index 8f281ffd2b..cb685e6e03 100644
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -31,18 +31,19 @@ description: AI 代理插件配置参考
 
 `provider`的配置字段说明如下：
 
-| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                        |
-|------------------| --------------- | -------- | ------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                |
-| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                     |
-| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟                                                                                                                    |
-| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。 |
-| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                          |
-| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                             |
-| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                           |
-| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                      |
-| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                              |  
-| `capabilities`       | map of string | 非必填   | -      |  部分provider的部分ai能力原生兼容openai/v1格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key表示的是采用的厂商协议能力，values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
+| 名称               | 数据类型        | 填写要求 | 默认值 | 描述                                                                                                                                                                                                                                        |
+|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `type`           | string          | 必填     | -      | AI 服务提供商名称                                                                                                                                                                                                                                |
+| `apiTokens`      | array of string | 非必填   | -      | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token，插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。                                                                                                                                                                     |
+| `timeout`        | number          | 非必填   | -      | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000，即 2 分钟                                                                                                                                                                                                    |
+| `modelMapping`   | map of string   | 非必填   | -      | AI 模型映射表，用于将请求中的模型名称映射为服务提供商支持模型名称。<br/>1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型；<br/>2. 支持使用 "*" 为键来配置通用兜底映射关系；<br/>3. 如果映射的目标名称为空字符串 ""，则表示保留原模型名称。                                                                                 |
+| `protocol`       | string          | 非必填   | -      | 插件对外提供的 API 接口契约。目前支持以下取值：openai（默认值，使用 OpenAI 的接口契约）、original（使用目标服务提供商的原始接口契约）                                                                                                                                                          |
+| `context`        | object          | 非必填   | -      | 配置 AI 对话上下文信息                                                                                                                                                                                                                             |
+| `customSettings` | array of customSetting | 非必填   | -      | 为AI请求指定覆盖或者填充参数                                                                                                                                                                                                                           |
+| `failover`       | object | 非必填   | -      | 配置 apiToken 的 failover 策略，当 apiToken 不可用时，将其移出 apiToken 列表，待健康检测通过后重新添加回 apiToken 列表                                                                                                                                                      |
+| `retryOnFailure` | object | 非必填   | -      | 当请求失败时立即进行重试                                                                                                                                                                                                                              |  
+| `reasoningContentMode`       | string          | 非必填   | -      | 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。                                                                                                                            |
+| `capabilities`       | map of string | 非必填   | -      | 部分provider的部分ai能力原生兼容openai/v1格式，不需要重写，可以直接转发，通过此配置项指定来开启转发, key表示的是采用的厂商协议能力，values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
 
 `context`的配置字段说明如下：
 
diff --git a/plugins/wasm-go/extensions/ai-proxy/config/config.go b/plugins/wasm-go/extensions/ai-proxy/config/config.go
index 48f08dd9e4..f0b820345a 100644
--- a/plugins/wasm-go/extensions/ai-proxy/config/config.go
+++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go
@@ -80,13 +80,16 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
 		c.activeProvider = nil
 		return nil
 	}
+
 	var err error
+
 	c.activeProvider, err = provider.CreateProvider(*c.activeProviderConfig)
+	if err != nil {
+		return err
+	}
 
 	providerConfig := c.GetProviderConfig()
-	err = providerConfig.SetApiTokensFailover(log, c.activeProvider)
-
-	return err
+	return providerConfig.SetApiTokensFailover(log, c.activeProvider)
 }
 
 func (c *PluginConfig) GetProvider() provider.Provider {
diff --git a/plugins/wasm-go/extensions/ai-proxy/main.go b/plugins/wasm-go/extensions/ai-proxy/main.go
index dc6bc123ce..35d06b9502 100644
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -15,12 +15,13 @@ import (
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
 	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
 )
 
 const (
 	pluginName = "ai-proxy"
 
-	defaultMaxBodyBytes uint32 = 10 * 1024 * 1024
+	defaultMaxBodyBytes uint32 = 100 * 1024 * 1024
 )
 
 func main() {
@@ -40,9 +41,11 @@ func parseGlobalConfig(json gjson.Result, pluginConfig *config.PluginConfig, log
 
 	pluginConfig.FromJson(json)
 	if err := pluginConfig.Validate(); err != nil {
+		log.Errorf("global rule config is invalid: %v", err)
 		return err
 	}
 	if err := pluginConfig.Complete(log); err != nil {
+		log.Errorf("failed to apply global rule config: %v", err)
 		return err
 	}
 
@@ -56,9 +59,11 @@ func parseOverrideRuleConfig(json gjson.Result, global config.PluginConfig, plug
 
 	pluginConfig.FromJson(json)
 	if err := pluginConfig.Validate(); err != nil {
+		log.Errorf("overriden rule config is invalid: %v", err)
 		return err
 	}
 	if err := pluginConfig.Complete(log); err != nil {
+		log.Errorf("failed to apply overriden rule config: %v", err)
 		return err
 	}
 
@@ -98,21 +103,23 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
 
 	// Always remove the Accept-Encoding header to prevent the LLM from sending compressed responses,
 	// allowing plugins to inspect or modify the response correctly
-	proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
 
 	if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
 		// Set the apiToken for the current request.
 		providerConfig.SetApiTokenInUse(ctx, log)
+		// Record the apiTokens available to the current request in the context; retryOnFailure uses them later.
+		providerConfig.SetAvailableApiTokens(ctx, log)
 
 		err := handler.OnRequestHeaders(ctx, apiName, log)
 		if err != nil {
-			util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
+			_ = util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
 			return types.ActionContinue
 		}
 
 		hasRequestBody := wrapper.HasRequestBody()
 		if hasRequestBody {
-			proxywasm.RemoveHttpRequestHeader("Content-Length")
+			_ = proxywasm.RemoveHttpRequestHeader("Content-Length")
 			ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
 			// Delay the header processing to allow changing in OnRequestBody
 			return types.HeaderStopIteration
@@ -136,23 +143,21 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
 
 	if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
-
-		newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body)
+		providerConfig := pluginConfig.GetProviderConfig()
+		newBody, settingErr := providerConfig.ReplaceByCustomSettings(body)
 		if settingErr != nil {
-			util.ErrorHandler(
-				"ai-proxy.proc_req_body_failed",
-				fmt.Errorf("failed to replace request body by custom settings: %v", settingErr),
-			)
-			return types.ActionContinue
+			log.Errorf("failed to replace request body by custom settings: %v", settingErr)
+		}
+		if providerConfig.IsOpenAIProtocol() {
+			newBody = normalizeOpenAiRequestBody(newBody, log)
 		}
-
 		log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
 		body = newBody
 		action, err := handler.OnRequestBody(ctx, apiName, body, log)
 		if err == nil {
 			return action
 		}
-		util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
+		_ = util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
 	}
 	return types.ActionContinue
 }
@@ -176,6 +181,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 
 	providerConfig := pluginConfig.GetProviderConfig()
 	apiTokenInUse := providerConfig.GetApiTokenInUse(ctx)
+	apiTokens := providerConfig.GetAvailableApiToken(ctx)
 
 	status, err := proxywasm.GetHttpResponseHeader(":status")
 	if err != nil || status != "200" {
@@ -183,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 			log.Errorf("unable to load :status header from response: %v", err)
 		}
 		ctx.DontReadResponseBody()
-		return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log)
+		return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log)
 	}
 
 	// Reset ctxApiTokenRequestFailureCount if the request is successful,
@@ -201,7 +207,11 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
 
 	checkStream(ctx, log)
 	_, needHandleBody := activeProvider.(provider.TransformResponseBodyHandler)
-	_, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
+	var needHandleStreamingBody bool
+	_, needHandleStreamingBody = activeProvider.(provider.StreamingResponseBodyHandler)
+	if !needHandleStreamingBody {
+		_, needHandleStreamingBody = activeProvider.(provider.StreamingEventHandler)
+	}
 	if !needHandleBody && !needHandleStreamingBody {
 		ctx.DontReadResponseBody()
 	} else if !needHandleStreamingBody {
@@ -220,7 +230,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 	}
 
 	log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
-	log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
+	log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
 
 	if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
@@ -230,6 +240,38 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
 		}
 		return chunk
 	}
+	if handler, ok := activeProvider.(provider.StreamingEventHandler); ok {
+		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
+		events := provider.ExtractStreamingEvents(ctx, chunk, log)
+		log.Debugf("[onStreamingResponseBody] %d events received", len(events))
+		if len(events) == 0 {
+			// No events are extracted, return the original chunk
+			return chunk
+		}
+		var responseBuilder strings.Builder
+		for _, event := range events {
+			log.Debugf("processing event: %v", event)
+
+			if event.IsEndData() {
+				responseBuilder.WriteString(event.ToHttpString())
+				continue
+			}
+
+			outputEvents, err := handler.OnStreamingEvent(ctx, apiName, event, log)
+			if err != nil {
+				log.Errorf("[onStreamingResponseBody] failed to process streaming event: %v\n%s", err, chunk)
+				return chunk
+			}
+			if outputEvents == nil || len(outputEvents) == 0 {
+				responseBuilder.WriteString(event.ToHttpString())
+			} else {
+				for _, outputEvent := range outputEvents {
+					responseBuilder.WriteString(outputEvent.ToHttpString())
+				}
+			}
+		}
+		return []byte(responseBuilder.String())
+	}
 	return chunk
 }
 
@@ -247,16 +289,28 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
 		apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
 		body, err := handler.TransformResponseBody(ctx, apiName, body, log)
 		if err != nil {
-			util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
+			_ = util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
 			return types.ActionContinue
 		}
 		if err = provider.ReplaceResponseBody(body, log); err != nil {
-			util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
+			_ = util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
 		}
 	}
 	return types.ActionContinue
 }
 
+func normalizeOpenAiRequestBody(body []byte, log wrapper.Log) []byte {
+	var err error
+	// Default setting include_usage.
+	if gjson.GetBytes(body, "stream").Bool() {
+		body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
+		if err != nil {
+			log.Errorf("set include_usage failed, err:%s", err)
+		}
+	}
+	return body
+}
+
 func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
 	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
 	if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
index 6c8259949b..9644693f5e 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
@@ -32,6 +32,8 @@ type failover struct {
 	healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"`
 	// @Title zh-CN 本次请求使用的 apiToken
 	ctxApiTokenInUse string
+	// @Title zh-CN 记录本次请求时所有可用的 apiToken
+	ctxAvailableApiTokensInRequest string
 	// @Title zh-CN 记录 apiToken 请求失败的次数，key 为 apiToken，value 为失败次数
 	ctxApiTokenRequestFailureCount string
 	// @Title zh-CN 记录 apiToken 健康检测成功的次数，key 为 apiToken，value 为成功次数
@@ -527,6 +529,22 @@ func (c *ProviderConfig) GetGlobalRandomToken(log wrapper.Log) string {
 	}
 }
 
+func (c *ProviderConfig) GetAvailableApiToken(ctx wrapper.HttpContext) []string {
+	apiTokens, _ := ctx.GetContext(c.failover.ctxAvailableApiTokensInRequest).([]string)
+	return apiTokens
+}
+
+// SetAvailableApiTokens set available apiTokens of current request in the context, will be used in the retryOnFailure
+func (c *ProviderConfig) SetAvailableApiTokens(ctx wrapper.HttpContext, log wrapper.Log) {
+	var apiTokens []string
+	if c.isFailoverEnabled() {
+		apiTokens, _, _ = getApiTokens(c.failover.ctxApiTokens)
+	} else {
+		apiTokens = c.apiTokens
+	}
+	ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, apiTokens)
+}
+
 func (c *ProviderConfig) isFailoverEnabled() bool {
 	return c.failover.enabled
 }
@@ -539,12 +557,12 @@ func (c *ProviderConfig) resetSharedData() {
 	_ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0)
 }
 
-func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action {
+func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action {
 	if c.isFailoverEnabled() {
 		c.handleUnavailableApiToken(ctx, apiTokenInUse, log)
 	}
 	if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) {
-		c.retryFailedRequest(activeProvider, ctx, log)
+		c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log)
 		return types.HeaderStopAllIterationAndWatermark
 	}
 	return types.ActionContinue
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
index 726a18fca6..7de9cfe2fa 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -1,6 +1,9 @@
 package provider
 
-import "strings"
+import (
+	"fmt"
+	"strings"
+)
 
 const (
 	streamEventIdItemKey        = "id:"
@@ -110,9 +113,16 @@ type chatCompletionChoice struct {
 }
 
 type usage struct {
-	PromptTokens     int `json:"prompt_tokens,omitempty"`
-	CompletionTokens int `json:"completion_tokens,omitempty"`
-	TotalTokens      int `json:"total_tokens,omitempty"`
+	PromptTokens            int                      `json:"prompt_tokens,omitempty"`
+	CompletionTokens        int                      `json:"completion_tokens,omitempty"`
+	TotalTokens             int                      `json:"total_tokens,omitempty"`
+	CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
+}
+
+type completionTokensDetails struct {
+	ReasoningTokens          int `json:"reasoning_tokens,omitempty"`
+	AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
+	RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
 }
 
 type chatMessage struct {
@@ -126,6 +136,24 @@ type chatMessage struct {
 	Refusal          string                 `json:"refusal,omitempty"`
 }
 
+// handleReasoningContent applies reasoningContentMode to a message that carries reasoning
+// content: "ignore" drops it, "concat" prepends it to Content separated by a newline, and
+// "passthrough" (or any other value) leaves the message untouched.
+func (m *chatMessage) handleReasoningContent(reasoningContentMode string) {
+	if m.ReasoningContent == "" {
+		return
+	}
+	// Go switch cases never fall through implicitly, so no break statements are needed.
+	switch reasoningContentMode {
+	case reasoningBehaviorIgnore:
+		m.ReasoningContent = ""
+	case reasoningBehaviorConcat:
+		// NOTE(review): %v also stringifies a nil or non-string Content — confirm callers only pass text.
+		m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content)
+		m.ReasoningContent = ""
+	}
+}
+
 type messageContent struct {
 	Type     string    `json:"type,omitempty"`
 	Text     string    `json:"text"`
@@ -138,6 +166,9 @@ type imageUrl struct {
 }
 
 func (m *chatMessage) IsEmpty() bool {
+	if m.ReasoningContent != "" {
+		return false
+	}
 	if m.IsStringContent() && m.Content != "" {
 		return false
 	}
@@ -247,14 +278,18 @@ func (m *functionCall) IsEmpty() bool {
 	return m.Name == "" && m.Arguments == ""
 }
 
-type streamEvent struct {
+type StreamEvent struct {
 	Id         string `json:"id"`
 	Event      string `json:"event"`
 	Data       string `json:"data"`
 	HttpStatus string `json:"http_status"`
 }
 
-func (e *streamEvent) setValue(key, value string) {
+func (e *StreamEvent) IsEndData() bool {
+	return e.Data == streamEndDataValue
+}
+
+func (e *StreamEvent) SetValue(key, value string) {
 	switch key {
 	case streamEventIdItemKey:
 		e.Id = value
@@ -269,6 +304,10 @@ func (e *streamEvent) setValue(key, value string) {
 	}
 }
 
+func (e *StreamEvent) ToHttpString() string {
+	return fmt.Sprintf("%s %s\n\n", streamDataItemKey, e.Data)
+}
+
 // https://platform.openai.com/docs/guides/images
 type imageGenerationRequest struct {
 	Model  string `json:"model"`
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
index f0f63cf792..46fa68c734 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -102,12 +102,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam
 		}()
 		if err != nil {
 			log.Errorf("failed to load context file: %v", err)
-			util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
+			_ = util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
 			return
 		}
 		err = m.performChatCompletion(ctx, content, request, log)
 		if err != nil {
-			util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
+			_ = util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
 		}
 	}, log)
 	if err == nil {
@@ -161,79 +161,9 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba
 	}
 }
 
-func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+func (m *moonshotProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
 	if name != ApiNameChatCompletion {
-		return chunk, nil
-	}
-	receivedBody := chunk
-	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
-		receivedBody = append(bufferedStreamingBody, chunk...)
-	}
-
-	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
-	defer func() {
-		if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
-			// Just in case the received chunk is not a complete event.
-			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
-		} else {
-			ctx.SetContext(ctxKeyStreamingBody, nil)
-		}
-	}()
-
-	var responseBuilder strings.Builder
-	currentKey := ""
-	currentEvent := &streamEvent{}
-	i, length := 0, len(receivedBody)
-	for i = 0; i < length; i++ {
-		ch := receivedBody[i]
-		if ch != '\n' {
-			if lineStartIndex == -1 {
-				if eventStartIndex == -1 {
-					eventStartIndex = i
-				}
-				lineStartIndex = i
-				valueStartIndex = -1
-			}
-			if valueStartIndex == -1 {
-				if ch == ':' {
-					valueStartIndex = i + 1
-					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
-				}
-			} else if valueStartIndex == i && ch == ' ' {
-				// Skip leading spaces in data.
-				valueStartIndex = i + 1
-			}
-			continue
-		}
-
-		if lineStartIndex != -1 {
-			value := string(receivedBody[valueStartIndex:i])
-			currentEvent.setValue(currentKey, value)
-		} else {
-			// Extra new line. The current event is complete.
-			log.Debugf("processing event: %v", currentEvent)
-			m.convertStreamEvent(&responseBuilder, currentEvent, log)
-			// Reset event parsing state.
-			eventStartIndex = -1
-			currentEvent = &streamEvent{}
-		}
-
-		// Reset line parsing state.
-		lineStartIndex = -1
-		valueStartIndex = -1
-		currentKey = ""
-	}
-
-	modifiedResponseChunk := responseBuilder.String()
-	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
-	return []byte(modifiedResponseChunk), nil
-}
-
-func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error {
-	if event.Data == streamEndDataValue {
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
+		return nil, nil
 	}
 
 	if gjson.Get(event.Data, "choices.0.usage").Exists() {
@@ -241,20 +171,19 @@ func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder,
 		newData, err := sjson.Delete(event.Data, "choices.0.usage")
 		if err != nil {
 			log.Errorf("convert usage event error: %v", err)
-			return err
+			return nil, err
 		}
 		newData, err = sjson.SetRaw(newData, "usage", usageStr)
 		if err != nil {
 			log.Errorf("convert usage event error: %v", err)
-			return err
+			return nil, err
 		}
 		event.Data = newData
 	}
-	m.appendStreamEvent(responseBuilder, event)
-	return nil
+	return []StreamEvent{event}, nil
 }
 
-func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
 	responseBuilder.WriteString(streamDataItemKey)
 	responseBuilder.WriteString(event.Data)
 	responseBuilder.WriteString("\n\n")
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
index 0a170347f5..f875dbaa40 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -2,7 +2,6 @@ package provider
 
 import (
 	"encoding/json"
-	"fmt"
 	"net/http"
 	"path"
 	"strings"
@@ -58,10 +57,10 @@ func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provi
 	}
 	customUrl := strings.TrimPrefix(strings.TrimPrefix(config.openaiCustomUrl, "http://"), "https://")
 	pairs := strings.SplitN(customUrl, "/", 2)
-	if len(pairs) != 2 {
-		return nil, fmt.Errorf("invalid openaiCustomUrl:%s", config.openaiCustomUrl)
+	customPath := "/"
+	if len(pairs) == 2 {
+		customPath += pairs[1]
 	}
-	customPath := "/" + pairs[1]
 	isDirectCustomPath := isDirectPath(customPath)
 	capabilities := m.DefaultCapabilities()
 	if !isDirectCustomPath {
@@ -128,21 +127,14 @@ func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
 }
 
 func (m *openaiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
-	request := &chatCompletionRequest{}
-	if err := decodeChatCompletionRequest(body, request); err != nil {
-		return nil, err
-	}
 	if m.config.responseJsonSchema != nil {
+		request := &chatCompletionRequest{}
+		if err := decodeChatCompletionRequest(body, request); err != nil {
+			return nil, err
+		}
 		log.Debugf("[ai-proxy] set response format to %s", m.config.responseJsonSchema)
 		request.ResponseFormat = m.config.responseJsonSchema
+		body, _ = json.Marshal(request)
 	}
-	if request.Stream {
-		// For stream requests, we need to include usage in the response.
-		if request.StreamOptions == nil {
-			request.StreamOptions = &streamOptions{IncludeUsage: true}
-		} else if !request.StreamOptions.IncludeUsage {
-			request.StreamOptions.IncludeUsage = true
-		}
-	}
-	return json.Marshal(request)
+	return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
 }
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
index 67cce2888b..c5ec8ce2d4 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -85,6 +85,10 @@ const (
 	objectChatCompletion      = "chat.completion"
 	objectChatCompletionChunk = "chat.completion.chunk"
 
+	reasoningBehaviorPassThrough = "passthrough"
+	reasoningBehaviorIgnore      = "ignore"
+	reasoningBehaviorConcat      = "concat"
+
 	wildcard = "*"
 
 	defaultTimeout = 2 * 60 * 1000 // ms
@@ -145,6 +149,10 @@ type StreamingResponseBodyHandler interface {
 	OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
 }
 
+type StreamingEventHandler interface {
+	OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error)
+}
+
 type ApiNameHandler interface {
 	GetApiName(path string) ApiName
 }
@@ -190,6 +198,9 @@ type ProviderConfig struct {
 	// @Title zh-CN 失败请求重试
 	// @Description zh-CN 对失败的请求立即进行重试
 	retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
+	// @Title zh-CN 推理内容处理方式
+	// @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值：passthrough（正常输出推理内容）、ignore（不输出推理内容）、concat（将推理内容拼接在常规输出内容之前）。默认为 passthrough。仅支持通义千问服务。
+	reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
 	// @Title zh-CN 基于OpenAI协议的自定义后端URL
 	// @Description zh-CN 仅适用于支持 openai 协议的服务。
 	openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
@@ -281,6 +292,10 @@ func (c *ProviderConfig) GetProtocol() string {
 	return c.protocol
 }
 
+func (c *ProviderConfig) IsOpenAIProtocol() bool {
+	return c.protocol == protocolOpenAI
+}
+
 func (c *ProviderConfig) FromJson(json gjson.Result) {
 	c.id = json.Get("id").String()
 	c.typ = json.Get("type").String()
@@ -359,6 +374,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
 		}
 	}
 
+	c.reasoningContentMode = json.Get("reasoningContentMode").String()
+	if c.reasoningContentMode == "" {
+		c.reasoningContentMode = reasoningBehaviorPassThrough
+	} else {
+		c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
+		switch c.reasoningContentMode {
+		case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
+			break
+		default:
+			c.reasoningContentMode = reasoningBehaviorPassThrough
+			break
+		}
+	}
+
 	failoverJson := json.Get("failover")
 	c.failover = &failover{
 		enabled: false,
@@ -554,6 +583,81 @@ func doGetMappedModel(model string, modelMapping map[string]string, log wrapper.
 	return ""
 }
 
+// ExtractStreamingEvents parses chunk, prefixed with any partial event buffered under
+// ctxKeyStreamingBody, into the events that are terminated by an empty line. The bytes of a
+// trailing incomplete event are buffered again for the next call. Lines that contain no ':'
+// are skipped, because slicing their value with valueStartIndex == -1 would panic.
+func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte, log wrapper.Log) []StreamEvent {
+	body := chunk
+	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
+		body = append(bufferedStreamingBody, chunk...)
+	}
+	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
+	defer func() {
+		if eventStartIndex >= 0 && eventStartIndex < len(body) {
+			// Just in case the received chunk is not a complete event.
+			ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:])
+		} else {
+			ctx.SetContext(ctxKeyStreamingBody, nil)
+		}
+	}()
+
+	// Sample Qwen event response:
+	//
+	// event:result
+	// :HTTP_STATUS/200
+	// data:{"output":{"choices":[{"message":{"content":"你好！","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
+	//
+	// event:error
+	// :HTTP_STATUS/400
+	// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
+	//
+
+	var events []StreamEvent
+	currentKey := ""
+	currentEvent := &StreamEvent{}
+	for i, length := 0, len(body); i < length; i++ {
+		ch := body[i]
+		if ch != '\n' {
+			if lineStartIndex == -1 {
+				if eventStartIndex == -1 {
+					eventStartIndex = i
+				}
+				lineStartIndex = i
+				valueStartIndex = -1
+			}
+			if valueStartIndex == -1 {
+				if ch == ':' {
+					valueStartIndex = i + 1
+					currentKey = string(body[lineStartIndex:valueStartIndex])
+				}
+			} else if valueStartIndex == i && ch == ' ' {
+				// Skip leading spaces in data.
+				valueStartIndex = i + 1
+			}
+			continue
+		}
+
+		if lineStartIndex == -1 {
+			// Extra new line. The current event is complete.
+			events = append(events, *currentEvent)
+			// Reset event parsing state.
+			eventStartIndex = -1
+			currentEvent = &StreamEvent{}
+		} else if valueStartIndex != -1 {
+			// Only lines with a "key:" prefix carry a value; others are ignored.
+			currentEvent.SetValue(currentKey, string(body[valueStartIndex:i]))
+		}
+
+		// Reset line parsing state.
+		lineStartIndex = -1
+		valueStartIndex = -1
+		currentKey = ""
+	}
+
+	return events
+}
+
 func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool {
 	_, exist := c.capabilities[string(apiName)]
 	return exist
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
index 2f757c683a..4bb39c1210 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -188,89 +188,32 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b
 	return json.Marshal(qwenRequest)
 }
 
-func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+func (m *qwenProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
 	if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
-		return chunk, nil
-	}
-
-	receivedBody := chunk
-	if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
-		receivedBody = append(bufferedStreamingBody, chunk...)
+		return nil, nil
 	}
 
 	incrementalStreaming := ctx.GetBoolContext(ctxKeyIncrementalStreaming, false)
 
-	eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
-	defer func() {
-		if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
-			// Just in case the received chunk is not a complete event.
-			ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
-		} else {
-			ctx.SetContext(ctxKeyStreamingBody, nil)
-		}
-	}()
-
-	// Sample Qwen event response:
-	//
-	// event:result
-	// :HTTP_STATUS/200
-	// data:{"output":{"choices":[{"message":{"content":"你好！","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
-	//
-	// event:error
-	// :HTTP_STATUS/400
-	// data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
-	//
-
-	var responseBuilder strings.Builder
-	currentKey := ""
-	currentEvent := &streamEvent{}
-	i, length := 0, len(receivedBody)
-	for i = 0; i < length; i++ {
-		ch := receivedBody[i]
-		if ch != '\n' {
-			if lineStartIndex == -1 {
-				if eventStartIndex == -1 {
-					eventStartIndex = i
-				}
-				lineStartIndex = i
-				valueStartIndex = -1
-			}
-			if valueStartIndex == -1 {
-				if ch == ':' {
-					valueStartIndex = i + 1
-					currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
-				}
-			} else if valueStartIndex == i && ch == ' ' {
-				// Skip leading spaces in data.
-				valueStartIndex = i + 1
-			}
-			continue
-		}
+	qwenResponse := &qwenTextGenResponse{}
+	if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
+		log.Errorf("unable to unmarshal Qwen response: %v", err)
+		return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+	}
 
-		if lineStartIndex != -1 {
-			value := string(receivedBody[valueStartIndex:i])
-			currentEvent.setValue(currentKey, value)
-		} else {
-			// Extra new line. The current event is complete.
-			log.Debugf("processing event: %v", currentEvent)
-			if err := m.convertStreamEvent(ctx, &responseBuilder, currentEvent, incrementalStreaming, log); err != nil {
-				return nil, err
-			}
-			// Reset event parsing state.
-			eventStartIndex = -1
-			currentEvent = &streamEvent{}
+	var outputEvents []StreamEvent
+	responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
+	for _, response := range responses {
+		responseBody, err := json.Marshal(response)
+		if err != nil {
+			log.Errorf("unable to marshal response: %v", err)
+			return nil, fmt.Errorf("unable to marshal response: %v", err)
 		}
-
-		// Reset line parsing state.
-		lineStartIndex = -1
-		valueStartIndex = -1
-		currentKey = ""
+		modifiedEvent := event
+		modifiedEvent.Data = string(responseBody)
+		outputEvents = append(outputEvents, modifiedEvent)
 	}
-
-	modifiedResponseChunk := responseBuilder.String()
-	log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
-	return []byte(modifiedResponseChunk), nil
+	return outputEvents, nil
 }
 
 func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
@@ -357,7 +300,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o
 func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
 	choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
 	for _, qwenChoice := range qwenResponse.Output.Choices {
-		message := qwenMessageToChatMessage(qwenChoice.Message)
+		message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
 		choices = append(choices, chatCompletionChoice{
 			Message:      &message,
 			FinishReason: qwenChoice.FinishReason,
@@ -395,7 +338,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 	finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
 	message := qwenChoice.Message
 
-	deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content}
+	deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
+	deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode)
 	deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}
 	if !incrementalStreaming {
 		for _, tc := range message.ToolCalls {
@@ -430,6 +374,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 					}
 				}
 			}
+			if message.ReasoningContent == "" {
+				message.ReasoningContent = pushedMessage.ReasoningContent
+			} else {
+				deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
+			}
 			if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
 				for i, tc := range deltaToolCallsMessage.ToolCalls {
 					if i >= len(pushedMessage.ToolCalls) {
@@ -475,39 +424,6 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
 	return responses
 }
 
-func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, event *streamEvent, incrementalStreaming bool, log wrapper.Log) error {
-	if event.Data == streamEndDataValue {
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
-	}
-
-	if event.Event != eventResult || event.HttpStatus != httpStatus200 {
-		// Something goes wrong. Just pass through the event.
-		m.appendStreamEvent(responseBuilder, event)
-		return nil
-	}
-
-	qwenResponse := &qwenTextGenResponse{}
-	if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
-		log.Errorf("unable to unmarshal Qwen response: %v", err)
-		return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
-	}
-
-	responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
-	for _, response := range responses {
-		responseBody, err := json.Marshal(response)
-		if err != nil {
-			log.Errorf("unable to marshal response: %v", err)
-			return fmt.Errorf("unable to marshal response: %v", err)
-		}
-		modifiedEvent := &*event
-		modifiedEvent.Data = string(responseBody)
-		m.appendStreamEvent(responseBuilder, modifiedEvent)
-	}
-
-	return nil
-}
-
 func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onlyOneSystemBeforeFile bool) ([]byte, error) {
 	request := &qwenTextGenRequest{}
 	if err := json.Unmarshal(body, request); err != nil {
@@ -552,7 +468,7 @@ func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onl
 	return json.Marshal(request)
 }
 
-func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
 	responseBuilder.WriteString(streamDataItemKey)
 	responseBuilder.WriteString(event.Data)
 	responseBuilder.WriteString("\n\n")
@@ -690,13 +606,16 @@ type qwenTextEmbeddings struct {
 	Embedding []float64 `json:"embedding"`
 }
 
-func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
-	return chatMessage{
-		Name:      qwenMessage.Name,
-		Role:      qwenMessage.Role,
-		Content:   qwenMessage.Content,
-		ToolCalls: qwenMessage.ToolCalls,
+func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage {
+	msg := chatMessage{
+		Name:             qwenMessage.Name,
+		Role:             qwenMessage.Role,
+		Content:          qwenMessage.Content,
+		ReasoningContent: qwenMessage.ReasoningContent,
+		ToolCalls:        qwenMessage.ToolCalls,
 	}
+	msg.handleReasoningContent(reasoningContentMode)
+	return msg
 }
 
 func (m *qwenMessage) IsStringContent() bool {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
index 033a8cd8c5..59691d855f 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
@@ -1,11 +1,13 @@
 package provider
 
 import (
+	"math/rand"
+	"net/http"
+
 	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
 	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
 	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
 	"github.com/tidwall/gjson"
-	"net/http"
 )
 
 const (
@@ -38,12 +40,12 @@ func (c *ProviderConfig) isRetryOnFailureEnabled() bool {
 	return c.retryOnFailure.enabled
 }
 
-func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) {
+func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) {
 	log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType())
 	retryClient := createRetryClient(ctx)
 	apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName)
 	ctx.SetContext(ctxRetryCount, 1)
-	c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+	c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
 }
 
 func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) {
@@ -67,7 +69,8 @@ func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext
 func (c *ProviderConfig) retryCall(
 	ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider,
 	apiName ApiName, statusCode int, responseHeaders http.Header, responseBody []byte,
-	retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) {
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+	apiTokenInUse string, apiTokens []string) {
 
 	retryCount := ctx.GetContext(ctxRetryCount).(int)
 	log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries)
@@ -76,6 +79,7 @@ func (c *ProviderConfig) retryCall(
 		log.Debugf("Retry request succeeded")
 		headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log)
 		proxywasm.SendHttpResponse(200, headers, body, -1)
+		return
 	} else {
 		log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody))
 	}
@@ -83,26 +87,41 @@ func (c *ProviderConfig) retryCall(
 	retryCount++
 	if retryCount <= int(c.retryOnFailure.maxRetries) {
 		ctx.SetContext(ctxRetryCount, retryCount)
-		c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+		c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
 	} else {
 		log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries)
 		proxywasm.ResumeHttpResponse()
+		return
 	}
 }
 
 func (c *ProviderConfig) sendRetryRequest(
 	ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider,
-	retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) {
+	retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+	apiTokenInUse string, apiTokens []string, log wrapper.Log) {
+
+	// Remove last failed token from retry apiTokens list
+	apiTokens = removeApiTokenFromRetryList(apiTokens, apiTokenInUse, log)
+	if len(apiTokens) == 0 {
+		log.Debugf("No more apiTokens to retry")
+		proxywasm.ResumeHttpResponse()
+		return
+	}
+	// Set apiTokenInUse for the retry request
+	apiTokenInUse = GetRandomToken(apiTokens)
+	log.Debugf("Retry request with apiToken: %s", apiTokenInUse)
+	ctx.SetContext(c.failover.ctxApiTokenInUse, apiTokenInUse)
 
 	requestHeaders, requestBody := c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log)
 	path := getRetryPath(ctx)
 
 	err := retryClient.Post(path, util.HeaderToSlice(requestHeaders), requestBody, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
-		c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient)
+		c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient, apiTokenInUse, apiTokens)
 	}, uint32(c.retryOnFailure.retryTimeout))
 	if err != nil {
 		log.Errorf("Failed to send retry request: %v", err)
 		proxywasm.ResumeHttpResponse()
+		return
 	}
 }
 
@@ -126,9 +145,7 @@ func getRetryPath(ctx wrapper.HttpContext) string {
 }
 
 func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) {
-	// The retry request may be sent with different apiToken, so the header needs to be regenerated
-	c.SetApiTokenInUse(ctx, log)
-
+	// The retry request is sent with different apiToken, so the header needs to be regenerated
 	requestHeaders := http.Header{
 		"Content-Type": []string{"application/json"},
 	}
@@ -139,3 +156,27 @@ func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext,
 
 	return requestHeaders, requestBody
 }
+
+// removeApiTokenFromRetryList returns the entries of apiTokens that differ from removedApiToken and logs the result at debug level.
+func removeApiTokenFromRetryList(apiTokens []string, removedApiToken string, log wrapper.Log) (availableApiTokens []string) {
+	for _, token := range apiTokens {
+		if token != removedApiToken {
+			availableApiTokens = append(availableApiTokens, token)
+		}
+	}
+	log.Debugf("Remove apiToken %s from retry apiTokens list", removedApiToken)
+	log.Debugf("Available retry apiTokens: %v", availableApiTokens)
+	return availableApiTokens
+}
+
+// GetRandomToken returns one of apiTokens chosen with math/rand.
+// An empty list yields "", and a single-element list returns that element
+// without drawing a random number.
+func GetRandomToken(apiTokens []string) string {
+	if n := len(apiTokens); n > 1 {
+		return apiTokens[rand.Intn(n)]
+	} else if n == 1 {
+		return apiTokens[0]
+	}
+	return ""
+}
diff --git a/plugins/wasm-go/extensions/ai-quota/README.md b/plugins/wasm-go/extensions/ai-quota/README.md
index 4305272902..4b0d362fed 100644
--- a/plugins/wasm-go/extensions/ai-quota/README.md
+++ b/plugins/wasm-go/extensions/ai-quota/README.md
@@ -26,14 +26,14 @@ description: AI 配额管理插件配置参考
 
 `redis`中每一项的配置字段说明
 
-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local     |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口     |
-| username     | string | 否   | -                                                          | redis用户名                 |
-| password     | string | 否   | -                                                          | redis密码                   |
-| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒 |
-
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------                                                                  |
+| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis用户名                                                                                  |
+| password     | string | 否   | -                                                          | redis密码                                                                                    |
+| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒                                                                  |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |
 
 
 ## 配置示例
diff --git a/plugins/wasm-go/extensions/ai-quota/README_EN.md b/plugins/wasm-go/extensions/ai-quota/README_EN.md
index e136a75969..0eff19aeed 100644
--- a/plugins/wasm-go/extensions/ai-quota/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-quota/README_EN.md
@@ -18,13 +18,14 @@ Plugin execution priority: `750`
 | `admin_path`        | string           | Optional                                   |   /quota      | Prefix for the path to manage quota requests      |
 | `redis`             | object           | Yes                                        |               | Redis related configuration                        |
 Explanation of each configuration field in `redis`
-| Configuration Item  | Type             | Required | Default Value                                            | Explanation                                   |
-|---------------------|------------------|----------|---------------------------------------------------------|-----------------------------------------------|
-| service_name        | string           | Required | -                                                       | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port        | int              | No       | Default value for static service is 80; others are 6379 | Service port for the redis service            |
-| username            | string           | No       | -                                                       | Redis username                                |
-| password            | string           | No       | -                                                       | Redis password                                |
-| timeout             | int              | No       | 1000                                                    | Redis connection timeout in milliseconds      |
+| Configuration Item | Type   | Required | Default Value                                           | Explanation                                                                                             |
+|--------------------|--------|----------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------|
+| service_name       | string | Required | -                                                       | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port       | int    | No       | Default value for static service is 80; others are 6379 | Service port for the redis service                                                                      |
+| username           | string | No       | -                                                       | Redis username                                                                                          |
+| password           | string | No       | -                                                       | Redis password                                                                                          |
+| timeout            | int    | No       | 1000                                                    | Redis connection timeout in milliseconds                                                                |
+| database           | int    | No       | 0                                                       | The database ID used, for example, configured as 1, corresponds to `SELECT 1`.                          |
 
 ## Configuration Example
 ### Identify request parameter apikey and apply rate limiting accordingly
diff --git a/plugins/wasm-go/extensions/ai-quota/go.mod b/plugins/wasm-go/extensions/ai-quota/go.mod
index ec77e402e4..8b9e11fd10 100644
--- a/plugins/wasm-go/extensions/ai-quota/go.mod
+++ b/plugins/wasm-go/extensions/ai-quota/go.mod
@@ -2,11 +2,11 @@ module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota
 
 go 1.19
 
-//replace github.com/alibaba/higress/plugins/wasm-go => ../..
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
 
 require (
 	github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de
-	github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
 	github.com/tidwall/gjson v1.17.3
 	github.com/tidwall/resp v0.1.1
 )
diff --git a/plugins/wasm-go/extensions/ai-quota/go.sum b/plugins/wasm-go/extensions/ai-quota/go.sum
index 996d474d43..b4ab172fe2 100644
--- a/plugins/wasm-go/extensions/ai-quota/go.sum
+++ b/plugins/wasm-go/extensions/ai-quota/go.sum
@@ -1,12 +1,10 @@
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de h1:lDLqj7Hw41ox8VdsP7oCTPhjPa3+QJUCKApcLh2a45Y=
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de/go.mod h1:359don/ahMxpfeLMzr29Cjwcu8IywTTDUzWlBPRNLHw=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
 github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
 github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
 github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/plugins/wasm-go/extensions/ai-quota/main.go b/plugins/wasm-go/extensions/ai-quota/main.go
index 2facd912bc..2c6d75e8f4 100644
--- a/plugins/wasm-go/extensions/ai-quota/main.go
+++ b/plugins/wasm-go/extensions/ai-quota/main.go
@@ -69,6 +69,7 @@ type RedisInfo struct {
 	Username    string `required:"false" yaml:"username" json:"username"`
 	Password    string `required:"false" yaml:"password" json:"password"`
 	Timeout     int    `required:"false" yaml:"timeout" json:"timeout"`
+	Database    int    `required:"false" yaml:"database" json:"database"`
 }
 
 func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error {
@@ -110,17 +111,19 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error
 	if timeout == 0 {
 		timeout = 1000
 	}
+	database := int(redisConfig.Get("database").Int())
 	config.redisInfo.ServiceName = serviceName
 	config.redisInfo.ServicePort = servicePort
 	config.redisInfo.Username = username
 	config.redisInfo.Password = password
 	config.redisInfo.Timeout = timeout
+	config.redisInfo.Database = database
 	config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})
 
-	return config.redisClient.Init(username, password, int64(timeout))
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }
 
 func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log wrapper.Log) types.Action {
diff --git a/plugins/wasm-go/extensions/ai-search/README.md b/plugins/wasm-go/extensions/ai-search/README.md
new file mode 100644
index 0000000000..5ae133148a
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/README.md
@@ -0,0 +1,244 @@
+---
+title: AI 搜索增强
+keywords: [higress,ai search]
+description: higress 支持通过集成搜索引擎（Google/Bing/Arxiv/Elasticsearch等）的实时结果，增强DeepSeek-R1等模型的回答准确性和时效性
+---
+
+## 功能说明
+
+`ai-search`插件通过集成搜索引擎（Google/Bing/Arxiv/Elasticsearch等）的实时结果，增强AI模型的回答准确性和时效性。插件会自动将搜索结果注入到提示模板中，并根据配置决定是否在最终回答中添加引用来源。
+
+## 运行属性
+
+插件执行阶段：`默认阶段`
+插件执行优先级：`440`
+
+## 配置字段
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| needReference | bool | 选填 | false | 是否在回答中添加引用来源 |
+| referenceFormat | string | 选填 | `"**References:**\n%s"` | 引用内容格式，必须包含%s占位符 |
+| defaultLang | string | 选填 | - | 默认搜索语言代码（如zh-CN/en-US） |
+| promptTemplate | string | 选填 | 内置模板 | 提示模板，必须包含`{search_results}`和`{question}`占位符 |
+| searchFrom | array of object | 必填 | - | 参考下面搜索引擎配置，至少配置一个引擎 |
+| searchRewrite | object | 选填 | - | 搜索重写配置，用于使用LLM服务优化搜索查询 |
+
+## 搜索重写说明
+
+搜索重写功能使用LLM服务对用户的原始查询进行分析和优化，可以：
+1. 将用户的自然语言查询转换为更适合搜索引擎的关键词组合
+2. 对于Arxiv论文搜索，自动识别相关的论文类别并添加类别限定
+3. 对于私有知识库搜索，将长查询拆分成多个精准的关键词组合
+
+强烈建议在使用Arxiv或Elasticsearch引擎时启用此功能。对于Arxiv搜索，它能准确识别论文所属领域并优化英文关键词；对于私有知识库搜索，它能提供更精准的关键词匹配，显著提升搜索效果。
+
+## 搜索重写配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| llmServiceName | string | 必填 | - | LLM服务名称 |
+| llmServicePort | number | 必填 | - | LLM服务端口 |
+| llmApiKey | string | 必填 | - | LLM服务API密钥 |
+| llmUrl | string | 必填 | - | LLM服务API地址 |
+| llmModelName | string | 必填 | - | LLM模型名称 |
+| timeoutMillisecond | number | 选填 | 30000 | API调用超时时间（毫秒） |
+
+## 搜索引擎通用配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| type | string | 必填 | - | 引擎类型（google/bing/arxiv/elasticsearch/quark） |
+| serviceName | string | 必填 | - | 后端服务名称 |
+| servicePort | number | 必填 | - | 后端服务端口 |
+| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
+| count | number | 选填 | 10 | 单次搜索返回结果数量 |
+| start | number | 选填 | 0 | 搜索结果偏移量（从第start+1条结果开始返回） |
+| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间（毫秒） |
+| optionArgs | map | 选填 | - | 搜索引擎特定参数（key-value格式） |
+
+## Google 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| cx | string | 必填 | - | Google自定义搜索引擎ID，用于指定搜索范围 |
+
+## Arxiv 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| arxivCategory | string | 选填 | - | 搜索的论文[类别](https://arxiv.org/category_taxonomy)（如cs.AI, cs.CL等） |
+
+## Elasticsearch 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 |
+| contentField | string | 必填 | - | 要查询的内容字段名称 |
+| linkField | string | 必填 | - | 结果链接字段名称 |
+| titleField | string | 必填 | - | 结果标题字段名称 |
+
+## Quark 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| secretKey | string | 必填 | - | Aliyun SecretKey |
+| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |
+
+## 配置示例
+
+### 基础配置（单搜索引擎）
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  count: 5
+  optionArgs:
+    fileType: "pdf"
+```
+
+### Arxiv搜索配置
+
+```yaml
+searchFrom:
+- type: arxiv
+  serviceName: "arxiv-svc.dns" 
+  servicePort: 443
+  arxivCategory: "cs.AI"
+  count: 10
+```
+
+
+### 夸克搜索配置
+
+```yaml
+searchFrom:
+- type: quark
+  serviceName: "quark-svc.dns" 
+  servicePort: 443
+  apiKey: "aliyun accessKey"
+  count: 10 # 搜索网页数，最多10条
+  secretKey: "aliyun secretKey"
+  endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### 多搜索引擎配置
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+  # Search Results:
+  {search_results}
+  
+  # Please answer this question: 
+  {question}
+searchFrom:
+- type: google
+  apiKey: "google-key"
+  cx: "github-search-id"  # 专门搜索GitHub内容的搜索引擎ID
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: google
+  apiKey: "google-key"
+  cx: "news-search-id"    # 专门搜索Google News内容的搜索引擎ID 
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: bing
+  apiKey: "bing-key"
+  serviceName: "bing-svc.dns"
+  servicePort: 443
+  optionArgs:
+    answerCount: "5"
+```
+
+### 并发查询配置
+
+由于搜索引擎对单次查询返回结果数量有限制（如Google限制单次最多返回100条结果），可以通过以下方式获取更多结果：
+1. 设置较小的count值（如10）
+2. 通过start参数指定结果偏移量
+3. 并发发起多个查询请求，每个请求的start值按count递增
+
+例如，要获取30条结果，可以配置count=10并并发发起3个查询，每个查询的start值分别为0,10,20：
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 0
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 10
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 20
+  count: 10 
+```
+
+注意，过高的并发可能会导致限流，需要根据实际情况调整。
+
+### Elasticsearch 配置（用于对接私有知识库）
+
+```yaml
+searchFrom:
+- type: elasticsearch
+  serviceName: "es-svc.static"
+  # 固定地址服务的端口默认是80
+  servicePort: 80
+  index: "knowledge_base"
+  contentField: "content"
+  linkField: "url" 
+  titleField: "title"
+```
+
+### 自定义引用格式
+
+```yaml
+needReference: true
+referenceFormat: "### 数据来源\n%s"
+searchFrom:
+- type: bing
+  apiKey: "your-bing-key"
+  serviceName: "search-service.dns"
+  servicePort: 8080
+```
+
+### 搜索重写配置
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+searchRewrite:
+  llmServiceName: "llm-svc.dns"
+  llmServicePort: 443
+  llmApiKey: "your-llm-api-key"
+  llmUrl: "https://api.example.com/v1/chat/completions"
+  llmModelName: "gpt-3.5-turbo"
+  timeoutMillisecond: 15000
+```
+
+## 注意事项
+
+1. 提示词模版必须包含`{search_results}`和`{question}`占位符，可选使用`{cur_date}`插入当前日期（格式：2006年1月2日）
+2. 默认模板包含搜索结果处理指引和回答规范，如无特殊需要可以直接用默认模板，否则请根据实际情况修改
+3. 多个搜索引擎是并行查询，总超时时间 = 所有搜索引擎配置中最大timeoutMillisecond值 + 处理时间
+4. Arxiv搜索不需要API密钥，但可以指定论文类别（arxivCategory）来缩小搜索范围
diff --git a/plugins/wasm-go/extensions/ai-search/README_EN.md b/plugins/wasm-go/extensions/ai-search/README_EN.md
new file mode 100644
index 0000000000..1afd955bd9
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/README_EN.md
@@ -0,0 +1,243 @@
+---
+title: AI Search Enhancement
+keywords: [higress, ai search]
+description: Higress supports enhancing the accuracy and timeliness of responses from models like DeepSeek-R1 by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.)
+---
+
+## Feature Description
+
+The `ai-search` plugin enhances the accuracy and timeliness of AI model responses by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.). The plugin automatically injects search results into the prompt template and determines whether to add reference sources in the final response based on configuration.
+
+## Runtime Properties
+
+Plugin execution stage: `Default stage`
+Plugin execution priority: `440`
+
+## Configuration Fields
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| needReference | bool | Optional | false | Whether to add reference sources in the response |
+| referenceFormat | string | Optional | `"**References:**\n%s"` | Reference content format, must include %s placeholder |
+| defaultLang | string | Optional | - | Default search language code (e.g. zh-CN/en-US) |
+| promptTemplate | string | Optional | Built-in template | Prompt template, must include `{search_results}` and `{question}` placeholders |
+| searchFrom | array of object | Required | - | Refer to search engine configuration below, at least one engine must be configured |
+| searchRewrite | object | Optional | - | Search rewrite configuration, used to optimize search queries using an LLM service |
+
+## Search Rewrite Description
+
+The search rewrite feature uses an LLM service to analyze and optimize the user's original query, which can:
+1. Convert natural language queries into keyword combinations better suited for search engines
+2. For Arxiv paper searches, automatically identify relevant paper categories and add category constraints
+3. For private knowledge base searches, break down long queries into multiple precise keyword combinations
+
+It is strongly recommended to enable this feature when using Arxiv or Elasticsearch engines. For Arxiv searches, it can accurately identify paper domains and optimize English keywords; for private knowledge base searches, it can provide more precise keyword matching, significantly improving search effectiveness.
+
+## Search Rewrite Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| llmServiceName | string | Required | - | LLM service name |
+| llmServicePort | number | Required | - | LLM service port |
+| llmApiKey | string | Required | - | LLM service API key |
+| llmUrl | string | Required | - | LLM service API URL |
+| llmModelName | string | Required | - | LLM model name |
+| timeoutMillisecond | number | Optional | 30000 | API call timeout (milliseconds) |
+
+## Search Engine Common Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
+| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey (not needed for arxiv and elasticsearch) |
+| serviceName | string | Required | - | Backend service name |
+| servicePort | number | Required | - | Backend service port |
+| count | number | Optional | 10 | Number of results returned per search |
+| start | number | Optional | 0 | Search result offset (start returning from the start+1 result) |
+| timeoutMillisecond | number | Optional | 5000 | API call timeout (milliseconds) |
+| optionArgs | map | Optional | - | Search engine specific parameters (key-value format) |
+
+## Google Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| cx | string | Required | - | Google Custom Search Engine ID, used to specify search scope |
+
+## Arxiv Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| arxivCategory | string | Optional | - | Search paper [category](https://arxiv.org/category_taxonomy) (e.g. cs.AI, cs.CL etc.) |
+
+## Elasticsearch Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| index | string | Required | - | Elasticsearch index name to search |
+| contentField | string | Required | - | Content field name to query |
+| linkField | string | Required | - | Result link field name |
+| titleField | string | Required | - | Result title field name |
+
+## Quark Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|----------|----------|--------|------|
+| secretKey | string | Required | - | Aliyun SecretKey |
+| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint for accessing quark |
+
+## Configuration Examples
+
+### Basic Configuration (Single Search Engine)
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  count: 5
+  optionArgs:
+    fileType: "pdf"
+```
+
+### Arxiv Search Configuration
+
+```yaml
+searchFrom:
+- type: arxiv
+  serviceName: "arxiv-svc.dns" 
+  servicePort: 443
+  arxivCategory: "cs.AI"
+  count: 10
+```
+
+### Quark Search Configuration
+
+```yaml
+searchFrom:
+- type: quark
+  serviceName: "quark-svc.dns" 
+  servicePort: 443
+  apiKey: "aliyun accessKey"
+  count: 10
+  secretKey: "aliyun secretKey"
+  endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### Multiple Search Engines Configuration
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+  # Search Results:
+  {search_results}
+  
+  # Please answer this question: 
+  {question}
+searchFrom:
+- type: google
+  apiKey: "google-key"
+  cx: "github-search-id"  # Search engine ID specifically for GitHub content
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: google
+  apiKey: "google-key"
+  cx: "news-search-id"    # Search engine ID specifically for Google News content 
+  serviceName: "google-svc.dns"
+  servicePort: 443
+- type: bing
+  apiKey: "bing-key"
+  serviceName: "bing-svc.dns"
+  servicePort: 443
+  optionArgs:
+    answerCount: "5"
+```
+
+### Concurrent Query Configuration
+
+Since search engines limit the number of results per query (e.g. Google returns at most 10 results per query and only the first 100 results overall), you can get more results by:
+1. Setting a smaller count value (e.g. 10)
+2. Specifying result offset with start parameter
+3. Concurrently initiating multiple query requests, with each request's start value incrementing by count
+
+For example, to get 30 results, configure count=10 and concurrently initiate 3 queries with start values 0,10,20 respectively:
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 0
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 10
+  count: 10
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+  start: 20
+  count: 10 
+```
+
+Note that excessive concurrency may trigger rate limiting; adjust it according to your actual situation.
+
+### Elasticsearch Configuration (For Private Knowledge Base Integration)
+
+```yaml
+searchFrom:
+- type: elasticsearch
+  serviceName: "es-svc.static"
+  # services with a static IP address use port 80 by default
+  servicePort: 80
+  index: "knowledge_base"
+  contentField: "content"
+  linkField: "url" 
+  titleField: "title"
+```
+
+### Custom Reference Format
+
+```yaml
+needReference: true
+referenceFormat: "### Data Sources\n%s"
+searchFrom: 
+- type: bing
+  apiKey: "your-bing-key"
+  serviceName: "search-service.dns"
+  servicePort: 8080
+```
+
+### Search Rewrite Configuration
+
+```yaml
+searchFrom:
+- type: google
+  apiKey: "your-google-api-key"
+  cx: "search-engine-id"
+  serviceName: "google-svc.dns"
+  servicePort: 443
+searchRewrite:
+  llmServiceName: "llm-svc.dns"
+  llmServicePort: 443
+  llmApiKey: "your-llm-api-key"
+  llmUrl: "https://api.example.com/v1/chat/completions"
+  llmModelName: "gpt-3.5-turbo"
+  timeoutMillisecond: 15000
+```
+
+## Notes
+
+1. The prompt template must include `{search_results}` and `{question}` placeholders, optionally use `{cur_date}` to insert current date (format: January 2, 2006)
+2. The default template includes search results processing instructions and response specifications, you can use the default template unless there are special needs
+3. Multiple search engines are queried in parallel; total timeout = maximum timeoutMillisecond value among all search engine configurations + processing time
+4. Arxiv search doesn't require API key, but you can specify paper category (arxivCategory) to narrow search scope
diff --git a/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
new file mode 100644
index 0000000000..56a998ca33
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
@@ -0,0 +1,134 @@
+package arxiv
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/antchfx/xmlquery"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// ArxivSearch queries the arXiv export API; NewArxivSearch fills in every field.
+type ArxivSearch struct {
+	client             wrapper.HttpClient // cluster client for serviceName/servicePort
+	arxivCategory      string             // configured category, used when the context has none
+	optionArgs         map[string]string  // extra query-string arguments from "optionArgs"
+	start, count       int                // result offset and page size (count defaults to 10)
+	timeoutMillisecond uint32             // request timeout in ms (defaults to 5000)
+}
+
+// NewArxivSearch builds an ArxivSearch from one searchFrom entry. serviceName and
+// servicePort are required; count defaults to 10 and timeoutMillisecond to 5000
+// when zero or unset, and optionArgs entries with an empty value are skipped.
+func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
+	name := config.Get("serviceName").String()
+	if name == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	port := config.Get("servicePort").Int()
+	if port == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	search := &ArxivSearch{
+		client:             wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: name, Port: port}),
+		start:              int(config.Get("start").Uint()),
+		count:              int(config.Get("count").Uint()),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+		optionArgs:         map[string]string{},
+		arxivCategory:      config.Get("arxivCategory").String(),
+	}
+	if search.count == 0 {
+		search.count = 10
+	}
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	for key, value := range config.Get("optionArgs").Map() {
+		if text := value.String(); text != "" {
+			search.optionArgs[key] = text
+		}
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine applies to ctx: ArxivSearch only
+// runs when the context's EngineType is "arxiv".
+func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool { return ctx.EngineType == "arxiv" }
+
+// Client returns the HTTP client that NewArxivSearch created for the
+// configured serviceName/servicePort cluster.
+func (a ArxivSearch) Client() wrapper.HttpClient { return a.client }
+
+// CallArgs builds the GET request for the arXiv export API. Every query term
+// becomes an escaped "all:<term>" clause; the category from ctx (or, failing
+// that, the configured arxivCategory) is added as an escaped "cat:" clause.
+// All clauses are joined in one step, so an empty query list cannot yield a
+// leading "+AND+". optionArgs keys and values are query-escaped and appended.
+func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	var clauses []string
+	for _, q := range ctx.Querys {
+		clauses = append(clauses, "all:"+url.QueryEscape(q))
+	}
+	category := ctx.ArxivCategory
+	if category == "" {
+		category = a.arxivCategory
+	}
+	if category != "" {
+		clauses = append(clauses, "cat:"+url.QueryEscape(category))
+	}
+	queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
+		strings.Join(clauses, "+AND+"), a.count, a.start)
+	for key, value := range a.optionArgs {
+		queryUrl += "&" + url.QueryEscape(key) + "=" + url.QueryEscape(value)
+	}
+	return engine.CallArgs{
+		Method:             http.MethodGet,
+		Url:                queryUrl,
+		Headers:            [][2]string{{"Accept", "application/atom+xml"}},
+		TimeoutMillisecond: a.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts an arXiv Atom feed into search results, one per <entry>: the title, the
+// href of the rel="alternate" text/html <link>, and a content string made of the summary, author
+// names and publication time. Unparseable XML yields no results; incomplete entries are dropped.
+func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	doc, err := xmlquery.Parse(bytes.NewReader(response))
+	if err != nil {
+		return results
+	}
+	// SelectElement returns nil for a missing child; read that as empty text instead of panicking.
+	textOf := func(node *xmlquery.Node) string {
+		if node == nil {
+			return ""
+		}
+		return node.InnerText()
+	}
+	for _, entry := range xmlquery.Find(doc, "//entry") {
+		link := ""
+		for _, l := range entry.SelectElements("link") {
+			if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
+				link = l.SelectAttr("href")
+				break
+			}
+		}
+		var authorNames []string
+		for _, author := range entry.SelectElements("author") {
+			authorNames = append(authorNames, textOf(author.SelectElement("name")))
+		}
+		content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s", textOf(entry.SelectElement("summary")), strings.Join(authorNames, ", "), textOf(entry.SelectElement("published")))
+		result := engine.SearchResult{Title: textOf(entry.SelectElement("title")), Link: link, Content: content}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
new file mode 100644
index 0000000000..b24fe33464
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
@@ -0,0 +1,128 @@
+package bing
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// BingSearch queries the Bing Web Search v7 API; NewBingSearch fills in every field.
+type BingSearch struct {
+	client             wrapper.HttpClient // cluster client for serviceName/servicePort
+	apiKey             string             // sent as the Ocp-Apim-Subscription-Key header
+	optionArgs         map[string]string  // extra query-string arguments from "optionArgs"
+	start, count       int                // result offset and page size (count defaults to 10)
+	timeoutMillisecond uint32             // request timeout in ms (defaults to 5000)
+}
+
+// NewBingSearch builds a BingSearch from one searchFrom entry. apiKey, serviceName and servicePort
+// are required; count defaults to 10, timeoutMillisecond to 5000, empty optionArgs values are skipped.
+func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
+	apiKey := config.Get("apiKey").String()
+	if apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	name := config.Get("serviceName").String()
+	if name == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	port := config.Get("servicePort").Int()
+	if port == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	search := &BingSearch{
+		apiKey:             apiKey,
+		client:             wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: name, Port: port}),
+		start:              int(config.Get("start").Uint()),
+		count:              int(config.Get("count").Uint()),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+		optionArgs:         map[string]string{},
+	}
+	if search.count == 0 {
+		search.count = 10
+	}
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	for key, value := range config.Get("optionArgs").Map() {
+		if text := value.String(); text != "" {
+			search.optionArgs[key] = text
+		}
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine applies to ctx: BingSearch runs
+// when EngineType is "internet" or was left empty.
+func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool { return ctx.EngineType == "" || ctx.EngineType == "internet" }
+
+// Client returns the HTTP client that NewBingSearch created for the
+// configured serviceName/servicePort cluster.
+func (b BingSearch) Client() wrapper.HttpClient { return b.client }
+
+func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d",
+		url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start)
+	var extraArgs []string
+	for key, value := range b.optionArgs {
+		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+	}
+	if ctx.Language != "" {
+		extraArgs = append(extraArgs, fmt.Sprintf("mkt=%s", ctx.Language))
+	}
+	if len(extraArgs) > 0 {
+		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+	}
+	return engine.CallArgs{
+		Method:             http.MethodGet,
+		Url:                queryUrl,
+		Headers:            [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}},
+		TimeoutMillisecond: b.timeoutMillisecond,
+	}
+}
+
+// ParseResult extracts search results from a Bing Web Search JSON response.
+//
+// Results are collected in this order:
+//   - each entry of webPages.value (content read from "snippet"),
+//     immediately followed by the entries of its deepLinks array;
+//   - each entry of news.value (content read from "description").
+//
+// Title and link always come from "name" and "url". Entries for which any of
+// the three values is empty are skipped.
+func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+
+	// collect appends item to results when it forms a valid SearchResult.
+	collect := func(item gjson.Result, contentKey string) {
+		candidate := engine.SearchResult{
+			Title:   item.Get("name").String(),
+			Link:    item.Get("url").String(),
+			Content: item.Get(contentKey).String(),
+		}
+		if candidate.Valid() {
+			results = append(results, candidate)
+		}
+	}
+
+	body := gjson.ParseBytes(response)
+	// Web pages first, each followed by its deep links.
+	for _, page := range body.Get("webPages.value").Array() {
+		collect(page, "snippet")
+		for _, deepLink := range page.Get("deepLinks").Array() {
+			collect(deepLink, "snippet")
+		}
+	}
+	// News articles carry their text in "description" rather than "snippet".
+	for _, article := range body.Get("news.value").Array() {
+		collect(article, "description")
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
new file mode 100644
index 0000000000..4290558c38
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
@@ -0,0 +1,114 @@
+package elasticsearch
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// ElasticsearchSearch runs match queries against one Elasticsearch index.
+type ElasticsearchSearch struct {
+	client             wrapper.HttpClient // cluster client for serviceName/servicePort
+	index              string             // index whose _search endpoint is called
+	contentField       string             // document field that is queried and returned as content
+	linkField          string             // document field returned as the result link
+	titleField         string             // document field returned as the result title
+	start, count       int                // "from" offset and "size" (count defaults to 10)
+	timeoutMillisecond uint32             // request timeout in ms (defaults to 5000)
+}
+
+// NewElasticsearchSearch builds an ElasticsearchSearch from one searchFrom
+// entry. serviceName, servicePort, index, contentField, linkField and
+// titleField are all required (checked in that order); timeoutMillisecond
+// defaults to 5000 and count to 10 when zero or unset.
+func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
+	name := config.Get("serviceName").String()
+	if name == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	port := config.Get("servicePort").Int()
+	if port == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	search := &ElasticsearchSearch{
+		client:             wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: name, Port: port}),
+		index:              config.Get("index").String(),
+		contentField:       config.Get("contentField").String(),
+		linkField:          config.Get("linkField").String(),
+		titleField:         config.Get("titleField").String(),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+		start:              int(config.Get("start").Uint()),
+		count:              int(config.Get("count").Uint()),
+	}
+	switch {
+	case search.index == "":
+		return nil, errors.New("index not found")
+	case search.contentField == "":
+		return nil, errors.New("contentField not found")
+	case search.linkField == "":
+		return nil, errors.New("linkField not found")
+	case search.titleField == "":
+		return nil, errors.New("titleField not found")
+	}
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	if search.count == 0 {
+		search.count = 10
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine applies to ctx: ElasticsearchSearch
+// only runs when the context's EngineType is "private".
+func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool { return ctx.EngineType == "private" }
+
+// Client returns the HTTP client that NewElasticsearchSearch created for the
+// configured serviceName/servicePort cluster.
+func (e ElasticsearchSearch) Client() wrapper.HttpClient { return e.client }
+
+// CallArgs builds the POST /<index>/_search request, paged with from/size.
+// The body is a "match" query on contentField whose text is the query terms
+// joined by spaces, with operator AND.
+//
+// The field name and the query text are encoded as JSON strings before being
+// placed in the body, so quotes, backslashes or control characters in them
+// cannot break the request body or inject extra query clauses.
+func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	field := gjson.AppendJSONString(nil, e.contentField)
+	query := gjson.AppendJSONString(nil, strings.Join(ctx.Querys, " "))
+	searchBody := fmt.Sprintf(`{"query":{"match":{%s:{"query":%s,"operator":"AND"}}}}`, field, query)
+
+	return engine.CallArgs{
+		Method: http.MethodPost,
+		Url:    fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count),
+		Headers: [][2]string{
+			{"Content-Type", "application/json"},
+		},
+		Body:               []byte(searchBody),
+		TimeoutMillisecond: e.timeoutMillisecond,
+	}
+}
+
+// ParseResult maps every hit under hits.hits to a search result, reading
+// the configured title, link and content fields from the hit's _source.
+// Hits that lack any of the three values are skipped.
+func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	for _, hit := range gjson.ParseBytes(response).Get("hits.hits").Array() {
+		doc := hit.Get("_source")
+		title, link, content := doc.Get(e.titleField), doc.Get(e.linkField), doc.Get(e.contentField)
+		result := engine.SearchResult{Title: title.String(), Link: link.String(), Content: content.String()}
+		if !result.Valid() {
+			continue
+		}
+		results = append(results, result)
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/google/google.go b/plugins/wasm-go/extensions/ai-search/engine/google/google.go
new file mode 100644
index 0000000000..e189646b99
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/google/google.go
@@ -0,0 +1,120 @@
+package google
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// GoogleSearch queries the Google Custom Search API; NewGoogleSearch fills in every field.
+type GoogleSearch struct {
+	client             wrapper.HttpClient // cluster client for serviceName/servicePort
+	apiKey             string             // sent as the "key" query parameter
+	cx                 string             // custom search engine ID, sent as "cx"
+	optionArgs         map[string]string  // extra query-string arguments from "optionArgs"
+	start, count       int                // zero-based offset and page size (count defaults to 10)
+	timeoutMillisecond uint32             // request timeout in ms (defaults to 5000)
+}
+
+// NewGoogleSearch builds a GoogleSearch from one searchFrom entry. apiKey,
+// cx, serviceName and servicePort are required (checked in that order).
+// count defaults to 10 and timeoutMillisecond to 5000 when zero or unset;
+// optionArgs entries with an empty value are skipped. An error is returned
+// when count exceeds 10 or start+count exceeds 100 (a count of 10 is valid).
+func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
+	apiKey, cx := config.Get("apiKey").String(), config.Get("cx").String()
+	serviceName, servicePort := config.Get("serviceName").String(), config.Get("servicePort").Int()
+	switch {
+	case apiKey == "":
+		return nil, errors.New("apiKey not found")
+	case cx == "":
+		return nil, errors.New("cx not found")
+	case serviceName == "":
+		return nil, errors.New("serviceName not found")
+	case servicePort == 0:
+		return nil, errors.New("servicePort not found")
+	}
+	search := &GoogleSearch{
+		apiKey:             apiKey,
+		cx:                 cx,
+		client:             wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: serviceName, Port: servicePort}),
+		start:              int(config.Get("start").Uint()),
+		count:              int(config.Get("count").Uint()),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+		optionArgs:         map[string]string{},
+	}
+	if search.count == 0 {
+		search.count = 10
+	}
+	if search.count > 10 || search.start+search.count > 100 {
+		return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100")
+	}
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	for key, value := range config.Get("optionArgs").Map() {
+		if text := value.String(); text != "" {
+			search.optionArgs[key] = text
+		}
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine applies to ctx: GoogleSearch runs
+// when EngineType is "internet" or was left empty.
+func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool { return ctx.EngineType == "" || ctx.EngineType == "internet" }
+
+// Client returns the HTTP client that NewGoogleSearch created for the
+// configured serviceName/servicePort cluster.
+func (g GoogleSearch) Client() wrapper.HttpClient { return g.client }
+
+func (g GoogleSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d",
+		g.cx, url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count, g.apiKey, g.start+1)
+	var extraArgs []string
+	for key, value := range g.optionArgs {
+		extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+	}
+	if ctx.Language != "" {
+		extraArgs = append(extraArgs, fmt.Sprintf("lr=lang_%s", ctx.Language))
+	}
+	if len(extraArgs) > 0 {
+		queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+	}
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    queryUrl,
+		Headers: [][2]string{
+			{"Accept", "application/json"},
+		},
+		TimeoutMillisecond: g.timeoutMillisecond,
+	}
+}
+
+// ParseResult turns the "items" array of a Custom Search response into
+// results. Content is the item's snippet; when the first pagemap metatag
+// carries an og:description it is appended after a "..." separator line.
+func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	for _, item := range gjson.ParseBytes(response).Get("items").Array() {
+		result := engine.SearchResult{
+			Title:   item.Get("title").String(),
+			Link:    item.Get("link").String(),
+			Content: item.Get("snippet").String(),
+		}
+		if og := item.Get("pagemap.metatags.0.og:description").String(); og != "" {
+			result.Content = fmt.Sprintf("%s\n...\n%s", result.Content, og)
+		}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
new file mode 100644
index 0000000000..84273bb776
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
@@ -0,0 +1,194 @@
+package quark
+
+import (
+	"crypto/hmac"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+	"github.com/tidwall/gjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+// QuarkSearch queries the Aliyun-hosted Quark search API; NewQuarkSearch fills in every field.
+type QuarkSearch struct {
+	apiKey, secretKey  string             // Aliyun AccessKey (Credential) and the secret used to sign requests
+	endpoint           string             // API host; defaults to iqs.cn-zhangjiakou.aliyuncs.com
+	client             wrapper.HttpClient // cluster client for serviceName/servicePort
+	count              uint32             // number of leading pageItems considered (defaults to 10)
+	timeoutMillisecond uint32             // request timeout in ms (defaults to 5000)
+}
+
+const ( // fixed values used to build and sign the GenericSearch request
+	Path               = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch" // request path, also used as the canonical URI when signing
+	ContentSha256      = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // SHA-256 of an empty body, sent as x-acs-content-sha256
+	Action             = "GenericSearch" // sent as x-acs-action
+	Version            = "2024-11-11" // sent as x-acs-version
+	SignatureAlgorithm = "ACS3-HMAC-SHA256" // prefix of the Authorization header and of the string to sign
+	SignedHeaders      = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version" // semicolon-separated names of the signed headers
+)
+
+// urlEncoding percent-encodes rawStr for the canonical query string of the
+// request signature. Only ASCII letters, digits and "-", "_", ".", "~" are
+// left unchanged: a space becomes "%20" (never "+"), and "+", ":", "=",
+// "&", "$" and "@" are percent-encoded like every other character.
+func urlEncoding(rawStr string) string {
+	// url.QueryEscape encodes everything outside that unreserved set and
+	// differs from the wanted form only in writing a space as "+". A literal
+	// "+" comes out as "%2B", so each "+" left in its output is a space.
+	encodedStr := url.QueryEscape(rawStr)
+	return strings.ReplaceAll(encodedStr, "+", "%20")
+}
+
+// getSignature returns the lowercase hex HMAC-SHA256 of stringToSign keyed by secret.
+func getSignature(stringToSign, secret string) string {
+	mac := hmac.New(sha256.New, []byte(secret))
+	mac.Write([]byte(stringToSign)) // hash.Hash documents that Write never returns an error
+	return hex.EncodeToString(mac.Sum(nil))
+}
+
+// getCanonicalHeaders returns params as "name:value\n" lines sorted by header name.
+func getCanonicalHeaders(params map[string]string) string {
+	lines := make([]string, 0, len(params))
+	for name, value := range params {
+		lines = append(lines, name+":"+value)
+	}
+	// Compare the names only (strictly): as whole lines, "x-a-b:" would sort before "x-a:".
+	sort.Slice(lines, func(i, j int) bool { return strings.SplitN(lines[i], ":", 2)[0] < strings.SplitN(lines[j], ":", 2)[0] })
+	return strings.Join(lines, "\n") + "\n"
+}
+
+// getHasedString returns the lowercase hex-encoded SHA-256 digest of input.
+func getHasedString(input string) string {
+	digest := sha256.Sum256([]byte(input))
+	return hex.EncodeToString(digest[:])
+}
+
+func generateHexID(length int) (string, error) { // hex ID built from length/2 bytes of crypto/rand
+	raw := make([]byte, length/2) // integer division: an odd length yields length-1 characters
+	if _, err := rand.Read(raw); err != nil {
+		return "", err // the random source failed, so no ID is produced
+	}
+	return hex.EncodeToString(raw), nil
+}
+
+// NewQuarkSearch builds a QuarkSearch from one searchFrom entry. apiKey, secretKey, serviceName
+// and servicePort are required (checked in that order). endpoint defaults to
+// iqs.cn-zhangjiakou.aliyuncs.com and timeoutMillisecond to 5000. count defaults to 10 when it is
+// missing, zero or negative, so a negative value no longer wraps around to a huge uint32.
+func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
+	apiKey, secretKey := config.Get("apiKey").String(), config.Get("secretKey").String()
+	serviceName, servicePort := config.Get("serviceName").String(), config.Get("servicePort").Int()
+	switch {
+	case apiKey == "":
+		return nil, errors.New("apiKey not found")
+	case secretKey == "":
+		return nil, errors.New("secretKey not found")
+	case serviceName == "":
+		return nil, errors.New("serviceName not found")
+	case servicePort == 0:
+		return nil, errors.New("servicePort not found")
+	}
+	search := &QuarkSearch{
+		apiKey:             apiKey,
+		secretKey:          secretKey,
+		endpoint:           config.Get("endpoint").String(),
+		count:              10,
+		client:             wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: serviceName, Port: servicePort}),
+		timeoutMillisecond: uint32(config.Get("timeoutMillisecond").Uint()),
+	}
+	if search.endpoint == "" {
+		search.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
+	}
+	if configured := config.Get("count").Int(); configured > 0 {
+		search.count = uint32(configured)
+	}
+	if search.timeoutMillisecond == 0 {
+		search.timeoutMillisecond = 5000
+	}
+	return search, nil
+}
+
+// NeedExectue reports whether this engine applies to ctx: QuarkSearch runs
+// when EngineType is "internet" or was left empty.
+func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool { return ctx.EngineType == "" || ctx.EngineType == "internet" }
+
+// Client returns the HTTP client that NewQuarkSearch created for the
+// configured serviceName/servicePort cluster.
+func (g QuarkSearch) Client() wrapper.HttpClient { return g.client }
+
+// CallArgs builds the signed GET request for the GenericSearch API. Following
+// the ACS3-HMAC-SHA256 scheme, a canonical request (method, path, query
+// string, headers, signed-header list, body hash) is hashed, the hash is
+// signed with secretKey and the result is sent in the Authorization header.
+// The query parameters are kept in a slice ordered by name so the canonical
+// query string is identical on every call; ranging over a map here would
+// emit them in random order and make the signature unpredictable.
+func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryParams := [][2]string{
+		{"query", strings.Join(ctx.Querys, " ")},
+		{"timeRange", "NoLimit"},
+	}
+	reqParams := url.Values{}
+	canonicalParams := make([]string, 0, len(queryParams))
+	for _, param := range queryParams {
+		canonicalParams = append(canonicalParams, param[0]+"="+urlEncoding(param[1]))
+		reqParams.Add(param[0], param[1])
+	}
+	canonicalQueryString := strings.Join(canonicalParams, "&")
+	timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
+	// There is no error return here: if the nonce cannot be generated it is sent empty.
+	randomID, _ := generateHexID(32)
+	canonicalHeaders := getCanonicalHeaders(map[string]string{
+		"host":                  g.endpoint,
+		"x-acs-action":          Action,
+		"x-acs-content-sha256":  ContentSha256,
+		"x-acs-date":            timeStamp,
+		"x-acs-signature-nonce": randomID,
+		"x-acs-version":         Version,
+	})
+	canonicalRequest := http.MethodGet + "\n" + Path + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256
+	stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
+	authHeader := fmt.Sprintf("%s Credential=%s,SignedHeaders=%s,Signature=%s",
+		SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey))
+
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode()),
+		Headers: [][2]string{
+			{"x-acs-date", timeStamp},
+			{"x-acs-signature-nonce", randomID},
+			{"x-acs-content-sha256", ContentSha256},
+			{"x-acs-version", Version},
+			{"x-acs-action", Action},
+			{"Authorization", authHeader},
+		},
+		Body:               nil,
+		TimeoutMillisecond: g.timeoutMillisecond,
+	}
+}
+
+// ParseResult reads pageItems from the response and returns the valid
+// entries found among the first `count` items of that array.
+func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	for position, item := range gjson.ParseBytes(response).Get("pageItems").Array() {
+		if position >= int(g.count) {
+			break
+		}
+		result := engine.SearchResult{Title: item.Get("title").String(), Link: item.Get("link").String(), Content: item.Get("mainText").String()}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/types.go b/plugins/wasm-go/extensions/ai-search/engine/types.go
new file mode 100644
index 0000000000..a0d6780bae
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/types.go
@@ -0,0 +1,37 @@
+package engine
+
+import (
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+)
+
+type SearchResult struct { // one hit returned by a search engine
+	Title   string // title of the hit; used as the link text when references are rendered
+	Link    string // link of the hit; main.go also uses it to deduplicate merged results
+	Content string // text of the hit that is inserted into the prompt
+}
+
+func (result SearchResult) Valid() bool { // reports whether title, link and content are all non-empty
+	return !(result.Title == "" || result.Link == "" || result.Content == "")
+}
+
+type SearchContext struct { // describes one search to run
+	EngineType    string   // "internet", "private" or "arxiv" when produced by the rewrite step in main.go; empty otherwise
+	Querys        []string // query strings to search for
+	Language      string   // filled from the plugin's "defaultLang" setting
+	ArxivCategory string   // Arxiv category such as "cs.AI"; empty presumably means no category restriction - confirm in the arxiv engine
+}
+
+type CallArgs struct { // HTTP request description returned by SearchEngine.CallArgs
+	Method             string      // HTTP method, e.g. http.MethodGet
+	Url                string      // full request URL
+	Headers            [][2]string // header name/value pairs
+	Body               []byte      // request body; nil when the request has none
+	TimeoutMillisecond uint32      // timeout passed to the HTTP client call, in milliseconds
+}
+
+type SearchEngine interface { // contract implemented by every search backend
+	NeedExectue(ctx SearchContext) bool                            // reports whether this engine should be queried for ctx
+	Client() wrapper.HttpClient                                    // HTTP client the request is sent with
+	CallArgs(ctx SearchContext) CallArgs                           // builds the HTTP request for ctx
+	ParseResult(ctx SearchContext, response []byte) []SearchResult // converts a response body into search results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/go.mod b/plugins/wasm-go/extensions/ai-search/go.mod
new file mode 100644
index 0000000000..17bd972c49
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/go.mod
@@ -0,0 +1,26 @@
+module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search
+
+go 1.18
+
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
+
+require (
+	github.com/alibaba/higress/plugins/wasm-go v0.0.0
+	github.com/antchfx/xmlquery v1.4.4
+	github.com/higress-group/proxy-wasm-go-sdk v1.0.0
+	github.com/tidwall/gjson v1.18.0
+	github.com/tidwall/sjson v1.2.5
+)
+
+require (
+	github.com/antchfx/xpath v1.3.3 // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
+	github.com/magefile/mage v1.14.0 // indirect
+	github.com/tidwall/match v1.1.1 // indirect
+	github.com/tidwall/pretty v1.2.0 // indirect
+	github.com/tidwall/resp v0.1.1 // indirect
+	golang.org/x/net v0.33.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
+)
diff --git a/plugins/wasm-go/extensions/ai-search/go.sum b/plugins/wasm-go/extensions/ai-search/go.sum
new file mode 100644
index 0000000000..81d555f4bd
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/go.sum
@@ -0,0 +1,96 @@
+github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2dg=
+github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc=
+github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
+github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
+github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/plugins/wasm-go/extensions/ai-search/main.go b/plugins/wasm-go/extensions/ai-search/main.go
new file mode 100644
index 0000000000..720e688ccc
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -0,0 +1,568 @@
+// Copyright (c) 2022 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	_ "embed"
+	"errors"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+	"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+
+	"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/arxiv"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
+	"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
+)
+
+type SearchRewrite struct { // settings of the optional LLM call that turns the user message into search queries
+	client             wrapper.HttpClient // cluster client built from "llmServiceName" and "llmServicePort"
+	url                string             // "llmUrl"; target of the rewrite POST
+	apiKey             string             // "llmApiKey"; sent as a Bearer token
+	modelName          string             // "llmModelName"; placed in the "model" field of the rewrite request
+	timeoutMillisecond uint32             // "timeoutMillisecond"; 30000 when unset or zero
+	prompt             string             // embedded rewrite prompt selected from the configured engine kinds
+}
+
+type Config struct { // parsed plugin configuration
+	engine          []engine.SearchEngine // engines built from the "searchFrom" entries
+	promptTemplate  string                // must contain {search_results} and {question}; {cur_date} is substituted as well
+	referenceFormat string                // must contain %s; only read from the config when needReference is true
+	defaultLanguage string                // "defaultLang"; copied into every SearchContext
+	needReference   bool                  // whether a reference list is added to the response
+	searchRewrite   *SearchRewrite        // nil unless "searchRewrite" is configured
+}
+
+const (
+	DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024 // 100 MiB; buffer limit set for request and response bodies
+)
+
+//go:embed prompts/full.md
+var fullSearchPrompts string // rewrite prompt selected when arxiv and private engines are both configured
+
+//go:embed prompts/arxiv.md
+var arxivSearchPrompts string // rewrite prompt selected for arxiv without a private engine (internet + arxiv)
+
+//go:embed prompts/internet.md
+var internetSearchPrompts string // rewrite prompt selected when only internet engines are configured
+
+//go:embed prompts/private.md
+var privateSearchPrompts string // rewrite prompt selected for a private engine without arxiv (private + internet)
+
+func main() { // registers the config parser and the HTTP phase handlers with the wrapper under the name "ai-search"
+	wrapper.SetCtx(
+		"ai-search",
+		wrapper.ParseConfigBy(parseConfig),
+		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
+		wrapper.ProcessRequestBodyBy(onHttpRequestBody),
+		wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
+		wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
+		wrapper.ProcessResponseBodyBy(onHttpResponseBody),
+	)
+}
+
+// parseConfig validates the plugin configuration and fills config: reference and prompt settings, the search
+// engines listed under "searchFrom", and the optional LLM-backed "searchRewrite" step. It returns an error when a
+// template lacks a required placeholder, an engine is unknown or fails to initialise, a rewrite field is missing, or no engine is set.
+func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
+	config.needReference = json.Get("needReference").Bool()
+	if config.needReference {
+		config.referenceFormat = json.Get("referenceFormat").String()
+		if config.referenceFormat == "" {
+			config.referenceFormat = "**References:**\n%s"
+		} else if !strings.Contains(config.referenceFormat, "%s") {
+			return fmt.Errorf("invalid referenceFormat:%s", config.referenceFormat)
+		}
+	}
+	config.defaultLanguage = json.Get("defaultLang").String()
+	config.promptTemplate = json.Get("promptTemplate").String()
+	if config.promptTemplate == "" {
+		if config.needReference {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中，每个结果都是[webpage X begin]...[webpage X end]格式的，X代表每篇文章的数字索引。请在适当的情况下在句子末尾引用上下文。请按照引用编号[X]的格式在答案中对应部分引用上下文。如果一句话源自多个上下文，请列出所有相关的引用编号，例如[3][5]，切记不要将引用集中在最后返回引用编号，而是在答案对应部分列出。
+在回答时，请注意以下几点：
+- 今天是北京时间：{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关，你需要结合问题，对搜索结果进行甄别、筛选。
+- 对于列举类的问题（如列举所有航班信息），尽量将答案控制在10个要点以内，并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项；如非必要，不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题（如写论文），请务必在正文的段落中引用对应的参考编号，例如[3][5]，不能只在文章末尾引用。你需要解读并概括用户的题目要求，选择合适的格式，充分利用搜索结果并抽取重要信息，生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长，对于每一个要点的论述要推测用户的意图，给出尽可能多角度的回答要点，且务必信息量大、论述详尽。
+- 如果回答很长，请尽量结构化、分段落总结。如果需要分点作答，尽量控制在5个点以内，并合并相关的内容。
+- 对于客观类的问答，如果问题的答案非常简短，可以适当补充一到两句相关信息，以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式，确保可读性强。
+- 你的回答应该综合多个相关网页来回答，不能重复引用一个网页。
+- 除非用户要求，否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为：
+{question}`
+		} else {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中，每个结果都是[webpage begin]...[webpage end]格式的。
+在回答时，请注意以下几点：
+- 今天是北京时间：{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关，你需要结合问题，对搜索结果进行甄别、筛选。
+- 对于列举类的问题（如列举所有航班信息），尽量将答案控制在10个要点以内。如非必要，不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题（如写论文），你需要解读并概括用户的题目要求，选择合适的格式，充分利用搜索结果并抽取重要信息，生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长，对于每一个要点的论述要推测用户的意图，给出尽可能多角度的回答要点，且务必信息量大、论述详尽。
+- 如果回答很长，请尽量结构化、分段落总结。如果需要分点作答，尽量控制在5个点以内，并合并相关的内容。
+- 对于客观类的问答，如果问题的答案非常简短，可以适当补充一到两句相关信息，以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式，确保可读性强。
+- 你的回答应该综合多个相关网页来回答，但回答中不要给出网页的引用来源。
+- 除非用户要求，否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为：
+{question}`
+		}
+	}
+	if !strings.Contains(config.promptTemplate, "{search_results}") ||
+		!strings.Contains(config.promptTemplate, "{question}") {
+		return fmt.Errorf("invalid promptTemplate, must contains {search_results} and {question}:%s", config.promptTemplate)
+	}
+	var internetExists, privateExists, arxivExists bool
+	for _, e := range json.Get("searchFrom").Array() {
+		switch e.Get("type").String() {
+		case "bing":
+			searchEngine, err := bing.NewBingSearch(&e)
+			if err != nil {
+				return fmt.Errorf("bing search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "google":
+			searchEngine, err := google.NewGoogleSearch(&e)
+			if err != nil {
+				return fmt.Errorf("google search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "arxiv":
+			searchEngine, err := arxiv.NewArxivSearch(&e)
+			if err != nil {
+				return fmt.Errorf("arxiv search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			arxivExists = true
+		case "elasticsearch":
+			searchEngine, err := elasticsearch.NewElasticsearchSearch(&e)
+			if err != nil {
+				return fmt.Errorf("elasticsearch search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			privateExists = true
+		case "quark":
+			searchEngine, err := quark.NewQuarkSearch(&e)
+			if err != nil {
+				return fmt.Errorf("quark search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		default:
+			return fmt.Errorf("unknown search engine:%s", e.Get("type").String())
+		}
+	}
+	searchRewriteJson := json.Get("searchRewrite")
+	if searchRewriteJson.Exists() {
+		searchRewrite := &SearchRewrite{}
+		llmServiceName := searchRewriteJson.Get("llmServiceName").String()
+		if llmServiceName == "" {
+			return errors.New("llmServiceName not found")
+		}
+		llmServicePort := searchRewriteJson.Get("llmServicePort").Int()
+		if llmServicePort == 0 {
+			return errors.New("llmServicePort not found")
+		}
+		searchRewrite.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: llmServiceName,
+			Port: llmServicePort,
+		})
+		searchRewrite.apiKey = searchRewriteJson.Get("llmApiKey").String()
+		if searchRewrite.apiKey == "" {
+			return errors.New("llmApiKey not found")
+		}
+		searchRewrite.url = searchRewriteJson.Get("llmUrl").String()
+		if searchRewrite.url == "" {
+			return errors.New("llmUrl not found")
+		}
+		searchRewrite.modelName = searchRewriteJson.Get("llmModelName").String()
+		if searchRewrite.modelName == "" {
+			return errors.New("llmModelName not found")
+		}
+		llmTimeout := searchRewriteJson.Get("timeoutMillisecond").Uint()
+		if llmTimeout == 0 {
+			llmTimeout = 30000
+		}
+		searchRewrite.timeoutMillisecond = uint32(llmTimeout)
+		// The consideration here is that internet searches are generally available, but arxiv and private sources may not be.
+		if arxivExists {
+			if privateExists {
+				// private + internet + arxiv
+				searchRewrite.prompt = fullSearchPrompts
+			} else {
+				// internet + arxiv
+				searchRewrite.prompt = arxivSearchPrompts
+			}
+		} else if privateExists {
+			// private + internet
+			searchRewrite.prompt = privateSearchPrompts
+		} else if internetExists {
+			// only internet
+			searchRewrite.prompt = internetSearchPrompts
+		}
+		config.searchRewrite = searchRewrite
+	}
+	if len(config.engine) == 0 {
+		return fmt.Errorf("no available search engine found")
+	}
+	log.Debugf("ai search enabled, config: %#v", config)
+	return nil
+}
+
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
+	contentType, _ := proxywasm.GetHttpRequestHeader("content-type")
+	switch {
+	case contentType == "":
+		// The request does not have a body, so there is nothing to prepare.
+	case !strings.Contains(contentType, "application/json"):
+		log.Warnf("content is not json, can't process: %s", contentType)
+		ctx.DontReadRequestBody()
+	default:
+		// JSON request: set the body buffer limit and drop the Accept-Encoding request header.
+		ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+		_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	}
+	return types.ActionContinue
+}
+
+// onHttpRequestBody takes the content of the last "user" message as the query. Without searchRewrite it
+// searches with that query directly; with searchRewrite it first asks the LLM which sources to search and
+// with which queries, pausing the request until the asynchronous callbacks resume it.
+func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	var queryIndex int
+	var query string
+	messages := gjson.GetBytes(body, "messages").Array()
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Get("role").String() == "user" {
+			queryIndex = i
+			query = messages[i].Get("content").String()
+			break
+		}
+	}
+	if query == "" {
+		log.Errorf("not found user query in body:%s", body)
+		return types.ActionContinue
+	}
+	searchRewrite := config.searchRewrite
+	if searchRewrite != nil {
+		startTime := time.Now()
+		rewritePrompt := strings.Replace(searchRewrite.prompt, "{question}", query, 1)
+		rewriteBody, _ := sjson.SetBytes([]byte(fmt.Sprintf(
+			`{"stream":false,"max_tokens":100,"model":"%s","messages":[{"role":"user","content":""}]}`,
+			searchRewrite.modelName)), "messages.0.content", rewritePrompt)
+		err := searchRewrite.client.Post(searchRewrite.url,
+			[][2]string{
+				{"Content-Type", "application/json"},
+				{"Authorization", fmt.Sprintf("Bearer %s", searchRewrite.apiKey)},
+			}, rewriteBody,
+			func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+				if statusCode != http.StatusOK {
+					log.Errorf("search rewrite failed, status: %d", statusCode)
+					// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+
+				content := gjson.GetBytes(responseBody, "choices.0.message.content").String()
+				log.Infof("LLM rewritten query response: %s (took %v), original search query:%s",
+					strings.ReplaceAll(content, "\n", `\n`), time.Since(startTime), query)
+				// The rewrite prompts ask the LLM to answer "none" when no search is needed.
+				if strings.Contains(content, "none") {
+					log.Debugf("no search required")
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+
+				// Parse search queries from LLM response
+				var searchContexts []engine.SearchContext
+				for _, line := range strings.Split(content, "\n") {
+					line = strings.TrimSpace(line)
+					if line == "" {
+						continue
+					}
+
+					parts := strings.SplitN(line, ":", 2)
+					if len(parts) != 2 {
+						continue
+					}
+
+					engineType := strings.TrimSpace(parts[0])
+					queryStr := strings.TrimSpace(parts[1])
+
+					// "internet" takes the whole text as one query; "private" and Arxiv take comma-separated keywords.
+					rawQuerys := []string{queryStr}
+					searchCtx := engine.SearchContext{EngineType: engineType, Language: config.defaultLanguage}
+					if engineType != "internet" {
+						rawQuerys = strings.Split(queryStr, ",")
+						if engineType != "private" {
+							// Any other prefix is treated as an Arxiv category.
+							searchCtx.EngineType = "arxiv"
+							searchCtx.ArxivCategory = engineType
+						}
+					}
+
+					// Drop blank queries so that no engine is called with an empty search string.
+					for _, rawQuery := range rawQuerys {
+						if trimmed := strings.TrimSpace(rawQuery); trimmed != "" {
+							searchCtx.Querys = append(searchCtx.Querys, trimmed)
+						}
+					}
+
+					if len(searchCtx.Querys) > 0 {
+						searchContexts = append(searchContexts, searchCtx)
+						if searchCtx.ArxivCategory != "" {
+							// Conduct inquiries in all areas to increase recall.
+							backupCtx := searchCtx
+							backupCtx.ArxivCategory = ""
+							searchContexts = append(searchContexts, backupCtx)
+						}
+					}
+				}
+
+				if len(searchContexts) == 0 {
+					log.Errorf("no valid search contexts found")
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+				if types.ActionContinue == executeSearch(ctx, config, queryIndex, body, searchContexts, log) {
+					proxywasm.ResumeHttpRequest()
+				}
+			}, searchRewrite.timeoutMillisecond)
+		if err != nil {
+			log.Errorf("search rewrite call llm service failed:%s", err)
+			// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+			return types.ActionContinue
+		}
+		return types.ActionPause
+	}
+
+	// Execute search without rewrite
+	return executeSearch(ctx, config, queryIndex, body, []engine.SearchContext{{
+		Querys:   []string{query},
+		Language: config.defaultLanguage,
+	}}, log)
+}
+
+// executeSearch sends one asynchronous request per (engine, search context) pair accepted by the engine's
+// NeedExectue. Once the last callback has run, the results are merged (deduplicated by link), rendered into
+// config.promptTemplate, written back as the content of message queryIndex, and the request is resumed.
+// It returns ActionPause when at least one request was dispatched and ActionContinue otherwise.
+func executeSearch(ctx wrapper.HttpContext, config Config, queryIndex int, body []byte, searchContexts []engine.SearchContext, log wrapper.Log) types.Action {
+	// searchResultGroups[i] collects the results of config.engine[i]; finished and searching count the completed
+	// and the dispatched requests, so the callback that makes them equal knows it is the last one.
+	searchResultGroups := make([][]engine.SearchResult, len(config.engine))
+	var finished, searching int
+	// {question} stands for the user's own message. After a rewrite searchContexts[0].Querys[0] is only a
+	// generated search query, so the original text is read back from the request body instead.
+	question := gjson.GetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex)).String()
+
+	for i := 0; i < len(config.engine); i++ {
+		configEngine := config.engine[i]
+		// Each engine only handles the search contexts it accepts; contexts it rejects are skipped one by one,
+		// which also covers the case where it accepts none of them.
+		for _, searchCtx := range searchContexts {
+			if !configEngine.NeedExectue(searchCtx) {
+				continue
+			}
+			// go.mod declares go 1.18, where the range variable is shared by all iterations. Copy it so the
+			// asynchronous callback parses each response with the context that produced it.
+			searchCtx := searchCtx
+			args := configEngine.CallArgs(searchCtx)
+			index := i
+			err := configEngine.Client().Call(args.Method, args.Url, args.Headers, args.Body,
+				func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+					defer func() {
+						finished++
+						if finished == searching {
+							// Merge search results from all engines with deduplication
+							var mergedResults []engine.SearchResult
+							seenLinks := make(map[string]bool)
+							for _, results := range searchResultGroups {
+								for _, result := range results {
+									if !seenLinks[result.Link] {
+										seenLinks[result.Link] = true
+										mergedResults = append(mergedResults, result)
+									}
+								}
+							}
+							// Format search results for prompt template
+							var formattedResults []string
+							var formattedReferences []string
+							for j, result := range mergedResults {
+								if config.needReference {
+									formattedResults = append(formattedResults,
+										fmt.Sprintf("[webpage %d begin]\n%s\n[webpage %d end]", j+1, result.Content, j+1))
+									formattedReferences = append(formattedReferences,
+										fmt.Sprintf("[%d] [%s](%s)", j+1, result.Title, result.Link))
+								} else {
+									formattedResults = append(formattedResults,
+										fmt.Sprintf("[webpage begin]\n%s\n[webpage end]", result.Content))
+								}
+							}
+							// Prepare template variables
+							curDate := time.Now().In(time.FixedZone("CST", 8*3600)).Format("2006年1月2日")
+							searchResults := strings.Join(formattedResults, "\n")
+							log.Debugf("searchResults: %s", searchResults)
+							// Fill prompt template
+							prompt := strings.Replace(config.promptTemplate, "{search_results}", searchResults, 1)
+							prompt = strings.Replace(prompt, "{question}", question, 1)
+							prompt = strings.Replace(prompt, "{cur_date}", curDate, 1)
+							// Update request body with processed prompt
+							modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
+							if err != nil {
+								log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
+							} else {
+								log.Debugf("modifeid body:%s", modifiedBody)
+								proxywasm.ReplaceHttpRequestBody(modifiedBody)
+								if config.needReference {
+									ctx.SetContext("References", strings.Join(formattedReferences, "\n"))
+								}
+							}
+							proxywasm.ResumeHttpRequest()
+						}
+					}()
+					if statusCode != http.StatusOK {
+						log.Errorf("search call failed, status: %d, engine: %#v", statusCode, configEngine)
+						return
+					}
+					// Append results to existing slice for this engine
+					searchResultGroups[index] = append(searchResultGroups[index], configEngine.ParseResult(searchCtx, responseBody)...)
+				}, args.TimeoutMillisecond)
+			if err != nil {
+				log.Errorf("search call failed, engine: %#v, err: %v", configEngine, err)
+				continue
+			}
+			searching++
+		}
+	}
+	if searching > 0 {
+		return types.ActionPause
+	}
+	return types.ActionContinue
+}
+
+func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action { // prepares response-body handling for reference injection
+	if !config.needReference {
+		ctx.DontReadResponseBody()
+		return types.ActionContinue
+	}
+	proxywasm.RemoveHttpResponseHeader("content-length")
+	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
+	if err != nil {
+		log.Errorf("unable to load content-type header from response: %v", err)
+	}
+	if streaming := err == nil && strings.HasPrefix(contentType, "text/event-stream"); !streaming { // buffer everything that is not an event stream
+		ctx.BufferResponseBody()
+		ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+	}
+	return types.ActionContinue
+}
+
+// onHttpResponseBody prepends the formatted reference list to choices.0.message.content of a buffered response.
+func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	references := ctx.GetStringContext("References", "")
+	content := gjson.GetBytes(body, "choices.0.message.content")
+	if references == "" || !content.Exists() { // nothing to add, or not a chat completion (e.g. an upstream error body)
+		return types.ActionContinue
+	}
+	modifiedBody, err := sjson.SetBytes(body, "choices.0.message.content", fmt.Sprintf("%s\n\n%s", fmt.Sprintf(config.referenceFormat, references), content))
+	if err != nil {
+		log.Errorf("modify response message content failed, err:%v, body:%s", err, body)
+		return types.ActionContinue
+	}
+	proxywasm.ReplaceHttpResponseBody(modifiedBody)
+	return types.ActionContinue
+}
+
+// onStreamingResponseBody holds back streamed chunks until the references have been injected into the first event.
+func onStreamingResponseBody(ctx wrapper.HttpContext, config Config, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
+	references := ctx.GetStringContext("References", "")
+	if references == "" || ctx.GetBoolContext("ReferenceAppended", false) {
+		return chunk
+	}
+	modifiedChunk, responseReady := setReferencesToFirstMessage(ctx, chunk, fmt.Sprintf(config.referenceFormat, references), log)
+	if responseReady {
+		ctx.SetContext("ReferenceAppended", true)
+		return modifiedChunk
+	}
+	if pending, ok := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY).([]byte); ok && isLastChunk {
+		return pending // stream ended before a complete first event arrived: flush the held-back data
+	}
+	return []byte("")
+}
+
+const PARTIAL_MESSAGE_CONTEXT_KEY = "partialMessage" // context key under which not-yet-forwarded stream bytes are kept
+
+// setReferencesToFirstMessage prepends references to choices.0.delta.content of the first SSE event of a stream.
+// Chunks are accumulated in the request context until they contain an event separator ("\n\n"). It returns
+// (data to forward, true) once the first event has been handled and (nil, false) while it is still buffering.
+func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, references string, log wrapper.Log) ([]byte, bool) {
+	if len(chunk) == 0 {
+		log.Debugf("chunk is empty")
+		return nil, false
+	}
+
+	partialMessage := chunk
+	if partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY); partialMessageI != nil {
+		if pMsg, ok := partialMessageI.([]byte); ok {
+			partialMessage = append(pMsg, chunk...)
+		} else {
+			log.Warnf("invalid partial message type: %T", partialMessageI)
+		}
+	}
+
+	if len(partialMessage) == 0 {
+		log.Debugf("partial message is empty")
+		return nil, false
+	}
+	// SSE events are separated by a blank line, so more than one part means the first event is complete.
+	messages := strings.Split(string(partialMessage), "\n\n")
+	if len(messages) > 1 {
+		firstMessage := messages[0]
+		log.Debugf("first message: %s", firstMessage)
+		firstMessage = strings.TrimPrefix(firstMessage, "data:")
+		firstMessage = strings.TrimPrefix(firstMessage, " ")
+		firstMessage = strings.TrimSuffix(firstMessage, "\n")
+		deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
+		modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
+		if err != nil {
+			log.Errorf("modify response delta content failed, err:%v", err)
+			return partialMessage, true
+		}
+		modifiedMessage = fmt.Sprintf("data: %s", modifiedMessage)
+		log.Debugf("modified message: %s", modifiedMessage)
+		messages[0] = string(modifiedMessage)
+		return []byte(strings.Join(messages, "\n\n")), true
+	}
+	ctx.SetContext(PARTIAL_MESSAGE_CONTEXT_KEY, partialMessage)
+	return nil, false
+}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
new file mode 100644
index 0000000000..34aeefa413
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
@@ -0,0 +1,214 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/论文资料
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/论文资料
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向Arxiv论文资料库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向Arxiv论文资料库提问：
+    4.2.1. 明确问题所属领域，然后确定Arxiv的Category值，Category可选的枚举如下:
+      - cs.AI: Artificial Intelligence
+      - cs.AR: Hardware Architecture
+      - cs.CC: Computational Complexity
+      - cs.CE: Computational Engineering, Finance, and Science
+      - cs.CG: Computational Geometry
+      - cs.CL: Computation and Language
+      - cs.CR: Cryptography and Security
+      - cs.CV: Computer Vision and Pattern Recognition
+      - cs.CY: Computers and Society
+      - cs.DB: Databases
+      - cs.DC: Distributed, Parallel, and Cluster Computing
+      - cs.DL: Digital Libraries
+      - cs.DM: Discrete Mathematics
+      - cs.DS: Data Structures and Algorithms
+      - cs.ET: Emerging Technologies
+      - cs.FL: Formal Languages and Automata Theory
+      - cs.GL: General Literature
+      - cs.GR: Graphics
+      - cs.GT: Computer Science and Game Theory
+      - cs.HC: Human-Computer Interaction
+      - cs.IR: Information Retrieval
+      - cs.IT: Information Theory
+      - cs.LG: Machine Learning
+      - cs.LO: Logic in Computer Science
+      - cs.MA: Multiagent Systems
+      - cs.MM: Multimedia
+      - cs.MS: Mathematical Software
+      - cs.NA: Numerical Analysis
+      - cs.NE: Neural and Evolutionary Computing
+      - cs.NI: Networking and Internet Architecture
+      - cs.OH: Other Computer Science
+      - cs.OS: Operating Systems
+      - cs.PF: Performance
+      - cs.PL: Programming Languages
+      - cs.RO: Robotics
+      - cs.SC: Symbolic Computation
+      - cs.SD: Sound
+      - cs.SE: Software Engineering
+      - cs.SI: Social and Information Networks
+      - cs.SY: Systems and Control
+      - econ.EM: Econometrics
+      - econ.GN: General Economics
+      - econ.TH: Theoretical Economics
+      - eess.AS: Audio and Speech Processing
+      - eess.IV: Image and Video Processing
+      - eess.SP: Signal Processing
+      - eess.SY: Systems and Control
+      - math.AC: Commutative Algebra
+      - math.AG: Algebraic Geometry
+      - math.AP: Analysis of PDEs
+      - math.AT: Algebraic Topology
+      - math.CA: Classical Analysis and ODEs
+      - math.CO: Combinatorics
+      - math.CT: Category Theory
+      - math.CV: Complex Variables
+      - math.DG: Differential Geometry
+      - math.DS: Dynamical Systems
+      - math.FA: Functional Analysis
+      - math.GM: General Mathematics
+      - math.GN: General Topology
+      - math.GR: Group Theory
+      - math.GT: Geometric Topology
+      - math.HO: History and Overview
+      - math.IT: Information Theory
+      - math.KT: K-Theory and Homology
+      - math.LO: Logic
+      - math.MG: Metric Geometry
+      - math.MP: Mathematical Physics
+      - math.NA: Numerical Analysis
+      - math.NT: Number Theory
+      - math.OA: Operator Algebras
+      - math.OC: Optimization and Control
+      - math.PR: Probability
+      - math.QA: Quantum Algebra
+      - math.RA: Rings and Algebras
+      - math.RT: Representation Theory
+      - math.SG: Symplectic Geometry
+      - math.SP: Spectral Theory
+      - math.ST: Statistics Theory
+      - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+      - astro-ph.EP: Earth and Planetary Astrophysics
+      - astro-ph.GA: Astrophysics of Galaxies
+      - astro-ph.HE: High Energy Astrophysical Phenomena
+      - astro-ph.IM: Instrumentation and Methods for Astrophysics
+      - astro-ph.SR: Solar and Stellar Astrophysics
+      - cond-mat.dis-nn: Disordered Systems and Neural Networks
+      - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+      - cond-mat.mtrl-sci: Materials Science
+      - cond-mat.other: Other Condensed Matter
+      - cond-mat.quant-gas: Quantum Gases
+      - cond-mat.soft: Soft Condensed Matter
+      - cond-mat.stat-mech: Statistical Mechanics
+      - cond-mat.str-el: Strongly Correlated Electrons
+      - cond-mat.supr-con: Superconductivity
+      - gr-qc: General Relativity and Quantum Cosmology
+      - hep-ex: High Energy Physics - Experiment
+      - hep-lat: High Energy Physics - Lattice
+      - hep-ph: High Energy Physics - Phenomenology
+      - hep-th: High Energy Physics - Theory
+      - math-ph: Mathematical Physics
+      - nlin.AO: Adaptation and Self-Organizing Systems
+      - nlin.CD: Chaotic Dynamics
+      - nlin.CG: Cellular Automata and Lattice Gases
+      - nlin.PS: Pattern Formation and Solitons
+      - nlin.SI: Exactly Solvable and Integrable Systems
+      - nucl-ex: Nuclear Experiment
+      - nucl-th: Nuclear Theory
+      - physics.acc-ph: Accelerator Physics
+      - physics.ao-ph: Atmospheric and Oceanic Physics
+      - physics.app-ph: Applied Physics
+      - physics.atm-clus: Atomic and Molecular Clusters
+      - physics.atom-ph: Atomic Physics
+      - physics.bio-ph: Biological Physics
+      - physics.chem-ph: Chemical Physics
+      - physics.class-ph: Classical Physics
+      - physics.comp-ph: Computational Physics
+      - physics.data-an: Data Analysis, Statistics and Probability
+      - physics.ed-ph: Physics Education
+      - physics.flu-dyn: Fluid Dynamics
+      - physics.gen-ph: General Physics
+      - physics.geo-ph: Geophysics
+      - physics.hist-ph: History and Philosophy of Physics
+      - physics.ins-det: Instrumentation and Detectors
+      - physics.med-ph: Medical Physics
+      - physics.optics: Optics
+      - physics.plasm-ph: Plasma Physics
+      - physics.pop-ph: Popular Physics
+      - physics.soc-ph: Physics and Society
+      - physics.space-ph: Space Physics
+      - quant-ph: Quantum Physics
+      - q-bio.BM: Biomolecules
+      - q-bio.CB: Cell Behavior
+      - q-bio.GN: Genomics
+      - q-bio.MN: Molecular Networks
+      - q-bio.NC: Neurons and Cognition
+      - q-bio.OT: Other Quantitative Biology
+      - q-bio.PE: Populations and Evolution
+      - q-bio.QM: Quantitative Methods
+      - q-bio.SC: Subcellular Processes
+      - q-bio.TO: Tissues and Organs
+      - q-fin.CP: Computational Finance
+      - q-fin.EC: Economics
+      - q-fin.GN: General Finance
+      - q-fin.MF: Mathematical Finance
+      - q-fin.PM: Portfolio Management
+      - q-fin.PR: Pricing of Securities
+      - q-fin.RM: Risk Management
+      - q-fin.ST: Statistical Finance
+      - q-fin.TR: Trading and Market Microstructure
+      - stat.AP: Applications
+      - stat.CO: Computation
+      - stat.ME: Methodology
+      - stat.ML: Machine Learning
+      - stat.OT: Other Statistics
+      - stat.TH: Statistics Theory
+    4.2.2. 根据问题所属领域，将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询Arxiv论文时，需要以Arxiv的Category值开头，例如"cs.AI:"
+  - 查询Arxiv论文时，优先用英文表述关键词进行搜索
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+
+# 用户发送的消息为：
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/full.md b/plugins/wasm-go/extensions/ai-search/prompts/full.md
new file mode 100644
index 0000000000..aec605d1b8
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/full.md
@@ -0,0 +1,221 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)/私有知识库，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/论文资料/私有知识库
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/论文资料/私有知识库
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向Arxiv论文资料库进行查询，还是向私有知识库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向私有知识库提问：将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+  4.3. 向Arxiv论文资料库提问：
+    4.3.1. 明确问题所属领域，然后确定Arxiv的Category值，Category可选的枚举如下:
+      - cs.AI: Artificial Intelligence
+      - cs.AR: Hardware Architecture
+      - cs.CC: Computational Complexity
+      - cs.CE: Computational Engineering, Finance, and Science
+      - cs.CG: Computational Geometry
+      - cs.CL: Computation and Language
+      - cs.CR: Cryptography and Security
+      - cs.CV: Computer Vision and Pattern Recognition
+      - cs.CY: Computers and Society
+      - cs.DB: Databases
+      - cs.DC: Distributed, Parallel, and Cluster Computing
+      - cs.DL: Digital Libraries
+      - cs.DM: Discrete Mathematics
+      - cs.DS: Data Structures and Algorithms
+      - cs.ET: Emerging Technologies
+      - cs.FL: Formal Languages and Automata Theory
+      - cs.GL: General Literature
+      - cs.GR: Graphics
+      - cs.GT: Computer Science and Game Theory
+      - cs.HC: Human-Computer Interaction
+      - cs.IR: Information Retrieval
+      - cs.IT: Information Theory
+      - cs.LG: Machine Learning
+      - cs.LO: Logic in Computer Science
+      - cs.MA: Multiagent Systems
+      - cs.MM: Multimedia
+      - cs.MS: Mathematical Software
+      - cs.NA: Numerical Analysis
+      - cs.NE: Neural and Evolutionary Computing
+      - cs.NI: Networking and Internet Architecture
+      - cs.OH: Other Computer Science
+      - cs.OS: Operating Systems
+      - cs.PF: Performance
+      - cs.PL: Programming Languages
+      - cs.RO: Robotics
+      - cs.SC: Symbolic Computation
+      - cs.SD: Sound
+      - cs.SE: Software Engineering
+      - cs.SI: Social and Information Networks
+      - cs.SY: Systems and Control
+      - econ.EM: Econometrics
+      - econ.GN: General Economics
+      - econ.TH: Theoretical Economics
+      - eess.AS: Audio and Speech Processing
+      - eess.IV: Image and Video Processing
+      - eess.SP: Signal Processing
+      - eess.SY: Systems and Control
+      - math.AC: Commutative Algebra
+      - math.AG: Algebraic Geometry
+      - math.AP: Analysis of PDEs
+      - math.AT: Algebraic Topology
+      - math.CA: Classical Analysis and ODEs
+      - math.CO: Combinatorics
+      - math.CT: Category Theory
+      - math.CV: Complex Variables
+      - math.DG: Differential Geometry
+      - math.DS: Dynamical Systems
+      - math.FA: Functional Analysis
+      - math.GM: General Mathematics
+      - math.GN: General Topology
+      - math.GR: Group Theory
+      - math.GT: Geometric Topology
+      - math.HO: History and Overview
+      - math.IT: Information Theory
+      - math.KT: K-Theory and Homology
+      - math.LO: Logic
+      - math.MG: Metric Geometry
+      - math.MP: Mathematical Physics
+      - math.NA: Numerical Analysis
+      - math.NT: Number Theory
+      - math.OA: Operator Algebras
+      - math.OC: Optimization and Control
+      - math.PR: Probability
+      - math.QA: Quantum Algebra
+      - math.RA: Rings and Algebras
+      - math.RT: Representation Theory
+      - math.SG: Symplectic Geometry
+      - math.SP: Spectral Theory
+      - math.ST: Statistics Theory
+      - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+      - astro-ph.EP: Earth and Planetary Astrophysics
+      - astro-ph.GA: Astrophysics of Galaxies
+      - astro-ph.HE: High Energy Astrophysical Phenomena
+      - astro-ph.IM: Instrumentation and Methods for Astrophysics
+      - astro-ph.SR: Solar and Stellar Astrophysics
+      - cond-mat.dis-nn: Disordered Systems and Neural Networks
+      - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+      - cond-mat.mtrl-sci: Materials Science
+      - cond-mat.other: Other Condensed Matter
+      - cond-mat.quant-gas: Quantum Gases
+      - cond-mat.soft: Soft Condensed Matter
+      - cond-mat.stat-mech: Statistical Mechanics
+      - cond-mat.str-el: Strongly Correlated Electrons
+      - cond-mat.supr-con: Superconductivity
+      - gr-qc: General Relativity and Quantum Cosmology
+      - hep-ex: High Energy Physics - Experiment
+      - hep-lat: High Energy Physics - Lattice
+      - hep-ph: High Energy Physics - Phenomenology
+      - hep-th: High Energy Physics - Theory
+      - math-ph: Mathematical Physics
+      - nlin.AO: Adaptation and Self-Organizing Systems
+      - nlin.CD: Chaotic Dynamics
+      - nlin.CG: Cellular Automata and Lattice Gases
+      - nlin.PS: Pattern Formation and Solitons
+      - nlin.SI: Exactly Solvable and Integrable Systems
+      - nucl-ex: Nuclear Experiment
+      - nucl-th: Nuclear Theory
+      - physics.acc-ph: Accelerator Physics
+      - physics.ao-ph: Atmospheric and Oceanic Physics
+      - physics.app-ph: Applied Physics
+      - physics.atm-clus: Atomic and Molecular Clusters
+      - physics.atom-ph: Atomic Physics
+      - physics.bio-ph: Biological Physics
+      - physics.chem-ph: Chemical Physics
+      - physics.class-ph: Classical Physics
+      - physics.comp-ph: Computational Physics
+      - physics.data-an: Data Analysis, Statistics and Probability
+      - physics.ed-ph: Physics Education
+      - physics.flu-dyn: Fluid Dynamics
+      - physics.gen-ph: General Physics
+      - physics.geo-ph: Geophysics
+      - physics.hist-ph: History and Philosophy of Physics
+      - physics.ins-det: Instrumentation and Detectors
+      - physics.med-ph: Medical Physics
+      - physics.optics: Optics
+      - physics.plasm-ph: Plasma Physics
+      - physics.pop-ph: Popular Physics
+      - physics.soc-ph: Physics and Society
+      - physics.space-ph: Space Physics
+      - quant-ph: Quantum Physics
+      - q-bio.BM: Biomolecules
+      - q-bio.CB: Cell Behavior
+      - q-bio.GN: Genomics
+      - q-bio.MN: Molecular Networks
+      - q-bio.NC: Neurons and Cognition
+      - q-bio.OT: Other Quantitative Biology
+      - q-bio.PE: Populations and Evolution
+      - q-bio.QM: Quantitative Methods
+      - q-bio.SC: Subcellular Processes
+      - q-bio.TO: Tissues and Organs
+      - q-fin.CP: Computational Finance
+      - q-fin.EC: Economics
+      - q-fin.GN: General Finance
+      - q-fin.MF: Mathematical Finance
+      - q-fin.PM: Portfolio Management
+      - q-fin.PR: Pricing of Securities
+      - q-fin.RM: Risk Management
+      - q-fin.ST: Statistical Finance
+      - q-fin.TR: Trading and Market Microstructure
+      - stat.AP: Applications
+      - stat.CO: Computation
+      - stat.ME: Methodology
+      - stat.ML: Machine Learning
+      - stat.OT: Other Statistics
+      - stat.TH: Statistics Theory
+    4.3.2. 根据问题所属领域，将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询私有知识库时，需要以"private:"开头
+  - 查询Arxiv论文时，需要以Arxiv的Category值开头，例如"cs.AI:"
+  - 查询Arxiv论文时，优先用英文表述关键词进行搜索
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+private: 财务状况
+
+# 用户发送的消息为：
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/internet.md b/plugins/wasm-go/extensions/ai-search/prompts/internet.md
new file mode 100644
index 0000000000..f12836fc62
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/internet.md
@@ -0,0 +1,41 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+3. Adjust: 明确查询什么问题后，用一句话概括问题，并且针对搜索引擎做问题优化
+4. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 需要以"internet:"开头
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+# 用户发送的消息为：
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/private.md b/plugins/wasm-go/extensions/ai-search/prompts/private.md
new file mode 100644
index 0000000000..4ba0fc62ce
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/private.md
@@ -0,0 +1,55 @@
+# 目标
+你需要分析**用户发送的消息**，是否需要查询搜索引擎(Google/Bing)/私有知识库，并按照如下情况回复相应内容:
+
+## 情况一：不需要查询搜索引擎/私有知识库
+### 情况举例：
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**，如果符合，则按照下面**回复内容示例**进行回复，注意不要输出思考过程
+
+### 回复内容示例：
+none
+
+## 情况二：需要查询搜索引擎/私有知识库
+### 情况举例：
+1. 答复**用户发送的消息**，需依赖互联网上最新的资料
+2. 答复**用户发送的消息**，需依赖论文等专业资料
+3. 通过查询资料，可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**，以及其他需要查询资料的情况，如果符合，按照以下步骤思考，并按照下面**回复内容示例**进行回复，注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**，需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问，还是向私有知识库进行查询，或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料，应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后，按下面方式对问题进行调整
+  4.1. 向搜索引擎提问：用一句话概括问题，并且针对搜索引擎做问题优化
+  4.2. 向私有知识库提问：将问题拆分成多组关键词的组合，同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复，注意:
+  - 不要输出思考过程
+  - 可以向多个查询目标分别查询多次，多个查询用换行分隔，总查询次数控制在5次以内
+  - 查询搜索引擎时，需要以"internet:"开头
+  - 查询私有知识库时，需要以"private:"开头
+  - 当用多个关键词查询时，关键词之间用","分隔
+  - 尽量满足**用户发送的消息**中的搜索要求，例如用户要求用英文搜索，则需用英文表述问题和关键词
+  - 用户如果没有要求搜索语言，则用和**用户发送的消息**一致的语言表述问题和关键词
+  - 如果**用户发送的消息**使用中文，至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例：
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+private: 财务状况
+
+# 用户发送的消息为：
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
new file mode 100644
index 0000000000..64fbce9545
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
@@ -0,0 +1,56 @@
+import argparse
+import requests
+import time
+import json
+
+def main():
+    # 解析命令行参数
+    parser = argparse.ArgumentParser(description='AI Search Test Script')
+    parser.add_argument('--question', required=True, help='The question to analyze')
+    parser.add_argument('--prompt', required=True, help='The prompt file to analyze')    
+    args = parser.parse_args()
+
+    # 读取并解析prompts.md模板
+    # 这里假设prompts.md已经复制到当前目录
+    with open(args.prompt, 'r', encoding='utf-8') as f:
+        prompt_template = f.read()
+    
+    # 替换模板中的{question}变量
+    prompt = prompt_template.replace('{question}', args.question)
+
+    # 准备请求数据
+    headers = {
+        'Content-Type': 'application/json',
+    }
+    data = {
+        "model": "deepseek-v3",
+        "max_tokens": 100,
+        "messages": [
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    }
+
+    # 发送请求并计时
+    start_time = time.time()
+    try:
+        response = requests.post(
+            'http://localhost:8080/v1/chat/completions', 
+            headers=headers,
+            data=json.dumps(data)
+        )
+        response.raise_for_status()
+        end_time = time.time()
+
+        # 处理响应
+        result = response.json()
+        print("Response:")
+        print(result['choices'][0]['message']['content'])
+        print(f"\nRequest took {end_time - start_time:.2f} seconds")
+    except requests.exceptions.RequestException as e:
+        print(f"Request failed: {e}")
+
+if __name__ == '__main__':
+    main()
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
index a1d6a2fe36..c3c0c23340 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
@@ -51,14 +51,14 @@ description: AI Token限流插件配置参考
 
 `redis`中每一项的配置字段说明
 
-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local     |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口     |
-| username     | string | 否   | -                                                          | redis用户名                 |
-| password     | string | 否   | -                                                          | redis密码                   |
-| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒 |
-
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------                                                                  |
+| service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis用户名                                                                                  |
+| password     | string | 否   | -                                                          | redis密码                                                                                    |
+| timeout      | int    | 否   | 1000                                                       | redis连接超时时间，单位毫秒                                                                  |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                       |
 
 
 ## 配置示例
@@ -258,24 +258,12 @@ spec:
           '*': "qwen-turbo"
     ingress:
     - qwen
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
   phase: UNSPECIFIED_PHASE
   priority: 100
 ---
 apiVersion: extensions.higress.io/v1alpha1
 kind: WasmPlugin
-metadata:
-  name: ai-statistics
-  namespace: higress-system
-spec:
-  defaultConfig:
-    enable: true
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
-  phase: UNSPECIFIED_PHASE
-  priority: 200
----
-apiVersion: extensions.higress.io/v1alpha1
-kind: WasmPlugin
 metadata:
   name: ai-token-ratelimit
   namespace: higress-system
@@ -294,7 +282,7 @@ spec:
       # service_name: redis.default.svc.cluster.local
       service_name: redis.dns
       service_port: 6379
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
   phase: UNSPECIFIED_PHASE
   priority: 600
 ```
@@ -370,10 +358,19 @@ spec:
         pathType: Prefix
 ```
 
+转发 higress-gateway 的流量到本地，方便进行测试。
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
 触发限流效果如下：
 
 ```bash
-curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json"  -d '{
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json"  \
+-d '{
   "model": "gpt-3",
   "messages": [
     {
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
index c07e7aa2f6..cf502198e2 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
@@ -43,13 +43,14 @@ Field descriptions for each item in `limit_keys`
 | token_per_day          | int               | No, optionally select one in `token_per_second`, `token_per_minute`, `token_per_hour`, `token_per_day` | -             | Allowed number of token requests per day        |
 
 Field descriptions for each item in `redis`
-| Configuration Item      | Type              | Required | Default Value                                                     | Description                                     |
-| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
-| service_name            | string            | Required | -                                                               | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port            | int               | No       | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service     |
-| username                | string            | No       | -                                                               | Redis username                                  |
-| password                | string            | No       | -                                                               | Redis password                                  |
-| timeout                 | int               | No       | 1000                                                            | Redis connection timeout in milliseconds       |
+| Configuration Item      | Type              | Required | Default Value                                                                    | Description                                                                                                    |
+| ----------------------- | ----------------- | -------- | ---------------------------------------------------------------                  | -----------------------------------------------                                                                |
+| service_name            | string            | Required | -                                                                                | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port            | int               | No       | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service                                                                    |
+| username                | string            | No       | -                                                                                | Redis username                                                                                                 |
+| password                | string            | No       | -                                                                                | Redis password                                                                                                 |
+| timeout                 | int               | No       | 1000                                                                             | Redis connection timeout in milliseconds                                                                       |
+| database                | int               | No       | 0                                                                                | Redis database ID to use; for example, 1 corresponds to `SELECT 1`.                                            |
 
 ## Configuration Examples
 ### Identify request parameter apikey for differentiated rate limiting
@@ -233,24 +234,12 @@ spec:
           '*': "qwen-turbo"
     ingress:
     - qwen
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
   phase: UNSPECIFIED_PHASE
   priority: 100
 ---
 apiVersion: extensions.higress.io/v1alpha1
 kind: WasmPlugin
-metadata:
-  name: ai-statistics
-  namespace: higress-system
-spec:
-  defaultConfig:
-    enable: true
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
-  phase: UNSPECIFIED_PHASE
-  priority: 200
----
-apiVersion: extensions.higress.io/v1alpha1
-kind: WasmPlugin
 metadata:
   name: ai-token-ratelimit
   namespace: higress-system
@@ -269,7 +258,7 @@ spec:
       # service_name: redis.default.svc.cluster.local
       service_name: redis.dns
       service_port: 6379
-  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
+  url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
   phase: UNSPECIFIED_PHASE
   priority: 600
 ```
@@ -346,10 +335,19 @@ spec:
         pathType: Prefix
 ```
 
+Forward higress-gateway traffic to a local port so that the example can be tested easily.
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
 The rate limiting effect is triggered as follows:
 
 ```bash
-curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json"  -d '{
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json" \
+-d '{
   "model": "gpt-3",
   "messages": [
     {
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
index 9668f18617..743f2925f5 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})
-	return config.redisClient.Init(username, password, int64(timeout))
+	database := int(redisConfig.Get("database").Int())
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }
 
 func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
index 883e2535c2..f75ea01bcb 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
@@ -52,13 +52,14 @@ description: 基于 Key 集群限流插件配置参考
 
 `redis` 中每一项的配置字段说明。
 
-| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                        |
-| ------------ | ------ | ---- | ---------------------------------------------------------- |---------------------------------------------------------------------------|
+| 配置项       | 类型   | 必填 | 默认值                                                     | 说明                                                                                         |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | ---------------------------------------------------------------------------                  |
 | service_name | string | 必填 | -                                                          | redis 服务名称，带服务类型的完整 FQDN 名称，例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                            |
-| username     | string | 否   | -                                                          | redis 用户名                                                                 |
-| password     | string | 否   | -                                                          | redis 密码                                                                  |
-| timeout      | int    | 否   | 1000                                                       | redis 连接超时时间，单位毫秒                                                         |
+| service_port | int    | 否   | 服务类型为固定地址（static service）默认值为80，其他为6379 | 输入redis服务的服务端口                                                                      |
+| username     | string | 否   | -                                                          | redis 用户名                                                                                 |
+| password     | string | 否   | -                                                          | redis 密码                                                                                   |
+| timeout      | int    | 否   | 1000                                                       | redis 连接超时时间，单位毫秒                                                                 |
+| database     | int    | 否   | 0                                                          | 使用的数据库id，例如配置为1，对应`SELECT 1`                                                  |
 
 ## 配置示例
 
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
index 4a4dcf8633..83e0935d91 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
@@ -46,13 +46,15 @@ Description of configuration fields for each item in `limit_keys`.
 | query_per_day             | int           | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | -             | Allowed number of requests per day.                              |
 
 Description of configuration fields for each item in `redis`.
-| Configuration Item        | Type          | Required | Default Value                                               | Description                                                               |
-|---------------------------|---------------|----------|------------------------------------------------------------|---------------------------------------------------------------------------|
-| service_name              | string        | Required | -                                                          | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
-| service_port              | int           | No       | 80 for static services; otherwise 6379                     | Service port for the Redis service.                                      |
-| username                  | string        | No       | -                                                          | Redis username.                                                          |
-| password                  | string        | No       | -                                                          | Redis password.                                                          |
-| timeout                   | int           | No       | 1000                                                       | Redis connection timeout in milliseconds.                               |
+| Configuration Item | Type   | Required | Default Value                          | Description                                                                                                     |
+|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
+| service_name       | string | Required | -                                      | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
+| service_port       | int    | No       | 80 for static services; otherwise 6379 | Service port for the Redis service.                                                                             |
+| username           | string | No       | -                                      | Redis username.                                                                                                 |
+| password           | string | No       | -                                      | Redis password.                                                                                                 |
+| timeout            | int    | No       | 1000                                   | Redis connection timeout in milliseconds.                                                                       |
+| database           | int    | No       | 0                                      | ID of the Redis database to use; for example, a value of 1 corresponds to `SELECT 1`.                           |
+
 
 ## Configuration Examples
 
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
index 3689c36561..00d84b21fc 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
 		FQDN: serviceName,
 		Port: int64(servicePort),
 	})
-	return config.redisClient.Init(username, password, int64(timeout))
+	database := int(redisConfig.Get("database").Int())
+	return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
 }
 
 func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
diff --git a/plugins/wasm-go/extensions/ext-auth/README.md b/plugins/wasm-go/extensions/ext-auth/README.md
index cca0f655c8..de7e2feb83 100644
--- a/plugins/wasm-go/extensions/ext-auth/README.md
+++ b/plugins/wasm-go/extensions/ext-auth/README.md
@@ -77,6 +77,7 @@ MatchRule 类型每一项的配置字段说明，在使用 `array of MatchRule`
 | 名称                | 数据类型 | 必填 | 默认值 | 描述                                                         |
 | ------------------- | -------- | ---- | ------ | ------------------------------------------------------------ |
 | `match_rule_domain` | string   | 否   | -      | 匹配规则域名，支持通配符模式，例如 `*.bar.com`               |
+| `match_rule_method` | []string | 否   | -      | 匹配请求方法                                                 |
 | `match_rule_path`   | string   | 否   | -      | 匹配请求路径的规则                                           |
 | `match_rule_type`   | string   | 否   | -      | 匹配请求路径的规则类型，可选 `exact` , `prefix` , `suffix`, `contains`, `regex` |
 
@@ -100,27 +101,41 @@ MatchRule 类型每一项的配置字段说明，在使用 `array of MatchRule`
 **白名单模式**
 
 ```yaml
+# 白名单模式配置，符合白名单规则的请求无需验证
 match_type: 'whitelist'
 match_list:
-    - match_rule_domain: '*.bar.com'
-      match_rule_path: '/foo'
-      match_rule_type: 'prefix'
+  # 所有以 api.example.com 为域名，且路径前缀为 /public 的请求无需验证
+  - match_rule_domain: 'api.example.com'
+    match_rule_path: '/public'
+    match_rule_type: 'prefix'
+  # 针对图片资源服务器 images.example.com，所有 GET 请求无需验证
+  - match_rule_domain: 'images.example.com'
+    match_rule_method: ["GET"]
+  # 所有域名下，路径精确匹配 /health-check 的 HEAD 请求无需验证
+  - match_rule_method: ["HEAD"]
+    match_rule_path: '/health-check'
+    match_rule_type: 'exact'
 ```
 
-泛域名 `*.bar.com` 下前缀匹配 `/foo` 的请求无需验证
-
 **黑名单模式**
 
 ```yaml
+# 黑名单模式配置，符合黑名单规则的请求需要验证
 match_type: 'blacklist'
 match_list:
-    - match_rule_domain: '*.bar.com'
-      match_rule_path: '/headers'
-      match_rule_type: 'prefix'
+  # 所有以 admin.example.com 为域名，且路径前缀为 /sensitive 的请求需要验证
+  - match_rule_domain: 'admin.example.com'
+    match_rule_path: '/sensitive'
+    match_rule_type: 'prefix'
+  # 所有域名下，路径精确匹配 /user 的 DELETE 请求需要验证
+  - match_rule_method: ["DELETE"]
+    match_rule_path: '/user'
+    match_rule_type: 'exact'
+  # 所有以 legacy.example.com 为域名的 POST 请求需要验证
+  - match_rule_domain: 'legacy.example.com'
+    match_rule_method: ["POST"]
 ```
 
-只有泛域名 `*.bar.com` 下前缀匹配 `/header` 的请求需要验证
-
 ## 配置示例
 
 下面假设 `ext-auth` 服务在 Kubernetes 中 serviceName 为 `ext-auth`，端口 `8090`，路径为 `/auth`，命名空间为 `backend`
@@ -185,13 +200,13 @@ content-length: 0
 http_service:
   authorization_request:
     allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
     headers_to_add:
       x-envoy-header: true
   authorization_response:
     allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
   endpoint_mode: envoy
   endpoint:
     service_name: ext-auth.backend.svc.cluster.local
@@ -287,13 +302,13 @@ content-length: 0
 http_service:
   authorization_request:
     allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
     headers_to_add:
       x-envoy-header: true
   authorization_response:
     allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
   endpoint_mode: forward_auth
   endpoint:
     service_name: ext-auth.backend.svc.cluster.local
diff --git a/plugins/wasm-go/extensions/ext-auth/README_EN.md b/plugins/wasm-go/extensions/ext-auth/README_EN.md
index a095690cf6..8a01216009 100644
--- a/plugins/wasm-go/extensions/ext-auth/README_EN.md
+++ b/plugins/wasm-go/extensions/ext-auth/README_EN.md
@@ -77,6 +77,7 @@ Configuration fields for each item of `MatchRule` type. When using `array of Mat
 | Name | Data Type | Required | Default Value | Description |
 | --- | --- | --- | --- | --- |
 | `match_rule_domain` | string | No | - | The domain of the matching rule, supports wildcard patterns, e.g., `*.bar.com` |
+| `match_rule_method` | []string | No | - | HTTP request methods to match, compared case-insensitively, e.g. `["GET", "POST"]`; when omitted, any method matches |
 | `match_rule_path` | string | No | - | The rule for matching the request path |
 | `match_rule_type` | string | No | - | The type of the rule for matching the request path, can be `exact`, `prefix`, `suffix`, `contains`, `regex` |
 
@@ -100,27 +101,41 @@ Supports blacklist and whitelist mode configuration. The default is the whitelis
 **Whitelist Mode**
 
 ```yaml
+# Configuration for the whitelist mode. Requests that match the whitelist rules do not need verification.
 match_type: 'whitelist'
 match_list:
-    - match_rule_domain: '*.bar.com'
-      match_rule_path: '/foo'
-      match_rule_type: 'prefix'
+  # Requests with the domain name api.example.com and a path prefixed with /public do not need verification.
+  - match_rule_domain: 'api.example.com'
+    match_rule_path: '/public'
+    match_rule_type: 'prefix'
+  # For the image resource server images.example.com, all GET requests do not need verification.
+  - match_rule_domain: 'images.example.com'
+    match_rule_method: ["GET"]
+  # For all domains, HEAD requests with an exact path match of /health-check do not need verification.
+  - match_rule_method: ["HEAD"]
+    match_rule_path: '/health-check'
+    match_rule_type: 'exact'
 ```
 
-Requests with a prefix match of `/foo` under the wildcard domain `*.bar.com` do not need to be verified.
-
 **Blacklist Mode**
 
 ```yaml
+# Configuration for the blacklist mode. Requests that match the blacklist rules need verification.
 match_type: 'blacklist'
 match_list:
-    - match_rule_domain: '*.bar.com'
-      match_rule_path: '/headers'
-      match_rule_type: 'prefix'
+  # Requests with the domain name admin.example.com and a path prefixed with /sensitive need verification.
+  - match_rule_domain: 'admin.example.com'
+    match_rule_path: '/sensitive'
+    match_rule_type: 'prefix'
+  # For all domains, DELETE requests with an exact path match of /user need verification.
+  - match_rule_method: ["DELETE"]
+    match_rule_path: '/user'
+    match_rule_type: 'exact'
+  # For the domain legacy.example.com, all POST requests need verification.
+  - match_rule_domain: 'legacy.example.com'
+    match_rule_method: ["POST"]
 ```
 
-Only requests with a prefix match of `/header` under the wildcard domain `*.bar.com` need to be verified.
-
 
 ## Configuration Examples
 
@@ -186,13 +201,13 @@ Configuration of the `ext-auth` plugin:
 http_service:
   authorization_request:
     allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
     headers_to_add:
       x-envoy-header: true
   authorization_response:
     allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
   endpoint_mode: envoy
   endpoint:
     service_name: ext-auth.backend.svc.cluster.local
@@ -286,13 +301,13 @@ Configuration of the `ext-auth` plugin:
 http_service:
   authorization_request:
     allowed_headers:
-    - exact: x-auth-version
+      - exact: x-auth-version
     headers_to_add:
       x-envoy-header: true
   authorization_response:
     allowed_upstream_headers:
-    - exact: x-user-id
-    - exact: x-auth-version
+      - exact: x-user-id
+      - exact: x-auth-version
   endpoint_mode: forward_auth
   endpoint:
     service_name: ext-auth.backend.svc.cluster.local
diff --git a/plugins/wasm-go/extensions/ext-auth/config/config.go b/plugins/wasm-go/extensions/ext-auth/config/config.go
index 5709bbf9b0..def0955cea 100644
--- a/plugins/wasm-go/extensions/ext-auth/config/config.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config.go
@@ -260,19 +260,28 @@ func parseMatchRules(json gjson.Result, config *ExtAuthConfig) error {
 	var err error
 
 	matchListConfig.ForEach(func(key, value gjson.Result) bool {
-		pathMatcher, buildErr := expr.BuildStringMatcher(
-			value.Get("match_rule_type").Str,
-			value.Get("match_rule_path").Str, false)
-		if buildErr != nil {
-			err = fmt.Errorf("failed to build string matcher for rule with domain %q, path %q, type %q: %w",
-				value.Get("match_rule_domain").Str,
-				value.Get("match_rule_path").Str,
-				value.Get("match_rule_type").Str,
-				buildErr)
-			return false // stop iterating
+		domain := value.Get("match_rule_domain").Str
+		methodArray := value.Get("match_rule_method").Array()
+		matchRuleType := value.Get("match_rule_type").Str
+		matchRulePath := value.Get("match_rule_path").Str
+
+		var pathMatcher expr.Matcher
+		var buildErr error
+
+		if matchRuleType == "" && matchRulePath == "" {
+			pathMatcher = nil
+		} else {
+			pathMatcher, buildErr = expr.BuildStringMatcher(matchRuleType, matchRulePath, false)
+			if buildErr != nil {
+				err = fmt.Errorf("failed to build string matcher for rule with domain %q, method %v, path %q, type %q: %w",
+					domain, methodArray, matchRulePath, matchRuleType, buildErr)
+				return false // stop iterating
+			}
 		}
+
 		ruleList = append(ruleList, expr.Rule{
-			Domain: value.Get("match_rule_domain").Str,
+			Domain: domain,
+			Method: convertToStringList(methodArray),
 			Path:   pathMatcher,
 		})
 		return true // keep iterating
@@ -297,3 +306,11 @@ func convertToStringMap(result gjson.Result) map[string]string {
 	})
 	return m
 }
+
+func convertToStringList(results []gjson.Result) []string {
+	interfaces := make([]string, len(results))
+	for i, result := range results {
+		interfaces[i] = result.String()
+	}
+	return interfaces
+}
diff --git a/plugins/wasm-go/extensions/ext-auth/config/config_test.go b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
index 02750356e7..299035f458 100644
--- a/plugins/wasm-go/extensions/ext-auth/config/config_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
@@ -218,6 +218,7 @@ func TestParseConfig(t *testing.T) {
 					RuleList: []expr.Rule{
 						{
 							Domain: "*.bar.com",
+							Method: []string{},
 							Path: func() expr.Matcher {
 								pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternPrefix, "/headers", false)
 								if err != nil {
@@ -248,6 +249,7 @@ func TestParseConfig(t *testing.T) {
 				"match_list": [
 					{
 						"match_rule_domain": "*.foo.com",
+						"match_rule_method": ["GET"],
 						"match_rule_path": "/api",
 						"match_rule_type": "exact"
 					}
@@ -269,6 +271,7 @@ func TestParseConfig(t *testing.T) {
 					RuleList: []expr.Rule{
 						{
 							Domain: "*.foo.com",
+							Method: []string{"GET"},
 							Path: func() expr.Matcher {
 								pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternExact, "/api", false)
 								if err != nil {
@@ -284,6 +287,50 @@ func TestParseConfig(t *testing.T) {
 				StatusOnError:             403,
 			},
 		},
+		{
+			name: "Valid Match Rules with Whitelist - Only Method",
+			json: `{
+				"http_service": {
+					"endpoint_mode": "envoy",
+					"endpoint": {
+						"service_name": "example.com",
+						"service_port": 80,
+						"path_prefix": "/auth"
+					}
+				},
+				"match_type": "whitelist",
+				"match_list": [
+					{
+						"match_rule_method": ["GET"]
+					}
+				]
+			}`,
+			expected: ExtAuthConfig{
+				HttpService: HttpService{
+					EndpointMode: "envoy",
+					Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+						FQDN: "example.com",
+						Port: 80,
+						Host: "",
+					}),
+					PathPrefix: "/auth",
+					Timeout:    1000,
+				},
+				MatchRules: expr.MatchRules{
+					Mode: "whitelist",
+					RuleList: []expr.Rule{
+						{
+							Domain: "",
+							Method: []string{"GET"},
+							Path:   nil,
+						},
+					},
+				},
+				FailureModeAllow:          false,
+				FailureModeAllowHeaderAdd: false,
+				StatusOnError:             403,
+			},
+		},
 		{
 			name: "Missing Match Type",
 			json: `{
@@ -342,12 +389,13 @@ func TestParseConfig(t *testing.T) {
 				"match_list": [
 					{
 						"match_rule_domain": "*.bar.com",
+						"match_rule_method": ["POST","PUT","DELETE"],
 						"match_rule_path": "/headers",
 						"match_rule_type": "invalid_type"
 					}
 				]
 			}`,
-			expectedErr: `failed to build string matcher for rule with domain "*.bar.com", path "/headers", type "invalid_type": unknown string matcher type`,
+			expectedErr: `failed to build string matcher for rule with domain "*.bar.com", method [POST PUT DELETE], path "/headers", type "invalid_type": unknown string matcher type`,
 		},
 	}
 
diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
index c4c89fe385..bc74cd9bff 100644
--- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
+++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
@@ -3,6 +3,7 @@ package expr
 import (
 	"strings"
 
+	"ext-auth/util"
 	regexp "github.com/wasilibs/go-re2"
 )
 
@@ -18,6 +19,7 @@ type MatchRules struct {
 
 type Rule struct {
 	Domain string
+	Method []string
 	Path   Matcher
 }
 
@@ -28,19 +30,19 @@ func MatchRulesDefaults() MatchRules {
 	}
 }
 
-// IsAllowedByMode checks if the given domain and path are allowed based on the configuration mode.
-func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
+// IsAllowedByMode checks if the given domain, method and path are allowed based on the configuration mode.
+func (config *MatchRules) IsAllowedByMode(domain, method, path string) bool {
 	switch config.Mode {
 	case ModeWhitelist:
 		for _, rule := range config.RuleList {
-			if rule.matchDomainAndPath(domain, path) {
+			if rule.matchesAllConditions(domain, method, path) {
 				return true
 			}
 		}
 		return false
 	case ModeBlacklist:
 		for _, rule := range config.RuleList {
-			if rule.matchDomainAndPath(domain, path) {
+			if rule.matchesAllConditions(domain, method, path) {
 				return false
 			}
 		}
@@ -50,17 +52,21 @@ func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
 	}
 }
 
-// matchDomainAndPath checks if the given domain and path match the rule.
-// If rule.Domain is empty, it only checks rule.Path.
-// If rule.Path is empty, it only checks rule.Domain.
-// If both are empty, it returns false.
-func (rule *Rule) matchDomainAndPath(domain, path string) bool {
-	if rule.Domain == "" && rule.Path == nil {
+// matchesAllConditions checks if the given domain, method and path match all conditions of the rule.
+func (rule *Rule) matchesAllConditions(domain, method, path string) bool {
+	// If all conditions are empty, return false
+	if rule.Domain == "" && rule.Path == nil && len(rule.Method) == 0 {
 		return false
 	}
+
+	// Check domain and path matching
 	domainMatch := rule.Domain == "" || matchDomain(domain, rule.Domain)
 	pathMatch := rule.Path == nil || rule.Path.Match(path)
-	return domainMatch && pathMatch
+
+	// Check HTTP method matching: if no methods are specified, any method is allowed
+	methodMatch := len(rule.Method) == 0 || util.ContainsString(rule.Method, method)
+
+	return domainMatch && pathMatch && methodMatch
 }
 
 // matchDomain checks if the given domain matches the pattern.
diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
index 5d041262ac..f6ab9a542f 100644
--- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
@@ -6,11 +6,20 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
+func createMatcher(pattern string, caseSensitive bool) Matcher {
+	pathMatcher, err := newStringExactMatcher(pattern, caseSensitive)
+	if err != nil {
+		panic(err)
+	}
+	return pathMatcher
+}
+
 func TestIsAllowedByMode(t *testing.T) {
 	tests := []struct {
 		name     string
 		config   MatchRules
 		domain   string
+		method   string
 		path     string
 		expected bool
 	}{
@@ -21,17 +30,13 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: true,
 		},
@@ -42,18 +47,14 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
-			path:     "/bar",
+			method:   "POST",
+			path:     "/foo",
 			expected: false,
 		},
 		{
@@ -63,17 +64,13 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: false,
 		},
@@ -84,18 +81,14 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
-			path:     "/bar",
+			method:   "POST",
+			path:     "/foo",
 			expected: true,
 		},
 		{
@@ -107,6 +100,7 @@ func TestIsAllowedByMode(t *testing.T) {
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: true,
 		},
@@ -117,29 +111,25 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: true,
 		},
 		{
-			name: "Both Domain and Path are empty",
+			name: "All fields (Domain, Method, Path) are empty",
 			config: MatchRules{
 				Mode: ModeWhitelist,
 				RuleList: []Rule{
-					{Domain: "", Path: nil},
+					{Domain: "", Method: []string{}, Path: nil},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: false,
 		},
@@ -150,17 +140,13 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: false,
 		},
@@ -171,17 +157,13 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "*.example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "sub.example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: true,
 		},
@@ -192,20 +174,48 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "*.example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: false,
 		},
+		{
+			name: "Whitelist mode, only method matches",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Method: []string{"GET"},
+						Path:   nil,
+					},
+				},
+			},
+			domain:   "example.com",
+			method:   "GET",
+			path:     "/foo",
+			expected: true,
+		},
+		{
+			name: "Whitelist mode, only domain matches",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Domain: "example.com",
+						Path:   nil,
+					},
+				},
+			},
+			domain:   "example.com",
+			method:   "GET",
+			path:     "/foo",
+			expected: true,
+		},
 		{
 			name: "Blacklist mode, generic domain matches",
 			config: MatchRules{
@@ -213,17 +223,13 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "*.example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "sub.example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: false,
 		},
@@ -234,25 +240,89 @@ func TestIsAllowedByMode(t *testing.T) {
 				RuleList: []Rule{
 					{
 						Domain: "*.example.com",
-						Path: func() Matcher {
-							pathMatcher, err := newStringExactMatcher("/foo", true)
-							if err != nil {
-								t.Fatalf("Failed to create Matcher: %v", err)
-							}
-							return pathMatcher
-						}(),
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
 					},
 				},
 			},
 			domain:   "example.com",
+			method:   "GET",
 			path:     "/foo",
 			expected: true,
 		},
+		{
+			name: "Domain with special characters",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Domain: "example-*.com",
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo", true),
+					},
+				},
+			},
+			domain:   "example-test.com",
+			method:   "GET",
+			path:     "/foo",
+			expected: true,
+		},
+		{
+			name: "Path with special characters",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Domain: "example.com",
+						Method: []string{"GET"},
+						Path:   createMatcher("/foo-bar", true),
+					},
+				},
+			},
+			domain:   "example.com",
+			method:   "GET",
+			path:     "/foo-bar",
+			expected: true,
+		},
+		{
+			name: "Multiple methods, one matches",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Domain: "example.com",
+						Method: []string{"GET", "POST"},
+						Path:   createMatcher("/foo", true),
+					},
+				},
+			},
+			domain:   "example.com",
+			method:   "POST",
+			path:     "/foo",
+			expected: true,
+		},
+		{
+			name: "Multiple methods, none match",
+			config: MatchRules{
+				Mode: ModeWhitelist,
+				RuleList: []Rule{
+					{
+						Domain: "example.com",
+						Method: []string{"GET", "POST"},
+						Path:   createMatcher("/foo", true),
+					},
+				},
+			},
+			domain:   "example.com",
+			method:   "PUT",
+			path:     "/foo",
+			expected: false,
+		},
 	}
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := tt.config.IsAllowedByMode(tt.domain, tt.path)
+			result := tt.config.IsAllowedByMode(tt.domain, tt.method, tt.path)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
diff --git a/plugins/wasm-go/extensions/ext-auth/main.go b/plugins/wasm-go/extensions/ext-auth/main.go
index 8cc8c05952..7d3ce54b4a 100644
--- a/plugins/wasm-go/extensions/ext-auth/main.go
+++ b/plugins/wasm-go/extensions/ext-auth/main.go
@@ -51,9 +51,8 @@ const (
 )
 
 func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig, log wrapper.Log) types.Action {
-	path := wrapper.GetRequestPathWithoutQuery()
 	// If the request's domain and path match the MatchRules, skip authentication
-	if config.MatchRules.IsAllowedByMode(ctx.Host(), path) {
+	if config.MatchRules.IsAllowedByMode(ctx.Host(), ctx.Method(), wrapper.GetRequestPathWithoutQuery()) {
 		ctx.DontReadRequestBody()
 		return types.ActionContinue
 	}
diff --git a/plugins/wasm-go/extensions/ext-auth/util/utils.go b/plugins/wasm-go/extensions/ext-auth/util/utils.go
index eef1852878..2f6d8586a9 100644
--- a/plugins/wasm-go/extensions/ext-auth/util/utils.go
+++ b/plugins/wasm-go/extensions/ext-auth/util/utils.go
@@ -37,3 +37,14 @@ func ExtractFromHeader(headers [][2]string, headerKey string) string {
 	}
 	return ""
 }
+
+// ContainsString reports whether slice holds an element equal to s under
+// Unicode case folding (strings.EqualFold), so "get" matches "GET".
+func ContainsString(slice []string, s string) bool {
+	for idx := range slice {
+		if strings.EqualFold(slice[idx], s) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/plugins/wasm-rust/Dockerfile b/plugins/wasm-rust/Dockerfile
index b2c6725b14..ffdea8c537 100644
--- a/plugins/wasm-rust/Dockerfile
+++ b/plugins/wasm-rust/Dockerfile
@@ -3,10 +3,11 @@ WORKDIR /workspace
 RUN rustup target add wasm32-wasip1
 ARG PLUGIN_NAME="say-hello"
 ARG BUILD_OPTS="--release"
-ARG BUILDRC=".buildrc"
+ARG PREBUILD=".prebuild"
 COPY . .
 WORKDIR /workspace/extensions/$PLUGIN_NAME
-RUN if [ -f $BUILDRC ]; then sh $BUILDRC; fi
+# Optional per-plugin hook; -e aborts the build on the first failing command.
+RUN if [ -f "$PREBUILD" ]; then sh -e "$PREBUILD"; fi
 RUN cargo build --target wasm32-wasip1 $BUILD_OPTS \
     && cp target/wasm32-wasip1/release/*.wasm /main.wasm
 
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc b/plugins/wasm-rust/extensions/ai-data-masking/.buildrc
deleted file mode 100644
index bd317b8605..0000000000
--- a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc
+++ /dev/null
@@ -1 +0,0 @@
-apt update && apt-get install gcc gcc-multilib llvm clang -y && apt clean
\ No newline at end of file
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.prebuild b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild
new file mode 100644
index 0000000000..ba1b9b5d6d
--- /dev/null
+++ b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild
@@ -0,0 +1,3 @@
+apt-get update
+apt-get install gcc gcc-multilib llvm clang -y
+apt-get clean
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
index ca2db3da42..dc10bc3715 100644
--- a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
+++ b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
@@ -13,8 +13,10 @@
 // limitations under the License.
 
 mod deny_word;
+mod msg_window;
 
 use crate::deny_word::DenyWord;
+use crate::msg_window::MsgWindow;
 use fancy_regex::Regex;
 use grok::patterns;
 use higress_wasm_rust::log::Log;
@@ -27,8 +29,8 @@ use proxy_wasm::traits::{Context, HttpContext, RootContext};
 use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
 use rust_embed::Embed;
 use serde::de::Error;
-use serde::Deserialize;
 use serde::Deserializer;
+use serde::{Deserialize, Serialize};
 use serde_json::{json, Value};
 use std::cell::RefCell;
 use std::collections::{BTreeMap, HashMap, VecDeque};
@@ -66,9 +68,12 @@ struct AiDataMasking {
     config: Option<Rc<AiDataMaskingConfig>>,
     mask_map: HashMap<String, Option<String>>,
     is_openai: bool,
+    is_openai_stream: Option<bool>,
     stream: bool,
-    res_body: Bytes,
     log: Log,
+    msg_window: MsgWindow,
+    char_window_size: usize,
+    byte_window_size: usize,
 }
 fn deserialize_regexp<'de, D>(deserializer: D) -> Result<Regex, D::Error>
 where
@@ -213,10 +218,33 @@ struct ResMessage {
     #[serde(default)]
     delta: Option<Message>,
 }
+
+/// Token counters of an OpenAI-style `usage` object; a counter missing from the payload reads as 0.
+#[derive(Default, Debug, Deserialize, Serialize, Clone)]
+#[serde(default)]
+struct Usage {
+    completion_tokens: i32,
+    prompt_tokens: i32,
+    total_tokens: i32,
+}
+
+impl Usage {
+    pub fn add(&mut self, usage: &Usage) { // field-wise sum of `usage` into `self`
+        self.completion_tokens += usage.completion_tokens;
+        self.prompt_tokens += usage.prompt_tokens;
+        self.total_tokens += usage.total_tokens;
+    }
+    pub fn reset(&mut self) { // back to all-zero counters
+        *self = Usage::default();
+    }
+}
+
 #[derive(Default, Debug, Deserialize)]
 struct Res {
     #[serde(default)]
     choices: Vec<ResMessage>,
+    #[serde(default)] // a chunk without a `usage` key gets `Usage::default()`
+    usage: Usage, // NOTE(review): an explicit `"usage": null` is present, not missing — confirm such chunks still parse
 }
 
 static SYSTEM_PATTERNS: &[(&str, &str)] = &[
@@ -334,9 +362,12 @@ impl RootContextWrapper<AiDataMaskingConfig> for AiDataMaskingRoot {
             mask_map: HashMap::new(),
             config: None,
             is_openai: false,
+            is_openai_stream: None,
             stream: false,
-            res_body: Bytes::new(),
+            msg_window: MsgWindow::new(),
             log: Log::new(PLUGIN_NAME.to_string()),
+            char_window_size: 0,
+            byte_window_size: 0,
         }))
     }
 }
@@ -416,32 +447,6 @@ impl AiDataMasking {
         DataAction::StopIterationAndBuffer
     }
 
-    fn process_sse_message(&mut self, sse_message: &str) -> Vec<String> {
-        let mut messages = Vec::new();
-        for msg in sse_message.split('\n') {
-            if !msg.starts_with("data:") {
-                continue;
-            }
-            let res: Res = if let Some(m) = msg.strip_prefix("data:") {
-                match serde_json::from_str(m) {
-                    Ok(r) => r,
-                    Err(_) => continue,
-                }
-            } else {
-                continue;
-            };
-
-            if res.choices.is_empty() {
-                continue;
-            }
-            for choice in &res.choices {
-                if let Some(delta) = &choice.delta {
-                    messages.push(delta.content.clone());
-                }
-            }
-        }
-        messages
-    }
     fn replace_request_msg(&mut self, message: &str) -> String {
         let config = self.config.as_ref().unwrap();
         let mut msg = message.to_string();
@@ -464,6 +469,13 @@ impl AiDataMasking {
                         }
                         Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
                     };
+                    if to_word.len() > self.byte_window_size {
+                        self.byte_window_size = to_word.len();
+                    }
+                    if to_word.chars().count() > self.char_window_size {
+                        self.char_window_size = to_word.chars().count();
+                    }
+
                     replace_pair.push((from_word.to_string(), to_word.clone()));
 
                     if rule.restore && !to_word.is_empty() {
@@ -499,6 +511,7 @@ impl HttpContext for AiDataMasking {
         _end_of_stream: bool,
     ) -> HeaderAction {
         if has_request_body() {
+            self.set_http_request_header("Content-Length", None);
             HeaderAction::StopIteration
         } else {
             HeaderAction::Continue
@@ -512,58 +525,41 @@ impl HttpContext for AiDataMasking {
         self.set_http_response_header("Content-Length", None);
         HeaderAction::Continue
     }
-    fn on_http_response_body(&mut self, body_size: usize, _end_of_stream: bool) -> DataAction {
+    /// Feeds each response chunk through `msg_window` and replaces the body with what the window releases.
+    fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction {
         if !self.stream {
             return DataAction::Continue;
         }
-        if let Some(body) = self.get_http_response_body(0, body_size) {
-            self.res_body.extend(&body);
-
-            if let Ok(body_str) = String::from_utf8(self.res_body.clone()) {
-                if self.is_openai {
-                    let messages = self.process_sse_message(&body_str);
-
-                    if self.check_message(&messages.join("")) {
+        if body_size > 0 {
+            if let Some(body) = self.get_http_response_body(0, body_size) {
+                // The first chunk fixes the framing; non-OpenAI traffic is never treated as SSE.
+                let sse = self.is_openai && body.starts_with(b"data:");
+                let sse = *self.is_openai_stream.get_or_insert(sse);
+                self.msg_window.push(&body, sse);
+                if let Ok(mut msg) = String::from_utf8(self.msg_window.message.clone()) {
+                    if self.check_message(&msg) {
                         return self.deny(true);
                     }
-                } else if self.check_message(&body_str) {
-                    return self.deny(true);
-                }
-            }
-            if self.mask_map.is_empty() {
-                return DataAction::Continue;
-            }
-            if let Ok(body_str) = std::str::from_utf8(&body) {
-                let mut new_str = body_str.to_string();
-                if self.is_openai {
-                    let messages = self.process_sse_message(body_str);
-
-                    for message in messages {
-                        let mut new_message = message.clone();
+                    if !self.mask_map.is_empty() {
                         for (from_word, to_word) in self.mask_map.iter() {
                             if let Some(to) = to_word {
-                                new_message = new_message.replace(from_word, to);
+                                msg = msg.replace(from_word, to);
                             }
                         }
-                        if new_message != message {
-                            new_str = new_str.replace(
-                                &json!(message).to_string(),
-                                &json!(new_message).to_string(),
-                            );
-                        }
-                    }
-                } else {
-                    for (from_word, to_word) in self.mask_map.iter() {
-                        if let Some(to) = to_word {
-                            new_str = new_str.replace(from_word, to);
-                        }
-                    }
                     }
-                }
-                if new_str != body_str {
-                    self.replace_http_response_body(new_str.as_bytes());
+                    self.msg_window.message = msg.as_bytes().to_vec();
                 }
             }
         }
+        let sse = self.is_openai_stream.unwrap_or(false); // `None`: no body chunk seen yet
+        let new_body = if end_of_stream {
+            self.msg_window.finish(sse)
+        } else {
+            // The window kept back is twice the longest replacement recorded for the request.
+            let (chars, bytes) = (self.char_window_size * 2, self.byte_window_size * 2);
+            self.msg_window.pop(chars, bytes, sse)
+        };
+        self.replace_http_response_body(&new_body);
         DataAction::Continue
     }
 }
@@ -586,7 +582,6 @@ impl HttpContextWrapper<AiDataMaskingConfig> for AiDataMasking {
             return DataAction::Continue;
         }
         let config = self.config.as_ref().unwrap();
-
         let mut req_body = match String::from_utf8(req_body.clone()) {
             Ok(r) => r,
             Err(_) => return DataAction::Continue,
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs
new file mode 100644
index 0000000000..b8b33aacb0
--- /dev/null
+++ b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs
@@ -0,0 +1,363 @@
+use higress_wasm_rust::event_stream::EventStream;
+use serde_json::json;
+
+use crate::{Res, Usage};
+
+/// Buffer between the upstream response and the client. Response text piles
+/// up in `message`; `pop` releases all but a trailing window of it, so the
+/// caller can still inspect and rewrite the buffered tail before it is sent.
+#[derive(Default)]
+pub(crate) struct MsgWindow {
+    stream_parser: EventStream,
+    pub(crate) message: Vec<u8>,
+    usage: Usage,
+}
+
+impl MsgWindow {
+    /// Creates an empty window.
+    pub fn new() -> Self {
+        MsgWindow::default()
+    }
+
+    /// Decodes the JSON payload of a `data:` event. When the direct decode
+    /// fails and the payload carries an explicit `"usage": null`, the key is
+    /// dropped and the decode retried, since a null is not a missing field.
+    fn parse_chunk(payload: &[u8]) -> Option<Res> {
+        if let Ok(res) = serde_json::from_slice(payload) {
+            return Some(res);
+        }
+        let mut chunk: serde_json::Value = serde_json::from_slice(payload).ok()?;
+        chunk.as_object_mut()?.remove("usage").filter(|usage| usage.is_null())?;
+        serde_json::from_value(chunk).ok()
+    }
+
+    /// Folds one SSE event into the window. A decodable chunk contributes its
+    /// delta text and usage and yields `None`, as does `data: [DONE]`; every
+    /// other event is handed back unchanged.
+    fn update_event(&mut self, event: Vec<u8>) -> Option<Vec<u8>> {
+        if !event.starts_with(b"data:") {
+            return Some(event);
+        }
+        if let Some(res) = Self::parse_chunk(&event[b"data:".len()..]) {
+            for delta in res.choices.iter().filter_map(|choice| choice.delta.as_ref()) {
+                self.message.extend(delta.content.as_bytes());
+            }
+            self.usage.add(&res.usage);
+            None
+        } else if event.starts_with(b"data: [DONE]") {
+            None
+        } else {
+            Some(event)
+        }
+    }
+
+    /// Appends a response chunk. With `is_openai` the chunk goes through the
+    /// SSE parser event by event; otherwise the bytes are buffered verbatim.
+    pub fn push(&mut self, data: &[u8], is_openai: bool) {
+        if !is_openai {
+            self.message.extend(data);
+            return;
+        }
+        self.stream_parser.update(data.to_vec());
+        while let Some(event) = self.stream_parser.next() {
+            if let Some(raw) = self.update_event(event) {
+                self.message.extend(raw);
+            }
+        }
+    }
+
+    /// Releases everything in front of the trailing window and keeps the
+    /// window buffered. Valid UTF-8 is cut on a character boundary
+    /// `char_window_size` characters from the end (nothing is released until
+    /// more than that is buffered) and, with `is_openai`, wrapped in a single
+    /// `data:` delta event carrying the usage accumulated so far. Any other
+    /// buffer is cut `byte_window_size` bytes from the end and returned raw.
+    pub fn pop(
+        &mut self,
+        char_window_size: usize,
+        byte_window_size: usize,
+        is_openai: bool,
+    ) -> Vec<u8> {
+        let (cut, is_text) = match std::str::from_utf8(&self.message) {
+            Ok(text) if char_window_size == 0 => (text.len(), true),
+            Ok(text) => {
+                let window_start = text.char_indices().rev().nth(char_window_size - 1);
+                (window_start.map_or(0, |(at, _)| at), true)
+            }
+            // Saturating: the buffer may be shorter than the byte window.
+            Err(_) => (self.message.len().saturating_sub(byte_window_size), false),
+        };
+        if is_text && cut == 0 {
+            return Vec::new();
+        }
+        let window = self.message.split_off(cut);
+        let released = std::mem::replace(&mut self.message, window);
+        if !(is_text && is_openai) {
+            return released;
+        }
+        let usage = std::mem::take(&mut self.usage);
+        format!(
+            "data: {}\n\n",
+            json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&released)}}], "usage": usage})
+        ).into_bytes()
+    }
+
+    /// Drains the window at end of stream, after folding in whatever
+    /// `stream_parser.flush()` still yields. SSE output carries the remaining
+    /// text and any unreported usage, then always ends with `data: [DONE]`.
+    pub fn finish(&mut self, is_openai: bool) -> Vec<u8> {
+        if let Some(event) = self.stream_parser.flush() {
+            self.update_event(event);
+        }
+        if !is_openai {
+            return self.message.clone();
+        }
+        let usage = &self.usage;
+        let unreported = [usage.completion_tokens, usage.prompt_tokens, usage.total_tokens] != [0; 3];
+        let mut out = Vec::new();
+        if !self.message.is_empty() || unreported {
+            let event = json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&self.message)}}], "usage": self.usage});
+            out.extend(format!("data: {}\n\n", event).into_bytes());
+        }
+        out.extend_from_slice(b"data: [DONE]\n\n");
+        out
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    #[test]
+    fn test_msg() {
+        let mut msg_win = MsgWindow::default();
+        let data = r#"data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"，"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"，"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"，并"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"（"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"）。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"，"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]}
+
+data: [DONE]
+
+"#;
+        let mut buffer = Vec::new();
+        for line in data.split("\n\n") {
+            msg_win.push(line.as_bytes(), true);
+            msg_win.push(b"\n\n", true);
+            if let Ok(mut msg) = String::from_utf8(msg_win.message.clone()) {
+                msg = msg.replace("Higress", "***higress***");
+                msg_win.message = msg.as_bytes().to_vec();
+            }
+            buffer.extend(msg_win.pop(7, 7, true));
+        }
+        buffer.extend(msg_win.finish(true));
+        let mut message = String::new();
+        for line in buffer.split(|&x| x == b'\n') {
+            if line.is_empty() {
+                continue;
+            }
+            assert!(line.starts_with(b"data:"));
+            if line.starts_with(b"data: [DONE]") {
+                continue;
+            }
+            let des = serde_json::from_slice(&line[b"data:".len()..]);
+            assert!(des.is_ok());
+            let res: Res = des.unwrap();
+            for choice in &res.choices {
+                if let Some(delta) = &choice.delta {
+                    message.push_str(&delta.content);
+                }
+            }
+        }
+        assert_eq!(message, "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目，旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能，如负载均衡、熔断、限流等，并支持多协议代理（包括 HTTP/1.1, HTTP/2, gRPC）。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现，满足高并发场景下的需求。");
+    }
+}
diff --git a/plugins/wasm-rust/src/event_stream.rs b/plugins/wasm-rust/src/event_stream.rs
index 97715dcac1..cb12a35f00 100644
--- a/plugins/wasm-rust/src/event_stream.rs
+++ b/plugins/wasm-rust/src/event_stream.rs
@@ -108,10 +108,7 @@ impl EventStream {
     }
 
     fn is_2eol(&self, i: usize) -> Option<usize> {
-        let size1 = match self.is_eol(i) {
-            None => return None,
-            Some(size1) => size1,
-        };
+        let size1 = self.is_eol(i)?;
         if i + size1 < self.buffer.len() {
             match self.is_eol(i + size1) {
                 None => {
diff --git a/tools/hack/build-envoy.sh b/tools/hack/build-envoy.sh
index c07f24cb1f..931d0f5b1f 100755
--- a/tools/hack/build-envoy.sh
+++ b/tools/hack/build-envoy.sh
@@ -30,9 +30,11 @@ fi
 CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,destination=/home/package "
 CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/envoy,destination=/home/envoy "
 
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools-proxy:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"} # overridable via env; default stays the build-tools-proxy image this script used before
+
 BUILD_WITH_CONTAINER=1 \
     CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
     BUILD_ENVOY_BINARY_ONLY=1 \
     DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
-    IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools-proxy:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+    IMG=${BUILD_TOOLS_IMG} \
     make test_release
diff --git a/tools/hack/build-istio-image.sh b/tools/hack/build-istio-image.sh
index 2cb46578be..5c46753827 100755
--- a/tools/hack/build-istio-image.sh
+++ b/tools/hack/build-istio-image.sh
@@ -25,14 +25,34 @@ CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,dest
 
 DOCKER_RUN_OPTIONS+="-e HTTP_PROXY -e HTTPS_PROXY"
 
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
+
+ORIGINAL_HUB=${HUB}
+
+echo "IMG_URL=$IMG_URL"
+
+if [ -n "${IMG_URL:-}" ]; then
+  # Take the tag from the last path segment only, so a registry port (host:5000/ns/name) is never mistaken for it.
+  IMG_NAME=${IMG_URL##*/}
+  TAG=${IMG_NAME#*:}
+  HUB=${IMG_URL%":${TAG}"}
+  HUB=${HUB%/*}
+  [ "$TAG" != "$IMG_NAME" ] || TAG=latest
+fi
+
+echo "HUB=$HUB"
+echo "TAG=$TAG"
+
 GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
     ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
     BUILD_WITH_CONTAINER=1 \
     USE_REAL_USER=${USE_REAL_USER:-0} \
     CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
     DOCKER_BUILD_VARIANTS=default DOCKER_TARGETS="${DOCKER_TARGETS}" \
-    ISTIO_BASE_REGISTRY="${HUB}" \
+    ISTIO_BASE_REGISTRY="${ORIGINAL_HUB}" \
     BASE_VERSION="${HIGRESS_BASE_VERSION}" \
     DOCKER_RUN_OPTIONS=${DOCKER_RUN_OPTIONS} \
-    IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+    HUB="${HUB}" \
+    TAG="${TAG}" \
+    IMG=${BUILD_TOOLS_IMG} \
     make "$@"
diff --git a/tools/hack/build-istio-pilot.sh b/tools/hack/build-istio-pilot.sh
index 351ac8962a..7acf9d1231 100755
--- a/tools/hack/build-istio-pilot.sh
+++ b/tools/hack/build-istio-pilot.sh
@@ -19,7 +19,9 @@ set -euo pipefail
 source "$(dirname -- "$0")/setup-istio-env.sh"
 
 cd ${ROOT}/external/istio
-rm -rf out/linux_${TARGET_ARCH}; 
+rm -rf out/linux_${TARGET_ARCH};
+
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
 
 GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
     ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
@@ -28,5 +30,5 @@ GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
     ISTIO_BASE_REGISTRY="${HUB}" \
     BASE_VERSION="${HIGRESS_BASE_VERSION}" \
     DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
-    IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+    IMG=${BUILD_TOOLS_IMG} \
     make build-linux