diff --git a/.github/workflows/build-and-push-wasm-plugin-image.yaml b/.github/workflows/build-and-push-wasm-plugin-image.yaml
index 41935de210..2406582296 100644
--- a/.github/workflows/build-and-push-wasm-plugin-image.yaml
+++ b/.github/workflows/build-and-push-wasm-plugin-image.yaml
@@ -133,6 +133,11 @@ jobs:
command="
set -e
cd /workspace/plugins/wasm-rust/extensions/${PLUGIN_NAME}
+ if [ -f ./.prebuild ]; then
+ echo 'Found .prebuild file, sourcing it...'
+ . ./.prebuild
+ fi
+ rustup target add wasm32-wasip1
cargo build --target wasm32-wasip1 --release
cp target/wasm32-wasip1/release/*.wasm plugin.wasm
tar czvf plugin.tar.gz plugin.wasm
diff --git a/.github/workflows/build-image-and-push.yaml b/.github/workflows/build-image-and-push.yaml
index 938b041f30..4d789ddef2 100644
--- a/.github/workflows/build-image-and-push.yaml
+++ b/.github/workflows/build-image-and-push.yaml
@@ -1,229 +1,258 @@
-name: Build Docker Images and Push to Image Registry
-
-on:
- push:
- tags:
- - "v*.*.*"
- workflow_dispatch: ~
-
-jobs:
- build-controller-image:
- runs-on: ubuntu-latest
- environment:
- name: image-registry-controller
- env:
- CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
- CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
- steps:
- - name: "Checkout ${{ github.ref }}"
- uses: actions/checkout@v4
- with:
- fetch-depth: 1
-
- - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
- uses: jlumbroso/free-disk-space@main
- with:
- tool-cache: false
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- swap-storage: true
-
- - name: "Setup Go"
- uses: actions/setup-go@v5
- with:
- go-version: 1.21.5
-
- - name: Setup Golang Caches
- uses: actions/cache@v4
- with:
- path: |-
- ~/.cache/go-build
- ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ github.run_id }}
- restore-keys: ${{ runner.os }}-go
-
- - name: Calculate Docker metadata
- id: docker-meta
- uses: docker/metadata-action@v5
- with:
- images: |
- ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
- tags: |
- type=sha
- type=ref,event=tag
- type=semver,pattern={{version}}
- type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
- - name: Login to Docker Registry
- uses: docker/login-action@v3
- with:
- registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
- username: ${{ secrets.REGISTRY_USERNAME }}
- password: ${{ secrets.REGISTRY_PASSWORD }}
-
- - name: Build Docker Image and Push
- run: |
- GOPROXY="https://proxy.golang.org,direct" make docker-buildx-push
- BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress"
- readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
- for image in ${IMAGES[@]}; do
- echo "Image: $image"
- docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
- done
-
- build-pilot-image:
- runs-on: ubuntu-latest
- environment:
- name: image-registry-pilot
- env:
- PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
- PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
- steps:
- - name: "Checkout ${{ github.ref }}"
- uses: actions/checkout@v4
- with:
- fetch-depth: 1
-
- - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
- uses: jlumbroso/free-disk-space@main
- with:
- tool-cache: false
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- swap-storage: true
-
- - name: "Setup Go"
- uses: actions/setup-go@v5
- with:
- go-version: 1.21.5
-
- - name: Setup Golang Caches
- uses: actions/cache@v4
- with:
- path: |-
- ~/.cache/go-build
- ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ github.run_id }}
- restore-keys: ${{ runner.os }}-go
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
-
- - name: Cache Docker layers
- uses: actions/cache@v2
- with:
- path: /tmp/.buildx-cache
- key: ${{ runner.os }}-buildx-${{ github.sha }}
- restore-keys: |
- ${{ runner.os }}-buildx-
-
- - name: Calculate Docker metadata
- id: docker-meta
- uses: docker/metadata-action@v5
- with:
- images: |
- ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
- tags: |
- type=sha
- type=ref,event=tag
- type=semver,pattern={{version}}
- type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
- - name: Login to Docker Registry
- uses: docker/login-action@v3
- with:
- registry: ${{ env.PILOT_IMAGE_REGISTRY }}
- username: ${{ secrets.REGISTRY_USERNAME }}
- password: ${{ secrets.REGISTRY_PASSWORD }}
-
- - name: Build Pilot-Discovery Image and Push
- run: |
- GOPROXY="https://proxy.golang.org,direct" make build-istio
- BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot"
- readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
- for image in ${IMAGES[@]}; do
- echo "Image: $image"
- docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
- done
-
-
- build-gateway-image:
- runs-on: ubuntu-latest
- environment:
- name: image-registry-pilot
- env:
- GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
- GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
- steps:
- - name: "Checkout ${{ github.ref }}"
- uses: actions/checkout@v4
- with:
- fetch-depth: 1
-
- - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
- uses: jlumbroso/free-disk-space@main
- with:
- tool-cache: false
- android: true
- dotnet: true
- haskell: true
- large-packages: true
- swap-storage: true
-
- - name: "Setup Go"
- uses: actions/setup-go@v5
- with:
- go-version: 1.21.5
-
- - name: Setup Golang Caches
- uses: actions/cache@v4
- with:
- path: |-
- ~/.cache/go-build
- ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ github.run_id }}
- restore-keys: ${{ runner.os }}-go
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
-
- - name: Cache Docker layers
- uses: actions/cache@v2
- with:
- path: /tmp/.buildx-cache
- key: ${{ runner.os }}-buildx-${{ github.sha }}
- restore-keys: |
- ${{ runner.os }}-buildx-
-
- - name: Calculate Docker metadata
- id: docker-meta
- uses: docker/metadata-action@v5
- with:
- images: |
- ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
- tags: |
- type=sha
- type=ref,event=tag
- type=semver,pattern={{version}}
- type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
-
- - name: Login to Docker Registry
- uses: docker/login-action@v3
- with:
- registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
- username: ${{ secrets.REGISTRY_USERNAME }}
- password: ${{ secrets.REGISTRY_PASSWORD }}
-
- - name: Build Gateway Image and Push
- run: |
- GOPROXY="https://proxy.golang.org,direct" make build-gateway
- BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/proxyv2"
- readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
- for image in ${IMAGES[@]}; do
- echo "Image: $image"
- docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image
- done
+name: Build Docker Images and Push to Image Registry
+
+on:
+ push:
+ tags:
+ - "v*.*.*"
+ workflow_dispatch: ~
+
+jobs:
+ build-controller-image:
+ runs-on: ubuntu-latest
+ environment:
+ name: image-registry-controller
+ env:
+ CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+ CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }}
+ steps:
+ - name: "Checkout ${{ github.ref }}"
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+ uses: jlumbroso/free-disk-space@main
+ with:
+ tool-cache: false
+ android: true
+ dotnet: true
+ haskell: true
+ large-packages: true
+ swap-storage: true
+
+ - name: "Setup Go"
+ uses: actions/setup-go@v5
+ with:
+ go-version: 1.21.5
+
+ - name: Setup Golang Caches
+ uses: actions/cache@v4
+ with:
+ path: |-
+ ~/.cache/go-build
+ ~/go/pkg/mod
+ key: ${{ runner.os }}-go-${{ github.run_id }}
+ restore-keys: ${{ runner.os }}-go
+
+ - name: Calculate Docker metadata
+ id: docker-meta
+ uses: docker/metadata-action@v5
+ with:
+ images: |
+ ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }}
+ tags: |
+ type=sha
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+ - name: Login to Docker Registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }}
+ username: ${{ secrets.REGISTRY_USERNAME }}
+ password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Docker Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$image" make docker-buildx-push
+              BUILT_IMAGE="$image"
+            else
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"  # quoted: no word-splitting/globbing
+            fi
+          done
+
+ build-pilot-image:
+ runs-on: ubuntu-latest
+ environment:
+ name: image-registry-pilot
+ env:
+ PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+ PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }}
+ steps:
+ - name: "Checkout ${{ github.ref }}"
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+ uses: jlumbroso/free-disk-space@main
+ with:
+ tool-cache: false
+ android: true
+ dotnet: true
+ haskell: true
+ large-packages: true
+ swap-storage: true
+
+ - name: "Setup Go"
+ uses: actions/setup-go@v5
+ with:
+ go-version: 1.21.5
+
+ - name: Setup Golang Caches
+ uses: actions/cache@v4
+ with:
+ path: |-
+ ~/.cache/go-build
+ ~/go/pkg/mod
+ key: ${{ runner.os }}-go-${{ github.run_id }}
+ restore-keys: ${{ runner.os }}-go
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+ with:
+ image: tonistiigi/binfmt:qemu-v7.0.0
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v2 is retired; align with the Go cache step
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+ - name: Calculate Docker metadata
+ id: docker-meta
+ uses: docker/metadata-action@v5
+ with:
+ images: |
+ ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }}
+ tags: |
+ type=sha
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+ - name: Login to Docker Registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.PILOT_IMAGE_REGISTRY }}
+ username: ${{ secrets.REGISTRY_USERNAME }}
+ password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Pilot-Discovery Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST ':' so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # strip the trailing ":tag"
+              HUB=${HUB%/*}     # strip the image name, leaving registry/namespace
+              BUILT_IMAGE="$HUB/pilot:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-istio
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"
+            fi
+          done
+
+ build-gateway-image:
+ runs-on: ubuntu-latest
+ environment:
+ name: image-registry-gateway
+ env:
+ GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }}
+ GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }}
+ steps:
+ - name: "Checkout ${{ github.ref }}"
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧
+ uses: jlumbroso/free-disk-space@main
+ with:
+ tool-cache: false
+ android: true
+ dotnet: true
+ haskell: true
+ large-packages: true
+ swap-storage: true
+
+ - name: "Setup Go"
+ uses: actions/setup-go@v5
+ with:
+ go-version: 1.21.5
+
+ - name: Setup Golang Caches
+ uses: actions/cache@v4
+ with:
+ path: |-
+ ~/.cache/go-build
+ ~/go/pkg/mod
+ key: ${{ runner.os }}-go-${{ github.run_id }}
+ restore-keys: ${{ runner.os }}-go
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+ with:
+ image: tonistiigi/binfmt:qemu-v7.0.0
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4  # v2 is retired; align with the Go cache step
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+ - name: Calculate Docker metadata
+ id: docker-meta
+ uses: docker/metadata-action@v5
+ with:
+ images: |
+ ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }}
+ tags: |
+ type=sha
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }}
+
+ - name: Login to Docker Registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.GATEWAY_IMAGE_REGISTRY }}
+ username: ${{ secrets.REGISTRY_USERNAME }}
+ password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - name: Build Gateway Image and Push
+        run: |
+          BUILT_IMAGE=""
+          readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}"
+          for image in ${IMAGES[@]}; do
+            echo "Image: $image"
+            if [ "$BUILT_IMAGE" == "" ]; then
+              TAG=${image##*:}  # text after the LAST ':' so a registry host:port is not mistaken for the tag
+              HUB=${image%:*}   # strip the trailing ":tag"
+              HUB=${HUB%/*}     # strip the image name, leaving registry/namespace
+              BUILT_IMAGE="$HUB/proxyv2:$TAG"
+              GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-gateway
+            fi
+            if [ "$BUILT_IMAGE" != "$image" ]; then
+              docker buildx imagetools create "$BUILT_IMAGE" --tag "$image"
+            fi
+          done
\ No newline at end of file
diff --git a/.github/workflows/helm-docs.yaml b/.github/workflows/helm-docs.yaml
index d4637dbe1b..6ed5937fe3 100644
--- a/.github/workflows/helm-docs.yaml
+++ b/.github/workflows/helm-docs.yaml
@@ -10,7 +10,7 @@ on:
push:
branches: [ main ]
paths:
- - 'helm/**'
+ - 'helm/**'
jobs:
helm:
@@ -39,6 +39,7 @@ jobs:
rm -f ./helm-docs
translate-readme:
+ if: ${{ ! always() }}
needs: helm
runs-on: ubuntu-latest
diff --git a/Makefile.core.mk b/Makefile.core.mk
index 93aff0df81..2d84c0b118 100644
--- a/Makefile.core.mk
+++ b/Makefile.core.mk
@@ -162,13 +162,13 @@ buildx-prepare:
build-gateway: prebuild buildx-prepare
USE_REAL_USER=1 TARGET_ARCH=amd64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
USE_REAL_USER=1 TARGET_ARCH=arm64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init
- DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker.buildx
+ DOCKER_TARGETS="docker.proxyv2" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
build-gateway-local: prebuild
TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker
build-istio: prebuild buildx-prepare
- DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker.buildx
+ DOCKER_TARGETS="docker.pilot" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx
build-istio-local: prebuild
TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker
diff --git a/docker/docker.mk b/docker/docker.mk
index b572176508..f9315a3271 100644
--- a/docker/docker.mk
+++ b/docker/docker.mk
@@ -35,6 +35,8 @@ DOCKER_ALL_VARIANTS ?= debug distroless
INCLUDE_UNTAGGED_DEFAULT ?= false
DEFAULT_DISTRIBUTION=debug
-HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push ); )
-HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
+IMG ?= higress
+IMG_URL ?= $(HUB)/$(IMG):$(TAG)
+HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push ); )
+HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); )
diff --git a/helm/core/README.md b/helm/core/README.md
index fdd3e61a79..0ccad6dfb4 100644
--- a/helm/core/README.md
+++ b/helm/core/README.md
@@ -2,4 +2,4 @@
Installs the core components of cloud-native gateway [Higress](http://higress.io/)
-**Note:** It is highly recommended to install the whole package of Higress. Please visit https://higress.io/docs/user/quickstart/ for details.
\ No newline at end of file
+**Note:** It is highly recommended to install the whole package of Higress. Please visit https://higress.io/docs/user/quickstart/ for details.
diff --git a/helm/core/templates/_pod.tpl b/helm/core/templates/_pod.tpl
index 3e883d248b..c87e4d3eff 100644
--- a/helm/core/templates/_pod.tpl
+++ b/helm/core/templates/_pod.tpl
@@ -45,9 +45,9 @@ template:
- router
- --domain
- $(POD_NAMESPACE).svc.cluster.local
- - --proxyLogLevel=warning
- - --proxyComponentLogLevel=misc:error
- - --log_output_level=all:info
+ - --proxyLogLevel={{- default "warning" .Values.global.proxy.logLevel }}
+ - --proxyComponentLogLevel={{- default "misc:error" .Values.global.proxy.componentLogLevel }}
+ - --log_output_level={{- default "default:info" .Values.global.logging.level }}
- --serviceCluster=higress-gateway
securityContext:
{{- if .Values.gateway.containerSecurityContext }}
diff --git a/helm/core/values.yaml b/helm/core/values.yaml
index 6186654a05..d4fdff6eb1 100644
--- a/helm/core/values.yaml
+++ b/helm/core/values.yaml
@@ -491,6 +491,7 @@ gateway:
externalTrafficPolicy: ""
rollingMaxSurge: 100%
+ # -- If global.local is true, the default value is 100%, otherwise it is 25%
rollingMaxUnavailable: 25%
resources:
diff --git a/helm/higress/README.md b/helm/higress/README.md
index 3c23f42532..2cecaa91f4 100644
--- a/helm/higress/README.md
+++ b/helm/higress/README.md
@@ -130,7 +130,7 @@ The command removes all the Kubernetes components associated with the chart and
| gateway.resources.requests.memory | string | `"2048Mi"` | |
| gateway.revision | string | `""` | revision declares which revision this gateway is a part of |
| gateway.rollingMaxSurge | string | `"100%"` | |
-| gateway.rollingMaxUnavailable | string | `"25%"` | |
+| gateway.rollingMaxUnavailable | string | `"25%"` | If global.local is true, the default value is 100%, otherwise it is 25% |
| gateway.securityContext | string | `nil` | Define the security context for the pod. If unset, this will be automatically set to the minimum privileges required to bind to port 80 and 443. On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl. |
| gateway.service.annotations | object | `{}` | |
| gateway.service.externalTrafficPolicy | string | `""` | |
diff --git a/plugins/wasm-go/extensions/ai-cache/README.md b/plugins/wasm-go/extensions/ai-cache/README.md
index 999f472270..70f3e1b9d4 100644
--- a/plugins/wasm-go/extensions/ai-cache/README.md
+++ b/plugins/wasm-go/extensions/ai-cache/README.md
@@ -86,7 +86,8 @@ LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的
| cache.password | string | optional | "" | 缓存服务密码 |
| cache.timeout | uint32 | optional | 10000 | 缓存服务的超时时间,单位为毫秒。默认值是10000,即10秒 |
| cache.cacheTTL | int | optional | 0 | 缓存过期时间,单位为秒。默认值是 0,即 永不过期|
-| cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" |
+| cache.cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" |
+| cache.database | int | optional | 0 | 使用的数据库id,仅限redis,例如配置为1,对应`SELECT 1` |
## 其他配置
@@ -168,6 +169,7 @@ redis:
serviceName: my_redis.dns
servicePort: 6379
timeout: 100
+ database: 1
```
## 进阶用法
diff --git a/plugins/wasm-go/extensions/ai-cache/README_EN.md b/plugins/wasm-go/extensions/ai-cache/README_EN.md
index 7544995999..d48f9f71b9 100644
--- a/plugins/wasm-go/extensions/ai-cache/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-cache/README_EN.md
@@ -15,26 +15,29 @@ Plugin Execution Phase: `Authentication Phase`
Plugin Execution Priority: `10`
## Configuration Description
-| Name | Type | Requirement | Default | Description |
-| -------- | -------- | -------- | -------- | -------- |
-| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
-| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
+| Name | Type | Requirement | Default | Description |
+| -------- | -------- | -------- | -------- | -------- |
+| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
+| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
| cacheStreamValueFrom.responseBody | string | optional | "choices.0.delta.content" | Extracts a string from the streaming response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax |
-| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key |
-| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire |
-| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379 | Redis service port |
-| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds |
-| redis.username | string | optional | - | Username for logging into Redis |
-| redis.password | string | optional | - | Password for logging into Redis |
-| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value |
-| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value |
+| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key |
+| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire |
+| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer | optional | 6379 | Redis service port |
+| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds |
+| redis.username | string | optional | - | Username for logging into Redis |
+| redis.database | integer | optional | 0 | Index of the Redis database to use; for example, a value of 1 corresponds to running `SELECT 1`. |
+| redis.password | string | optional | - | Password for logging into Redis |
+| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value |
+| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value |
## Configuration Example
```yaml
redis:
serviceName: my-redis.dns
timeout: 2000
+ servicePort: 6379
+ database: 1
```
## Advanced Usage
diff --git a/plugins/wasm-go/extensions/ai-cache/cache/provider.go b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
index d68acd5099..9afca2c12e 100644
--- a/plugins/wasm-go/extensions/ai-cache/cache/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/provider.go
@@ -52,6 +52,9 @@ type ProviderConfig struct {
// @Title 缓存 Key 前缀
// @Description 缓存 Key 的前缀,默认值为 "higressAiCache:"
cacheKeyPrefix string
+ // @Title redis database
+ // @Description 指定 redis 的 database,默认使用0
+ database int
}
func (c *ProviderConfig) GetProviderType() string {
@@ -79,6 +82,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
if !json.Get("password").Exists() {
c.password = ""
}
+ c.database = int(json.Get("database").Int())
c.timeout = uint32(json.Get("timeout").Int())
if !json.Get("timeout").Exists() {
c.timeout = 10000
diff --git a/plugins/wasm-go/extensions/ai-cache/cache/redis.go b/plugins/wasm-go/extensions/ai-cache/cache/redis.go
index 4cb69744e1..b4a116ab89 100644
--- a/plugins/wasm-go/extensions/ai-cache/cache/redis.go
+++ b/plugins/wasm-go/extensions/ai-cache/cache/redis.go
@@ -38,7 +38,7 @@ func (rp *redisProvider) GetProviderType() string {
}
func (rp *redisProvider) Init(username string, password string, timeout uint32) error {
- return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout))
+ return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout), wrapper.WithDataBase(rp.config.database))
}
func (rp *redisProvider) Get(key string, cb wrapper.RedisResponseCallback) error {
diff --git a/plugins/wasm-go/extensions/ai-cache/config/config.go b/plugins/wasm-go/extensions/ai-cache/config/config.go
index 80c6147374..bc1093a567 100644
--- a/plugins/wasm-go/extensions/ai-cache/config/config.go
+++ b/plugins/wasm-go/extensions/ai-cache/config/config.go
@@ -28,9 +28,9 @@ type PluginConfig struct {
embeddingProvider embedding.Provider
vectorProvider vector.Provider
- embeddingProviderConfig embedding.ProviderConfig
- vectorProviderConfig vector.ProviderConfig
- cacheProviderConfig cache.ProviderConfig
+ embeddingProviderConfig *embedding.ProviderConfig
+ vectorProviderConfig *vector.ProviderConfig
+ cacheProviderConfig *cache.ProviderConfig
CacheKeyFrom string
CacheValueFrom string
@@ -47,7 +47,9 @@ type PluginConfig struct {
}
func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) {
-
+ c.embeddingProviderConfig = &embedding.ProviderConfig{}
+ c.vectorProviderConfig = &vector.ProviderConfig{}
+ c.cacheProviderConfig = &cache.ProviderConfig{}
c.vectorProviderConfig.FromJson(json.Get("vector"))
c.embeddingProviderConfig.FromJson(json.Get("embedding"))
c.cacheProviderConfig.FromJson(json.Get("cache"))
@@ -142,7 +144,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
var err error
if c.embeddingProviderConfig.GetProviderType() != "" {
log.Debugf("embedding provider is set to %s", c.embeddingProviderConfig.GetProviderType())
- c.embeddingProvider, err = embedding.CreateProvider(c.embeddingProviderConfig)
+ c.embeddingProvider, err = embedding.CreateProvider(*c.embeddingProviderConfig)
if err != nil {
return err
}
@@ -152,7 +154,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
}
if c.cacheProviderConfig.GetProviderType() != "" {
log.Debugf("cache provider is set to %s", c.cacheProviderConfig.GetProviderType())
- c.cacheProvider, err = cache.CreateProvider(c.cacheProviderConfig)
+ c.cacheProvider, err = cache.CreateProvider(*c.cacheProviderConfig)
if err != nil {
return err
}
@@ -162,7 +164,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
}
if c.vectorProviderConfig.GetProviderType() != "" {
log.Debugf("vector provider is set to %s", c.vectorProviderConfig.GetProviderType())
- c.vectorProvider, err = vector.CreateProvider(c.vectorProviderConfig)
+ c.vectorProvider, err = vector.CreateProvider(*c.vectorProviderConfig)
if err != nil {
return err
}
@@ -182,7 +184,7 @@ func (c *PluginConfig) GetVectorProvider() vector.Provider {
}
func (c *PluginConfig) GetVectorProviderConfig() vector.ProviderConfig {
- return c.vectorProviderConfig
+ return *c.vectorProviderConfig
}
func (c *PluginConfig) GetCacheProvider() cache.Provider {
diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
new file mode 100644
index 0000000000..a61bf77827
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go
@@ -0,0 +1,151 @@
+package embedding
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/tidwall/gjson"
+ "net/http"
+ "strconv"
+)
+
+const (
+ OLLAMA_DOMAIN = "localhost" // host CreateProvider falls back to when serviceHost is empty
+ OLLAMA_PORT = 11434 // port CreateProvider falls back to when servicePort is 0
+ OLLAMA_DEFAULT_MODEL_NAME = "llama3.2" // model CreateProvider falls back to when model is empty
+ OLLAMA_ENDPOINT = "/api/embed" // request path returned by constructParameters
+)
+
+type ollamaProviderInitializer struct { // stateless initializer for the Ollama embedding provider
+}
+
+func (c *ollamaProviderInitializer) InitConfig(json gjson.Result) {} // no-op: reads nothing from json
+
+func (c *ollamaProviderInitializer) ValidateConfig() error { // always succeeds
+ return nil
+}
+
+type ollamaProvider struct { // embedding provider built by ollamaProviderInitializer.CreateProvider
+ config ProviderConfig // provider settings after CreateProvider applied its defaults
+ client *wrapper.ClusterClient[wrapper.FQDNCluster] // client GetEmbedding uses to Post requests
+}
+
+// CreateProvider returns an ollamaProvider for c. A zero servicePort, empty
+// serviceHost or empty model is replaced by the matching OLLAMA_* constant
+// before the cluster client is created; the returned error is always nil.
+func (t *ollamaProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) {
+	if c.servicePort == 0 {
+		c.servicePort = OLLAMA_PORT
+	}
+	if c.serviceHost == "" {
+		c.serviceHost = OLLAMA_DOMAIN
+	}
+	if c.model == "" {
+		c.model = OLLAMA_DEFAULT_MODEL_NAME
+	}
+
+	cluster := wrapper.FQDNCluster{FQDN: c.serviceName, Host: c.serviceHost, Port: c.servicePort}
+	return &ollamaProvider{
+		config: c,
+		client: wrapper.NewClusterClient(cluster),
+	}, nil
+}
+
+func (t *ollamaProvider) GetProviderType() string { // reports the provider type constant for Ollama
+ return PROVIDER_TYPE_OLLAMA
+}
+
+type ollamaResponse struct { // JSON shape parseTextEmbedding decodes the response body into
+ Model string `json:"model"`
+ Embeddings [][]float64 `json:"embeddings"` // GetEmbedding hands the first entry to its callback
+ TotalDuration int64 `json:"total_duration"`
+ LoadDuration int64 `json:"load_duration"`
+ PromptEvalCount int64 `json:"prompt_eval_count"`
+}
+
+type ollamaEmbeddingRequest struct { // JSON request body marshalled by constructParameters
+ Input string `json:"input"` // text to embed
+ Model string `json:"model"` // filled from the provider's config.model
+}
+
+// constructParameters builds the request path, headers and JSON body used to
+// embed text. It returns an error when text is empty or the body cannot be
+// marshalled (the latter is also logged at error level).
+func (t *ollamaProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) {
+	if text == "" {
+		return "", nil, nil, errors.New("queryString text cannot be empty")
+	}
+
+	payload, err := json.Marshal(ollamaEmbeddingRequest{
+		Input: text,
+		Model: t.config.model,
+	})
+	if err != nil {
+		log.Errorf("failed to marshal request data: %v", err)
+		return "", nil, nil, err
+	}
+
+	reqHeaders := [][2]string{
+		{"Content-Type", "application/json"},
+	}
+	log.Debugf("constructParameters: %s", string(payload))
+
+	return OLLAMA_ENDPOINT, reqHeaders, payload, nil
+}
+
+func (t *ollamaProvider) parseTextEmbedding(responseBody []byte) (*ollamaResponse, error) {
+ var resp ollamaResponse
+ if err := json.Unmarshal(responseBody, &resp); err != nil {
+ return nil, fmt.Errorf("failed to parse response: %w", err)
+ }
+ return &resp, nil
+}
+
+func (t *ollamaProvider) GetEmbedding(
+ queryString string,
+ ctx wrapper.HttpContext,
+ log wrapper.Log,
+ callback func(emb []float64, err error)) error {
+ embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log)
+ if err != nil {
+ log.Errorf("failed to construct parameters: %v", err)
+ return err
+ }
+
+ var resp *ollamaResponse
+
+ defer func() {
+ if err != nil {
+ callback(nil, err)
+ }
+ }()
+ err = t.client.Post(embUrl, embHeaders, embRequestBody,
+ func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+
+ if statusCode != http.StatusOK {
+ err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode))
+ callback(nil, err)
+ return
+ }
+
+ resp, err = t.parseTextEmbedding(responseBody)
+ if err != nil {
+ err = fmt.Errorf("failed to parse response: %v", err)
+ callback(nil, err)
+ return
+ }
+
+ log.Debugf("get embedding response: %d, %s", statusCode, responseBody)
+
+ if len(resp.Embeddings) == 0 {
+ err = errors.New("no embedding found in response")
+ callback(nil, err)
+ return
+ }
+
+ callback(resp.Embeddings[0], nil)
+
+ }, t.config.timeout)
+ return err
+}
diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
index 608f50ad54..7f0e14b269 100644
--- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
+++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go
@@ -12,6 +12,7 @@ const (
PROVIDER_TYPE_TEXTIN = "textin"
PROVIDER_TYPE_COHERE = "cohere"
PROVIDER_TYPE_OPENAI = "openai"
+ PROVIDER_TYPE_OLLAMA = "ollama"
)
type providerInitializer interface {
@@ -26,6 +27,7 @@ var (
PROVIDER_TYPE_TEXTIN: &textInProviderInitializer{},
PROVIDER_TYPE_COHERE: &cohereProviderInitializer{},
PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{},
+ PROVIDER_TYPE_OLLAMA: &ollamaProviderInitializer{},
}
)
diff --git a/plugins/wasm-go/extensions/ai-cache/main.go b/plugins/wasm-go/extensions/ai-cache/main.go
index 4bb3f2bad1..41014c5ebd 100644
--- a/plugins/wasm-go/extensions/ai-cache/main.go
+++ b/plugins/wasm-go/extensions/ai-cache/main.go
@@ -23,7 +23,7 @@ const (
SKIP_CACHE_HEADER = "x-higress-skip-ai-cache"
ERROR_PARTIAL_MESSAGE_KEY = "errorPartialMessage"
- DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024
+ DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
)
func main() {
diff --git a/plugins/wasm-go/extensions/ai-history/README.md b/plugins/wasm-go/extensions/ai-history/README.md
index d4684d292d..b8462345c5 100644
--- a/plugins/wasm-go/extensions/ai-history/README.md
+++ b/plugins/wasm-go/extensions/ai-history/README.md
@@ -20,17 +20,18 @@ description: AI 历史对话插件配置参考
## 配置字段
-| 名称 | 数据类型 | 填写要求 | 默认值 | Description |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 |
-| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 |
-| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 |
-| cacheTTL | integer | optional | 0 | 缓存的过期时间,单位是秒,默认值为0,即永不过期 |
-| redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379 | redis 服务端口 |
-| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 |
-| redis.username | string | optional | - | 登陆 redis 的用户名 |
-| redis.password | string | optional | - | 登陆 redis 的密码 |
+| 名称 | 数据类型 | 填写要求 | 默认值 | Description |
+|-------------------|----------|----------|-----------------------|----------------------------------------------------------------------------------------------|
+| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 |
+| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 |
+| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 |
+| cacheTTL | integer | optional | 0 | 缓存的过期时间,单位是秒,默认值为0,即永不过期 |
+| redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer | optional | 6379 | redis 服务端口 |
+| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 |
+| redis.username | string | optional | - | 登陆 redis 的用户名 |
+| redis.password | string | optional | - | 登陆 redis 的密码 |
+| redis.database | integer | optional | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` |
## 用法示例
diff --git a/plugins/wasm-go/extensions/ai-history/README_EN.md b/plugins/wasm-go/extensions/ai-history/README_EN.md
index 1fc6144d40..7d0149a019 100644
--- a/plugins/wasm-go/extensions/ai-history/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-history/README_EN.md
@@ -15,17 +15,19 @@ Plugin Execution Phase: `Default Phase`
Plugin Execution Priority: `650`
## Configuration Fields
-| Name | Data Type | Required | Default Value | Description |
-|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------|
-| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. |
-| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. |
-| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. |
-| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. |
-| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| redis.servicePort | integer | optional | 6379 | Redis service port. |
-| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. |
-| redis.username | string | optional | - | Username for logging into Redis. |
-| redis.password | string | optional | - | Password for logging into Redis. |
+| Name | Data Type | Required | Default Value | Description |
+|-------------------|-----------|----------|-----------------------|---------------------------------------------------------------------------------------------------------|
+| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. |
+| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. |
+| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. |
+| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. |
+| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| redis.servicePort | integer | optional | 6379 | Redis service port. |
+| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. |
+| redis.username | string | optional | - | Username for logging into Redis. |
+| redis.password | string | optional | - | Password for logging into Redis. |
+| redis.database | integer | optional | 0 | ID of the Redis database to use; for example, a value of 1 corresponds to `SELECT 1`. |
+
## Usage Example
### Configuration Information
diff --git a/plugins/wasm-go/extensions/ai-history/main.go b/plugins/wasm-go/extensions/ai-history/main.go
index 3f728dd96d..f0fabaaa4c 100644
--- a/plugins/wasm-go/extensions/ai-history/main.go
+++ b/plugins/wasm-go/extensions/ai-history/main.go
@@ -76,6 +76,9 @@ type RedisInfo struct {
// @Title zh-CN 请求超时
// @Description zh-CN 请求 redis 的超时时间,单位为毫秒。默认值是1000,即1秒
Timeout int `required:"false" yaml:"timeout" json:"timeout"`
+ // @Title zh-CN Database
+ // @Description zh-CN redis database
+ Database int `required:"false" yaml:"database" json:"database"`
}
type KVExtractor struct {
@@ -138,6 +141,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
if c.RedisInfo.Timeout == 0 {
c.RedisInfo.Timeout = 1000
}
+ c.RedisInfo.Database = int(json.Get("redis.database").Int())
c.QuestionFrom.RequestBody = "messages.@reverse.0.content"
c.AnswerValueFrom.ResponseBody = "choices.0.message.content"
c.AnswerStreamValueFrom.ResponseBody = "choices.0.delta.content"
@@ -159,7 +163,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error {
FQDN: c.RedisInfo.ServiceName,
Port: int64(c.RedisInfo.ServicePort),
})
- return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout))
+ return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout), wrapper.WithDataBase(c.RedisInfo.Database))
}
func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action {
diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md
index 8f281ffd2b..cb685e6e03 100644
--- a/plugins/wasm-go/extensions/ai-proxy/README.md
+++ b/plugins/wasm-go/extensions/ai-proxy/README.md
@@ -31,18 +31,19 @@ description: AI 代理插件配置参考
`provider`的配置字段说明如下:
-| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
-|------------------| --------------- | -------- | ------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `type` | string | 必填 | - | AI 服务提供商名称 |
-| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
-| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 |
-| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 |
-| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) |
-| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
-| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 |
-| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
-| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
-| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key表示的是采用的厂商协议能力,values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `type` | string | 必填 | - | AI 服务提供商名称 |
+| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 |
+| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 |
+| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 |
+| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) |
+| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 |
+| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 |
+| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 |
+| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 |
+| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。 |
+| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key表示的是采用的厂商协议能力,values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank |
`context`的配置字段说明如下:
diff --git a/plugins/wasm-go/extensions/ai-proxy/config/config.go b/plugins/wasm-go/extensions/ai-proxy/config/config.go
index 48f08dd9e4..f0b820345a 100644
--- a/plugins/wasm-go/extensions/ai-proxy/config/config.go
+++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go
@@ -80,13 +80,16 @@ func (c *PluginConfig) Complete(log wrapper.Log) error {
c.activeProvider = nil
return nil
}
+
var err error
+
c.activeProvider, err = provider.CreateProvider(*c.activeProviderConfig)
+ if err != nil {
+ return err
+ }
providerConfig := c.GetProviderConfig()
- err = providerConfig.SetApiTokensFailover(log, c.activeProvider)
-
- return err
+ return providerConfig.SetApiTokensFailover(log, c.activeProvider)
}
func (c *PluginConfig) GetProvider() provider.Provider {
diff --git a/plugins/wasm-go/extensions/ai-proxy/main.go b/plugins/wasm-go/extensions/ai-proxy/main.go
index dc6bc123ce..35d06b9502 100644
--- a/plugins/wasm-go/extensions/ai-proxy/main.go
+++ b/plugins/wasm-go/extensions/ai-proxy/main.go
@@ -15,12 +15,13 @@ import (
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
"github.com/tidwall/gjson"
+ "github.com/tidwall/sjson"
)
const (
pluginName = "ai-proxy"
- defaultMaxBodyBytes uint32 = 10 * 1024 * 1024
+ defaultMaxBodyBytes uint32 = 100 * 1024 * 1024
)
func main() {
@@ -40,9 +41,11 @@ func parseGlobalConfig(json gjson.Result, pluginConfig *config.PluginConfig, log
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {
+ log.Errorf("global rule config is invalid: %v", err)
return err
}
if err := pluginConfig.Complete(log); err != nil {
+ log.Errorf("failed to apply global rule config: %v", err)
return err
}
@@ -56,9 +59,11 @@ func parseOverrideRuleConfig(json gjson.Result, global config.PluginConfig, plug
pluginConfig.FromJson(json)
if err := pluginConfig.Validate(); err != nil {
+ log.Errorf("overriden rule config is invalid: %v", err)
return err
}
if err := pluginConfig.Complete(log); err != nil {
+ log.Errorf("failed to apply overriden rule config: %v", err)
return err
}
@@ -98,21 +103,23 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf
// Always remove the Accept-Encoding header to prevent the LLM from sending compressed responses,
// allowing plugins to inspect or modify the response correctly
- proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+ _ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok {
// Set the apiToken for the current request.
providerConfig.SetApiTokenInUse(ctx, log)
+ // Set available apiTokens of current request in the context, will be used in the retryOnFailure
+ providerConfig.SetAvailableApiTokens(ctx, log)
err := handler.OnRequestHeaders(ctx, apiName, log)
if err != nil {
- util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
+ _ = util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err))
return types.ActionContinue
}
hasRequestBody := wrapper.HasRequestBody()
if hasRequestBody {
- proxywasm.RemoveHttpRequestHeader("Content-Length")
+ _ = proxywasm.RemoveHttpRequestHeader("Content-Length")
ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes)
// Delay the header processing to allow changing in OnRequestBody
return types.HeaderStopIteration
@@ -136,23 +143,21 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig
if handler, ok := activeProvider.(provider.RequestBodyHandler); ok {
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
-
- newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body)
+ providerConfig := pluginConfig.GetProviderConfig()
+ newBody, settingErr := providerConfig.ReplaceByCustomSettings(body)
if settingErr != nil {
- util.ErrorHandler(
- "ai-proxy.proc_req_body_failed",
- fmt.Errorf("failed to replace request body by custom settings: %v", settingErr),
- )
- return types.ActionContinue
+ log.Errorf("failed to replace request body by custom settings: %v", settingErr)
+ }
+ if providerConfig.IsOpenAIProtocol() {
+ newBody = normalizeOpenAiRequestBody(newBody, log)
}
-
log.Debugf("[onHttpRequestBody] newBody=%s", newBody)
body = newBody
action, err := handler.OnRequestBody(ctx, apiName, body, log)
if err == nil {
return action
}
- util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
+ _ = util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err))
}
return types.ActionContinue
}
@@ -176,6 +181,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
providerConfig := pluginConfig.GetProviderConfig()
apiTokenInUse := providerConfig.GetApiTokenInUse(ctx)
+ apiTokens := providerConfig.GetAvailableApiToken(ctx)
status, err := proxywasm.GetHttpResponseHeader(":status")
if err != nil || status != "200" {
@@ -183,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
log.Errorf("unable to load :status header from response: %v", err)
}
ctx.DontReadResponseBody()
- return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log)
+ return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log)
}
// Reset ctxApiTokenRequestFailureCount if the request is successful,
@@ -201,7 +207,11 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo
checkStream(ctx, log)
_, needHandleBody := activeProvider.(provider.TransformResponseBodyHandler)
- _, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler)
+ var needHandleStreamingBody bool
+ _, needHandleStreamingBody = activeProvider.(provider.StreamingResponseBodyHandler)
+ if !needHandleStreamingBody {
+ _, needHandleStreamingBody = activeProvider.(provider.StreamingEventHandler)
+ }
if !needHandleBody && !needHandleStreamingBody {
ctx.DontReadResponseBody()
} else if !needHandleStreamingBody {
@@ -220,7 +230,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
}
log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType())
- log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
+ log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk))
if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); ok {
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
@@ -230,6 +240,38 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin
}
return chunk
}
+ if handler, ok := activeProvider.(provider.StreamingEventHandler); ok {
+ apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
+ events := provider.ExtractStreamingEvents(ctx, chunk, log)
+ log.Debugf("[onStreamingResponseBody] %d events received", len(events))
+ if len(events) == 0 {
+ // No events are extracted, return the original chunk
+ return chunk
+ }
+ var responseBuilder strings.Builder
+ for _, event := range events {
+ log.Debugf("processing event: %v", event)
+
+ if event.IsEndData() {
+ responseBuilder.WriteString(event.ToHttpString())
+ continue
+ }
+
+ outputEvents, err := handler.OnStreamingEvent(ctx, apiName, event, log)
+ if err != nil {
+ log.Errorf("[onStreamingResponseBody] failed to process streaming event: %v\n%s", err, chunk)
+ return chunk
+ }
+ if outputEvents == nil || len(outputEvents) == 0 {
+ responseBuilder.WriteString(event.ToHttpString())
+ } else {
+ for _, outputEvent := range outputEvents {
+ responseBuilder.WriteString(outputEvent.ToHttpString())
+ }
+ }
+ }
+ return []byte(responseBuilder.String())
+ }
return chunk
}
@@ -247,16 +289,28 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi
apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName)
body, err := handler.TransformResponseBody(ctx, apiName, body, log)
if err != nil {
- util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
+ _ = util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err))
return types.ActionContinue
}
if err = provider.ReplaceResponseBody(body, log); err != nil {
- util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
+ _ = util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err))
}
}
return types.ActionContinue
}
+func normalizeOpenAiRequestBody(body []byte, log wrapper.Log) []byte {
+ var err error
+ // Default setting include_usage.
+ if gjson.GetBytes(body, "stream").Bool() {
+ body, err = sjson.SetBytes(body, "stream_options.include_usage", true)
+ if err != nil {
+ log.Errorf("set include_usage failed, err:%s", err)
+ }
+ }
+ return body
+}
+
func checkStream(ctx wrapper.HttpContext, log wrapper.Log) {
contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
if err != nil || !strings.HasPrefix(contentType, "text/event-stream") {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
index 6c8259949b..9644693f5e 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go
@@ -32,6 +32,8 @@ type failover struct {
healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"`
// @Title zh-CN 本次请求使用的 apiToken
ctxApiTokenInUse string
+ // @Title zh-CN 记录本次请求时所有可用的 apiToken
+ ctxAvailableApiTokensInRequest string
// @Title zh-CN 记录 apiToken 请求失败的次数,key 为 apiToken,value 为失败次数
ctxApiTokenRequestFailureCount string
// @Title zh-CN 记录 apiToken 健康检测成功的次数,key 为 apiToken,value 为成功次数
@@ -527,6 +529,22 @@ func (c *ProviderConfig) GetGlobalRandomToken(log wrapper.Log) string {
}
}
+// GetAvailableApiToken returns the apiTokens stored in the request context under
+// c.failover.ctxAvailableApiTokensInRequest, or nil when no []string is stored there.
+func (c *ProviderConfig) GetAvailableApiToken(ctx wrapper.HttpContext) []string {
+	if tokens, ok := ctx.GetContext(c.failover.ctxAvailableApiTokensInRequest).([]string); ok {
+		return tokens
+	}
+	return nil
+}
+
+// SetAvailableApiTokens stores in the request context the apiTokens available to the
+// current request, for later use by retryOnFailure. With failover enabled the list is
+// read through getApiTokens(c.failover.ctxApiTokens); otherwise the configured
+// c.apiTokens is stored.
+func (c *ProviderConfig) SetAvailableApiTokens(ctx wrapper.HttpContext, log wrapper.Log) {
+	var available []string
+	switch {
+	case c.isFailoverEnabled():
+		available, _, _ = getApiTokens(c.failover.ctxApiTokens)
+	default:
+		available = c.apiTokens
+	}
+	ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, available)
+}
+
func (c *ProviderConfig) isFailoverEnabled() bool {
return c.failover.enabled
}
@@ -539,12 +557,12 @@ func (c *ProviderConfig) resetSharedData() {
_ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0)
}
-func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action {
+func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action {
if c.isFailoverEnabled() {
c.handleUnavailableApiToken(ctx, apiTokenInUse, log)
}
if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) {
- c.retryFailedRequest(activeProvider, ctx, log)
+ c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log)
return types.HeaderStopAllIterationAndWatermark
}
return types.ActionContinue
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
index 726a18fca6..7de9cfe2fa 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go
@@ -1,6 +1,9 @@
package provider
-import "strings"
+import (
+ "fmt"
+ "strings"
+)
const (
streamEventIdItemKey = "id:"
@@ -110,9 +113,16 @@ type chatCompletionChoice struct {
}
type usage struct {
- PromptTokens int `json:"prompt_tokens,omitempty"`
- CompletionTokens int `json:"completion_tokens,omitempty"`
- TotalTokens int `json:"total_tokens,omitempty"`
+ PromptTokens int `json:"prompt_tokens,omitempty"`
+ CompletionTokens int `json:"completion_tokens,omitempty"`
+ TotalTokens int `json:"total_tokens,omitempty"`
+ CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"`
+}
+
+// completionTokensDetails is the optional breakdown of completion tokens carried in
+// the "completion_tokens_details" field of usage. Every counter is omitted from the
+// JSON output when it is zero.
+type completionTokensDetails struct {
+ ReasoningTokens int `json:"reasoning_tokens,omitempty"`
+ AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"`
+ RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"`
+}
type chatMessage struct {
@@ -126,6 +136,24 @@ type chatMessage struct {
Refusal string `json:"refusal,omitempty"`
}
+// handleReasoningContent rewrites the message's reasoning content according to
+// reasoningContentMode:
+//   - reasoningBehaviorIgnore: the reasoning content is dropped;
+//   - reasoningBehaviorConcat: the reasoning content is prepended to Content,
+//     separated by a newline, and then cleared;
+//   - any other value, including reasoningBehaviorPassThrough: nothing changes.
+//
+// Messages without reasoning content are left untouched.
+func (m *chatMessage) handleReasoningContent(reasoningContentMode string) {
+	if m.ReasoningContent == "" {
+		return
+	}
+	if reasoningContentMode == reasoningBehaviorIgnore {
+		m.ReasoningContent = ""
+		return
+	}
+	if reasoningContentMode == reasoningBehaviorConcat {
+		// NOTE(review): %v formats whatever Content holds; confirm Content is always a
+		// plain string when this mode is used.
+		m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content)
+		m.ReasoningContent = ""
+	}
+}
+
type messageContent struct {
Type string `json:"type,omitempty"`
Text string `json:"text"`
@@ -138,6 +166,9 @@ type imageUrl struct {
}
func (m *chatMessage) IsEmpty() bool {
+ if m.ReasoningContent != "" {
+ return false
+ }
if m.IsStringContent() && m.Content != "" {
return false
}
@@ -247,14 +278,18 @@ func (m *functionCall) IsEmpty() bool {
return m.Name == "" && m.Arguments == ""
}
-type streamEvent struct {
+type StreamEvent struct {
Id string `json:"id"`
Event string `json:"event"`
Data string `json:"data"`
HttpStatus string `json:"http_status"`
}
-func (e *streamEvent) setValue(key, value string) {
+// IsEndData reports whether the event's Data equals streamEndDataValue.
+func (e *StreamEvent) IsEndData() bool {
+	return streamEndDataValue == e.Data
+}
+
+func (e *StreamEvent) SetValue(key, value string) {
switch key {
case streamEventIdItemKey:
e.Id = value
@@ -269,6 +304,10 @@ func (e *streamEvent) setValue(key, value string) {
}
}
+// ToHttpString renders the event as streamDataItemKey, a space and Data, terminated by
+// two newlines. Only Data is written; Id, Event and HttpStatus are not part of the output.
+func (e *StreamEvent) ToHttpString() string {
+	return streamDataItemKey + " " + e.Data + "\n\n"
+}
+
// https://platform.openai.com/docs/guides/images
type imageGenerationRequest struct {
Model string `json:"model"`
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
index f0f63cf792..46fa68c734 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go
@@ -102,12 +102,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam
}()
if err != nil {
log.Errorf("failed to load context file: %v", err)
- util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
+ _ = util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err))
return
}
err = m.performChatCompletion(ctx, content, request, log)
if err != nil {
- util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
+ _ = util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err))
}
}, log)
if err == nil {
@@ -161,79 +161,9 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba
}
}
-func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+func (m *moonshotProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
if name != ApiNameChatCompletion {
- return chunk, nil
- }
- receivedBody := chunk
- if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
- receivedBody = append(bufferedStreamingBody, chunk...)
- }
-
- eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
- defer func() {
- if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
- // Just in case the received chunk is not a complete event.
- ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
- } else {
- ctx.SetContext(ctxKeyStreamingBody, nil)
- }
- }()
-
- var responseBuilder strings.Builder
- currentKey := ""
- currentEvent := &streamEvent{}
- i, length := 0, len(receivedBody)
- for i = 0; i < length; i++ {
- ch := receivedBody[i]
- if ch != '\n' {
- if lineStartIndex == -1 {
- if eventStartIndex == -1 {
- eventStartIndex = i
- }
- lineStartIndex = i
- valueStartIndex = -1
- }
- if valueStartIndex == -1 {
- if ch == ':' {
- valueStartIndex = i + 1
- currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
- }
- } else if valueStartIndex == i && ch == ' ' {
- // Skip leading spaces in data.
- valueStartIndex = i + 1
- }
- continue
- }
-
- if lineStartIndex != -1 {
- value := string(receivedBody[valueStartIndex:i])
- currentEvent.setValue(currentKey, value)
- } else {
- // Extra new line. The current event is complete.
- log.Debugf("processing event: %v", currentEvent)
- m.convertStreamEvent(&responseBuilder, currentEvent, log)
- // Reset event parsing state.
- eventStartIndex = -1
- currentEvent = &streamEvent{}
- }
-
- // Reset line parsing state.
- lineStartIndex = -1
- valueStartIndex = -1
- currentKey = ""
- }
-
- modifiedResponseChunk := responseBuilder.String()
- log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
- return []byte(modifiedResponseChunk), nil
-}
-
-func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error {
- if event.Data == streamEndDataValue {
- m.appendStreamEvent(responseBuilder, event)
- return nil
+ return nil, nil
}
if gjson.Get(event.Data, "choices.0.usage").Exists() {
@@ -241,20 +171,19 @@ func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder,
newData, err := sjson.Delete(event.Data, "choices.0.usage")
if err != nil {
log.Errorf("convert usage event error: %v", err)
- return err
+ return nil, err
}
newData, err = sjson.SetRaw(newData, "usage", usageStr)
if err != nil {
log.Errorf("convert usage event error: %v", err)
- return err
+ return nil, err
}
event.Data = newData
}
- m.appendStreamEvent(responseBuilder, event)
- return nil
+ return []StreamEvent{event}, nil
}
-func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.WriteString(event.Data)
responseBuilder.WriteString("\n\n")
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
index 0a170347f5..f875dbaa40 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go
@@ -2,7 +2,6 @@ package provider
import (
"encoding/json"
- "fmt"
"net/http"
"path"
"strings"
@@ -58,10 +57,10 @@ func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provi
}
customUrl := strings.TrimPrefix(strings.TrimPrefix(config.openaiCustomUrl, "http://"), "https://")
pairs := strings.SplitN(customUrl, "/", 2)
- if len(pairs) != 2 {
- return nil, fmt.Errorf("invalid openaiCustomUrl:%s", config.openaiCustomUrl)
+ customPath := "/"
+ if len(pairs) == 2 {
+ customPath += pairs[1]
}
- customPath := "/" + pairs[1]
isDirectCustomPath := isDirectPath(customPath)
capabilities := m.DefaultCapabilities()
if !isDirectCustomPath {
@@ -128,21 +127,14 @@ func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName,
}
func (m *openaiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
- request := &chatCompletionRequest{}
- if err := decodeChatCompletionRequest(body, request); err != nil {
- return nil, err
- }
if m.config.responseJsonSchema != nil {
+ request := &chatCompletionRequest{}
+ if err := decodeChatCompletionRequest(body, request); err != nil {
+ return nil, err
+ }
log.Debugf("[ai-proxy] set response format to %s", m.config.responseJsonSchema)
request.ResponseFormat = m.config.responseJsonSchema
+ body, _ = json.Marshal(request)
}
- if request.Stream {
- // For stream requests, we need to include usage in the response.
- if request.StreamOptions == nil {
- request.StreamOptions = &streamOptions{IncludeUsage: true}
- } else if !request.StreamOptions.IncludeUsage {
- request.StreamOptions.IncludeUsage = true
- }
- }
- return json.Marshal(request)
+ return m.config.defaultTransformRequestBody(ctx, apiName, body, log)
}
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
index 67cce2888b..c5ec8ce2d4 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go
@@ -85,6 +85,10 @@ const (
objectChatCompletion = "chat.completion"
objectChatCompletionChunk = "chat.completion.chunk"
+ reasoningBehaviorPassThrough = "passthrough"
+ reasoningBehaviorIgnore = "ignore"
+ reasoningBehaviorConcat = "concat"
+
wildcard = "*"
defaultTimeout = 2 * 60 * 1000 // ms
@@ -145,6 +149,10 @@ type StreamingResponseBodyHandler interface {
OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error)
}
+// StreamingEventHandler is the interface for handling a streaming response one
+// StreamEvent at a time: OnStreamingEvent receives a single event and returns a
+// slice of events together with an error.
+type StreamingEventHandler interface {
+ OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error)
+}
+
type ApiNameHandler interface {
GetApiName(path string) ApiName
}
@@ -190,6 +198,9 @@ type ProviderConfig struct {
// @Title zh-CN 失败请求重试
// @Description zh-CN 对失败的请求立即进行重试
retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"`
+ // @Title zh-CN 推理内容处理方式
+ // @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。
+ reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"`
// @Title zh-CN 基于OpenAI协议的自定义后端URL
// @Description zh-CN 仅适用于支持 openai 协议的服务。
openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"`
@@ -281,6 +292,10 @@ func (c *ProviderConfig) GetProtocol() string {
return c.protocol
}
+// IsOpenAIProtocol reports whether the configured protocol equals protocolOpenAI.
+func (c *ProviderConfig) IsOpenAIProtocol() bool {
+ return c.protocol == protocolOpenAI
+}
+
func (c *ProviderConfig) FromJson(json gjson.Result) {
c.id = json.Get("id").String()
c.typ = json.Get("type").String()
@@ -359,6 +374,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) {
}
}
+ c.reasoningContentMode = json.Get("reasoningContentMode").String()
+ if c.reasoningContentMode == "" {
+ c.reasoningContentMode = reasoningBehaviorPassThrough
+ } else {
+ c.reasoningContentMode = strings.ToLower(c.reasoningContentMode)
+ switch c.reasoningContentMode {
+ case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat:
+ break
+ default:
+ c.reasoningContentMode = reasoningBehaviorPassThrough
+ break
+ }
+ }
+
failoverJson := json.Get("failover")
c.failover = &failover{
enabled: false,
@@ -554,6 +583,81 @@ func doGetMappedModel(model string, modelMapping map[string]string, log wrapper.
return ""
}
+// ExtractStreamingEvents parses SSE-style text in chunk into StreamEvent values.
+//
+// Bytes stored under ctxKeyStreamingBody by a previous call are prepended to chunk
+// before parsing. Each non-empty line is split at its first ':'; the text up to and
+// including the ':' is the key and the rest of the line, minus leading spaces, is
+// the value, and both are passed to StreamEvent.SetValue. An empty line completes
+// the current event. If the input ends in the middle of an event, the bytes from
+// the start of that event are saved under ctxKeyStreamingBody for the next call;
+// otherwise the stored buffer is cleared.
+//
+// Non-empty lines that contain no ':' are skipped. Previously such a line sliced
+// body from index -1, which panics at runtime.
+func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte, log wrapper.Log) []StreamEvent {
+ body := chunk
+ if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
+ body = append(bufferedStreamingBody, chunk...)
+ }
+
+ eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
+
+ defer func() {
+ if eventStartIndex >= 0 && eventStartIndex < len(body) {
+ // Just in case the received chunk is not a complete event.
+ ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:])
+ } else {
+ ctx.SetContext(ctxKeyStreamingBody, nil)
+ }
+ }()
+
+ // Sample Qwen event response:
+ //
+ // event:result
+ // :HTTP_STATUS/200
+ // data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
+ //
+ // event:error
+ // :HTTP_STATUS/400
+ // data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
+ //
+
+ var events []StreamEvent
+
+ currentKey := ""
+ currentEvent := &StreamEvent{}
+ i, length := 0, len(body)
+ for i = 0; i < length; i++ {
+ ch := body[i]
+ if ch != '\n' {
+ if lineStartIndex == -1 {
+ if eventStartIndex == -1 {
+ eventStartIndex = i
+ }
+ lineStartIndex = i
+ valueStartIndex = -1
+ }
+ if valueStartIndex == -1 {
+ if ch == ':' {
+ valueStartIndex = i + 1
+ currentKey = string(body[lineStartIndex:valueStartIndex])
+ }
+ } else if valueStartIndex == i && ch == ' ' {
+ // Skip leading spaces in data.
+ valueStartIndex = i + 1
+ }
+ continue
+ }
+
+ // ch is '\n': finish the current line.
+ switch {
+ case lineStartIndex == -1:
+ // Extra new line. The current event is complete.
+ events = append(events, *currentEvent)
+ // Reset event parsing state.
+ eventStartIndex = -1
+ currentEvent = &StreamEvent{}
+ case valueStartIndex == -1:
+ // Non-empty line without ':'. There is no key/value pair to record, and
+ // slicing body from index -1 would panic, so the line is skipped.
+ default:
+ value := string(body[valueStartIndex:i])
+ currentEvent.SetValue(currentKey, value)
+ }
+
+ // Reset line parsing state.
+ lineStartIndex = -1
+ valueStartIndex = -1
+ currentKey = ""
+ }
+
+ return events
+}
+
func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool {
_, exist := c.capabilities[string(apiName)]
return exist
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
index 2f757c683a..4bb39c1210 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go
@@ -188,89 +188,32 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b
return json.Marshal(qwenRequest)
}
-func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) {
+// OnStreamingEvent converts a single streaming event from Qwen's response format.
+// It returns (nil, nil) without inspecting the event when qwenEnableCompatible is
+// set or name is not ApiNameChatCompletion. Otherwise event.Data is unmarshalled
+// into a qwenTextGenResponse, buildChatCompletionStreamingResponse turns it into
+// zero or more responses, and each response is marshalled into the Data field of
+// a copy of event. Unmarshal and marshal failures are logged and returned.
+//
+// NOTE(review): the removed convertStreamEvent forwarded an event unchanged when
+// its Data was streamEndDataValue or when Event/HttpStatus were not
+// eventResult/httpStatus200. This method unmarshals every event it receives;
+// presumably the caller filters those events first. Confirm against the caller.
+func (m *qwenProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) {
if m.config.qwenEnableCompatible || name != ApiNameChatCompletion {
- return chunk, nil
- }
-
- receivedBody := chunk
- if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has {
- receivedBody = append(bufferedStreamingBody, chunk...)
+ return nil, nil
}
incrementalStreaming := ctx.GetBoolContext(ctxKeyIncrementalStreaming, false)
- eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1
-
- defer func() {
- if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) {
- // Just in case the received chunk is not a complete event.
- ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:])
- } else {
- ctx.SetContext(ctxKeyStreamingBody, nil)
- }
- }()
-
- // Sample Qwen event response:
- //
- // event:result
- // :HTTP_STATUS/200
- // data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"}
- //
- // event:error
- // :HTTP_STATUS/400
- // data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"}
- //
-
- var responseBuilder strings.Builder
- currentKey := ""
- currentEvent := &streamEvent{}
- i, length := 0, len(receivedBody)
- for i = 0; i < length; i++ {
- ch := receivedBody[i]
- if ch != '\n' {
- if lineStartIndex == -1 {
- if eventStartIndex == -1 {
- eventStartIndex = i
- }
- lineStartIndex = i
- valueStartIndex = -1
- }
- if valueStartIndex == -1 {
- if ch == ':' {
- valueStartIndex = i + 1
- currentKey = string(receivedBody[lineStartIndex:valueStartIndex])
- }
- } else if valueStartIndex == i && ch == ' ' {
- // Skip leading spaces in data.
- valueStartIndex = i + 1
- }
- continue
- }
+ qwenResponse := &qwenTextGenResponse{}
+ if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
+ log.Errorf("unable to unmarshal Qwen response: %v", err)
+ return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err)
+ }
- if lineStartIndex != -1 {
- value := string(receivedBody[valueStartIndex:i])
- currentEvent.setValue(currentKey, value)
- } else {
- // Extra new line. The current event is complete.
- log.Debugf("processing event: %v", currentEvent)
- if err := m.convertStreamEvent(ctx, &responseBuilder, currentEvent, incrementalStreaming, log); err != nil {
- return nil, err
- }
- // Reset event parsing state.
- eventStartIndex = -1
- currentEvent = &streamEvent{}
+ var outputEvents []StreamEvent
+ responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
+ for _, response := range responses {
+ responseBody, err := json.Marshal(response)
+ if err != nil {
+ log.Errorf("unable to marshal response: %v", err)
+ return nil, fmt.Errorf("unable to marshal response: %v", err)
}
-
- // Reset line parsing state.
- lineStartIndex = -1
- valueStartIndex = -1
- currentKey = ""
+ // event is passed by value, so this assignment copies it; only Data is replaced.
+ modifiedEvent := event
+ modifiedEvent.Data = string(responseBody)
+ outputEvents = append(outputEvents, modifiedEvent)
}
-
- modifiedResponseChunk := responseBuilder.String()
- log.Debugf("=== modified response chunk: %s", modifiedResponseChunk)
- return []byte(modifiedResponseChunk), nil
+ return outputEvents, nil
}
func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) {
@@ -357,7 +300,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o
func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse {
choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices))
for _, qwenChoice := range qwenResponse.Output.Choices {
- message := qwenMessageToChatMessage(qwenChoice.Message)
+ message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode)
choices = append(choices, chatCompletionChoice{
Message: &message,
FinishReason: qwenChoice.FinishReason,
@@ -395,7 +338,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
message := qwenChoice.Message
- deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content}
+ deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
+ deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode)
deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}
if !incrementalStreaming {
for _, tc := range message.ToolCalls {
@@ -430,6 +374,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
}
}
}
+ if message.ReasoningContent == "" {
+ message.ReasoningContent = pushedMessage.ReasoningContent
+ } else {
+ deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
+ }
if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
for i, tc := range deltaToolCallsMessage.ToolCalls {
if i >= len(pushedMessage.ToolCalls) {
@@ -475,39 +424,6 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont
return responses
}
-func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, event *streamEvent, incrementalStreaming bool, log wrapper.Log) error {
- if event.Data == streamEndDataValue {
- m.appendStreamEvent(responseBuilder, event)
- return nil
- }
-
- if event.Event != eventResult || event.HttpStatus != httpStatus200 {
- // Something goes wrong. Just pass through the event.
- m.appendStreamEvent(responseBuilder, event)
- return nil
- }
-
- qwenResponse := &qwenTextGenResponse{}
- if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil {
- log.Errorf("unable to unmarshal Qwen response: %v", err)
- return fmt.Errorf("unable to unmarshal Qwen response: %v", err)
- }
-
- responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log)
- for _, response := range responses {
- responseBody, err := json.Marshal(response)
- if err != nil {
- log.Errorf("unable to marshal response: %v", err)
- return fmt.Errorf("unable to marshal response: %v", err)
- }
- modifiedEvent := &*event
- modifiedEvent.Data = string(responseBody)
- m.appendStreamEvent(responseBuilder, modifiedEvent)
- }
-
- return nil
-}
-
func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onlyOneSystemBeforeFile bool) ([]byte, error) {
request := &qwenTextGenRequest{}
if err := json.Unmarshal(body, request); err != nil {
@@ -552,7 +468,7 @@ func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onl
return json.Marshal(request)
}
-func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) {
+func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) {
responseBuilder.WriteString(streamDataItemKey)
responseBuilder.WriteString(event.Data)
responseBuilder.WriteString("\n\n")
@@ -690,13 +606,16 @@ type qwenTextEmbeddings struct {
Embedding []float64 `json:"embedding"`
}
-func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage {
- return chatMessage{
- Name: qwenMessage.Name,
- Role: qwenMessage.Role,
- Content: qwenMessage.Content,
- ToolCalls: qwenMessage.ToolCalls,
+// qwenMessageToChatMessage copies Name, Role, Content, ReasoningContent and
+// ToolCalls from qwenMessage into a chatMessage, calls handleReasoningContent on
+// the copy with reasoningContentMode, and returns the result.
+func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage {
+ msg := chatMessage{
+ Name: qwenMessage.Name,
+ Role: qwenMessage.Role,
+ Content: qwenMessage.Content,
+ ReasoningContent: qwenMessage.ReasoningContent,
+ ToolCalls: qwenMessage.ToolCalls,
}
+ msg.handleReasoningContent(reasoningContentMode)
+ return msg
}
func (m *qwenMessage) IsStringContent() bool {
diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
index 033a8cd8c5..59691d855f 100644
--- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
+++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go
@@ -1,11 +1,13 @@
package provider
import (
+ "math/rand"
+ "net/http"
+
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util"
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
"github.com/tidwall/gjson"
- "net/http"
)
const (
@@ -38,12 +40,12 @@ func (c *ProviderConfig) isRetryOnFailureEnabled() bool {
return c.retryOnFailure.enabled
}
-func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) {
+// retryFailedRequest starts the retry sequence for a failed request: it creates a
+// retry client, reads the API name from the context, sets ctxRetryCount to 1 and
+// calls sendRetryRequest, passing along apiTokenInUse and apiTokens.
+func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) {
log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType())
retryClient := createRetryClient(ctx)
apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName)
ctx.SetContext(ctxRetryCount, 1)
- c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+ c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
}
func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) {
@@ -67,7 +69,8 @@ func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext
func (c *ProviderConfig) retryCall(
ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider,
apiName ApiName, statusCode int, responseHeaders http.Header, responseBody []byte,
- retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) {
+ retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+ apiTokenInUse string, apiTokens []string) {
retryCount := ctx.GetContext(ctxRetryCount).(int)
log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries)
@@ -76,6 +79,7 @@ func (c *ProviderConfig) retryCall(
log.Debugf("Retry request succeeded")
headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log)
proxywasm.SendHttpResponse(200, headers, body, -1)
+ return
} else {
log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody))
}
@@ -83,26 +87,41 @@ func (c *ProviderConfig) retryCall(
retryCount++
if retryCount <= int(c.retryOnFailure.maxRetries) {
ctx.SetContext(ctxRetryCount, retryCount)
- c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log)
+ c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log)
} else {
log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries)
proxywasm.ResumeHttpResponse()
+ return
}
}
+// sendRetryRequest sends one retry attempt using a token other than apiTokenInUse.
+// It removes apiTokenInUse from apiTokens; if nothing remains it resumes the
+// response and returns. Otherwise it picks a token with GetRandomToken, stores it
+// in the context under c.failover.ctxApiTokenInUse, builds the retry headers and
+// body, and POSTs them through retryClient with c.retryOnFailure.retryTimeout.
+// The response callback forwards to retryCall with the chosen token and the
+// reduced token list. If Post returns an error, the response is resumed.
func (c *ProviderConfig) sendRetryRequest(
ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider,
- retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) {
+ retryClient *wrapper.ClusterClient[wrapper.RouteCluster],
+ apiTokenInUse string, apiTokens []string, log wrapper.Log) {
+
+ // Remove last failed token from retry apiTokens list
+ apiTokens = removeApiTokenFromRetryList(apiTokens, apiTokenInUse, log)
+ if len(apiTokens) == 0 {
+ log.Debugf("No more apiTokens to retry")
+ proxywasm.ResumeHttpResponse()
+ return
+ }
+ // Set apiTokenInUse for the retry request
+ apiTokenInUse = GetRandomToken(apiTokens)
+ // NOTE(review): this writes the API token to the debug log in plain text; confirm that is acceptable.
+ log.Debugf("Retry request with apiToken: %s", apiTokenInUse)
+ ctx.SetContext(c.failover.ctxApiTokenInUse, apiTokenInUse)
requestHeaders, requestBody := c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log)
path := getRetryPath(ctx)
err := retryClient.Post(path, util.HeaderToSlice(requestHeaders), requestBody, func(statusCode int, responseHeaders http.Header, responseBody []byte) {
- c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient)
+ c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient, apiTokenInUse, apiTokens)
}, uint32(c.retryOnFailure.retryTimeout))
if err != nil {
log.Errorf("Failed to send retry request: %v", err)
proxywasm.ResumeHttpResponse()
+ return
}
}
@@ -126,9 +145,7 @@ func getRetryPath(ctx wrapper.HttpContext) string {
}
func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) {
- // The retry request may be sent with different apiToken, so the header needs to be regenerated
- c.SetApiTokenInUse(ctx, log)
-
+ // The retry request is sent with different apiToken, so the header needs to be regenerated
requestHeaders := http.Header{
"Content-Type": []string{"application/json"},
}
@@ -139,3 +156,27 @@ func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext,
return requestHeaders, requestBody
}
+
+// removeApiTokenFromRetryList returns the entries of apiTokens that differ from
+// removedApiToken, in their original order. The input slice is left untouched and
+// the result is nil when no entry remains. Both the removed token and the
+// remaining list are written to the debug log.
+// NOTE(review): the debug lines print API tokens in plain text; confirm that is acceptable.
+func removeApiTokenFromRetryList(apiTokens []string, removedApiToken string, log wrapper.Log) []string {
+ var remaining []string
+ for idx := 0; idx < len(apiTokens); idx++ {
+ if apiTokens[idx] == removedApiToken {
+ continue
+ }
+ remaining = append(remaining, apiTokens[idx])
+ }
+ log.Debugf("Remove apiToken %s from retry apiTokens list", removedApiToken)
+ log.Debugf("Available retry apiTokens: %v", remaining)
+ return remaining
+}
+
+// GetRandomToken returns "" for an empty list, the sole element of a
+// single-element list, and otherwise an element selected with rand.Intn.
+func GetRandomToken(apiTokens []string) string {
+ if len(apiTokens) == 0 {
+ return ""
+ }
+ if len(apiTokens) == 1 {
+ // Nothing to choose between, so no random draw is made.
+ return apiTokens[0]
+ }
+ return apiTokens[rand.Intn(len(apiTokens))]
+}
diff --git a/plugins/wasm-go/extensions/ai-quota/README.md b/plugins/wasm-go/extensions/ai-quota/README.md
index 4305272902..4b0d362fed 100644
--- a/plugins/wasm-go/extensions/ai-quota/README.md
+++ b/plugins/wasm-go/extensions/ai-quota/README.md
@@ -26,14 +26,14 @@ description: AI 配额管理插件配置参考
`redis`中每一项的配置字段说明
-| 配置项 | 类型 | 必填 | 默认值 | 说明 |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
-| username | string | 否 | - | redis用户名 |
-| password | string | 否 | - | redis密码 |
-| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 |
-
+| 配置项 | 类型 | 必填 | 默认值 | 说明 |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
+| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
+| username | string | 否 | - | redis用户名 |
+| password | string | 否 | - | redis密码 |
+| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 |
+| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` |
## 配置示例
diff --git a/plugins/wasm-go/extensions/ai-quota/README_EN.md b/plugins/wasm-go/extensions/ai-quota/README_EN.md
index e136a75969..0eff19aeed 100644
--- a/plugins/wasm-go/extensions/ai-quota/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-quota/README_EN.md
@@ -18,13 +18,14 @@ Plugin execution priority: `750`
| `admin_path` | string | Optional | /quota | Prefix for the path to manage quota requests |
| `redis` | object | Yes | | Redis related configuration |
Explanation of each configuration field in `redis`
-| Configuration Item | Type | Required | Default Value | Explanation |
-|---------------------|------------------|----------|---------------------------------------------------------|-----------------------------------------------|
-| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service |
-| username | string | No | - | Redis username |
-| password | string | No | - | Redis password |
-| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
+| Configuration Item | Type | Required | Default Value | Explanation |
+|--------------------|--------|----------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------|
+| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service |
+| username | string | No | - | Redis username |
+| password | string | No | - | Redis password |
+| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
+| database | int | No | 0 | ID of the Redis database to use; for example, setting it to 1 corresponds to `SELECT 1`. |
## Configuration Example
### Identify request parameter apikey and apply rate limiting accordingly
diff --git a/plugins/wasm-go/extensions/ai-quota/go.mod b/plugins/wasm-go/extensions/ai-quota/go.mod
index ec77e402e4..8b9e11fd10 100644
--- a/plugins/wasm-go/extensions/ai-quota/go.mod
+++ b/plugins/wasm-go/extensions/ai-quota/go.mod
@@ -2,11 +2,11 @@ module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota
go 1.19
-//replace github.com/alibaba/higress/plugins/wasm-go => ../..
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
require (
github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de
- github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f
+ github.com/higress-group/proxy-wasm-go-sdk v1.0.0
github.com/tidwall/gjson v1.17.3
github.com/tidwall/resp v0.1.1
)
diff --git a/plugins/wasm-go/extensions/ai-quota/go.sum b/plugins/wasm-go/extensions/ai-quota/go.sum
index 996d474d43..b4ab172fe2 100644
--- a/plugins/wasm-go/extensions/ai-quota/go.sum
+++ b/plugins/wasm-go/extensions/ai-quota/go.sum
@@ -1,12 +1,10 @@
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de h1:lDLqj7Hw41ox8VdsP7oCTPhjPa3+QJUCKApcLh2a45Y=
-github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de/go.mod h1:359don/ahMxpfeLMzr29Cjwcu8IywTTDUzWlBPRNLHw=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg=
-github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/plugins/wasm-go/extensions/ai-quota/main.go b/plugins/wasm-go/extensions/ai-quota/main.go
index 2facd912bc..2c6d75e8f4 100644
--- a/plugins/wasm-go/extensions/ai-quota/main.go
+++ b/plugins/wasm-go/extensions/ai-quota/main.go
@@ -69,6 +69,7 @@ type RedisInfo struct {
Username string `required:"false" yaml:"username" json:"username"`
Password string `required:"false" yaml:"password" json:"password"`
Timeout int `required:"false" yaml:"timeout" json:"timeout"`
+ Database int `required:"false" yaml:"database" json:"database"`
}
func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error {
@@ -110,17 +111,19 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error
if timeout == 0 {
timeout = 1000
}
+ database := int(redisConfig.Get("database").Int())
config.redisInfo.ServiceName = serviceName
config.redisInfo.ServicePort = servicePort
config.redisInfo.Username = username
config.redisInfo.Password = password
config.redisInfo.Timeout = timeout
+ config.redisInfo.Database = database
config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{
FQDN: serviceName,
Port: int64(servicePort),
})
- return config.redisClient.Init(username, password, int64(timeout))
+ return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log wrapper.Log) types.Action {
diff --git a/plugins/wasm-go/extensions/ai-search/README.md b/plugins/wasm-go/extensions/ai-search/README.md
new file mode 100644
index 0000000000..5ae133148a
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/README.md
@@ -0,0 +1,244 @@
+---
+title: AI 搜索增强
+keywords: [higress,ai search]
+description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elasticsearch等)的实时结果,增强DeepSeek-R1等模型的回答准确性和时效性
+---
+
+## 功能说明
+
+`ai-search`插件通过集成搜索引擎(Google/Bing/Arxiv/Elasticsearch等)的实时结果,增强AI模型的回答准确性和时效性。插件会自动将搜索结果注入到提示模板中,并根据配置决定是否在最终回答中添加引用来源。
+
+## 运行属性
+
+插件执行阶段:`默认阶段`
+插件执行优先级:`440`
+
+## 配置字段
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| needReference | bool | 选填 | false | 是否在回答中添加引用来源 |
+| referenceFormat | string | 选填 | `"**References:**\n%s"` | 引用内容格式,必须包含%s占位符 |
+| defaultLang | string | 选填 | - | 默认搜索语言代码(如zh-CN/en-US) |
+| promptTemplate | string | 选填 | 内置模板 | 提示模板,必须包含`{search_results}`和`{question}`占位符 |
+| searchFrom | array of object | 必填 | - | 参考下面搜索引擎配置,至少配置一个引擎 |
+| searchRewrite | object | 选填 | - | 搜索重写配置,用于使用LLM服务优化搜索查询 |
+
+## 搜索重写说明
+
+搜索重写功能使用LLM服务对用户的原始查询进行分析和优化,可以:
+1. 将用户的自然语言查询转换为更适合搜索引擎的关键词组合
+2. 对于Arxiv论文搜索,自动识别相关的论文类别并添加类别限定
+3. 对于私有知识库搜索,将长查询拆分成多个精准的关键词组合
+
+强烈建议在使用Arxiv或Elasticsearch引擎时启用此功能。对于Arxiv搜索,它能准确识别论文所属领域并优化英文关键词;对于私有知识库搜索,它能提供更精准的关键词匹配,显著提升搜索效果。
+
+## 搜索重写配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| llmServiceName | string | 必填 | - | LLM服务名称 |
+| llmServicePort | number | 必填 | - | LLM服务端口 |
+| llmApiKey | string | 必填 | - | LLM服务API密钥 |
+| llmUrl | string | 必填 | - | LLM服务API地址 |
+| llmModelName | string | 必填 | - | LLM模型名称 |
+| timeoutMillisecond | number | 选填 | 30000 | API调用超时时间(毫秒) |
+
+## 搜索引擎通用配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch/quark) |
+| serviceName | string | 必填 | - | 后端服务名称 |
+| servicePort | number | 必填 | - | 后端服务端口 |
+| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey |
+| count | number | 选填 | 10 | 单次搜索返回结果数量 |
+| start | number | 选填 | 0 | 搜索结果偏移量(从第start+1条结果开始返回) |
+| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间(毫秒) |
+| optionArgs | map | 选填 | - | 搜索引擎特定参数(key-value格式) |
+
+## Google 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| cx | string | 必填 | - | Google自定义搜索引擎ID,用于指定搜索范围 |
+
+## Arxiv 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| arxivCategory | string | 选填 | - | 搜索的论文[类别](https://arxiv.org/category_taxonomy)(如cs.AI, cs.CL等) |
+
+## Elasticsearch 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 |
+| contentField | string | 必填 | - | 要查询的内容字段名称 |
+| linkField | string | 必填 | - | 结果链接字段名称 |
+| titleField | string | 必填 | - | 结果标题字段名称 |
+
+## Quark 特定配置
+
+| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 |
+|------|----------|----------|--------|------|
+| secretKey | string | 必填 | - | Aliyun SecretKey |
+| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 |
+
+## 配置示例
+
+### 基础配置(单搜索引擎)
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ count: 5
+ optionArgs:
+ fileType: "pdf"
+```
+
+### Arxiv搜索配置
+
+```yaml
+searchFrom:
+- type: arxiv
+ serviceName: "arxiv-svc.dns"
+ servicePort: 443
+ arxivCategory: "cs.AI"
+ count: 10
+```
+
+
+### 夸克搜索配置
+
+```yaml
+searchFrom:
+- type: quark
+ serviceName: "quark-svc.dns"
+ servicePort: 443
+ apiKey: "aliyun accessKey"
+ count: 10 # 搜索网页数,最多10条
+ secretKey: "aliyun secretKey"
+ endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### 多搜索引擎配置
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+ # Search Results:
+ {search_results}
+
+ # Please answer this question:
+ {question}
+searchFrom:
+- type: google
+ apiKey: "google-key"
+ cx: "github-search-id" # 专门搜索GitHub内容的搜索引擎ID
+ serviceName: "google-svc.dns"
+ servicePort: 443
+- type: google
+ apiKey: "google-key"
+ cx: "news-search-id" # 专门搜索Google News内容的搜索引擎ID
+ serviceName: "google-svc.dns"
+ servicePort: 443
+- type: bing
+ apiKey: "bing-key"
+ serviceName: "bing-svc.dns"
+ servicePort: 443
+ optionArgs:
+ answerCount: "5"
+```
+
+### 并发查询配置
+
+由于搜索引擎对单次查询返回结果数量有限制(如Google自定义搜索API单次最多返回10条结果,且同一查询最多只能获取前100条结果),可以通过以下方式获取更多结果:
+1. 设置较小的count值(如10)
+2. 通过start参数指定结果偏移量
+3. 并发发起多个查询请求,每个请求的start值按count递增
+
+例如,要获取30条结果,可以配置count=10并并发发起3个查询,每个查询的start值分别为0,10,20:
+
+```yaml
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 0
+ count: 10
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 10
+ count: 10
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 20
+ count: 10
+```
+
+注意,过高的并发可能会导致限流,需要根据实际情况调整。
+
+### Elasticsearch 配置(用于对接私有知识库)
+
+```yaml
+searchFrom:
+- type: elasticsearch
+ serviceName: "es-svc.static"
+ # 固定地址服务的端口默认是80
+ servicePort: 80
+ index: "knowledge_base"
+ contentField: "content"
+ linkField: "url"
+ titleField: "title"
+```
+
+### 自定义引用格式
+
+```yaml
+needReference: true
+referenceFormat: "### 数据来源\n%s"
+searchFrom:
+- type: bing
+ apiKey: "your-bing-key"
+ serviceName: "search-service.dns"
+ servicePort: 8080
+```
+
+### 搜索重写配置
+
+```yaml
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+searchRewrite:
+ llmServiceName: "llm-svc.dns"
+ llmServicePort: 443
+ llmApiKey: "your-llm-api-key"
+ llmUrl: "https://api.example.com/v1/chat/completions"
+ llmModelName: "gpt-3.5-turbo"
+ timeoutMillisecond: 15000
+```
+
+## 注意事项
+
+1. 提示词模板必须包含`{search_results}`和`{question}`占位符,可选使用`{cur_date}`插入当前日期(格式:2006年1月2日)
+2. 默认模板包含搜索结果处理指引和回答规范,如无特殊需要可以直接用默认模板,否则请根据实际情况修改
+3. 多个搜索引擎是并行查询,总超时时间 = 所有搜索引擎配置中最大timeoutMillisecond值 + 处理时间
+4. Arxiv搜索不需要API密钥,但可以指定论文类别(arxivCategory)来缩小搜索范围
diff --git a/plugins/wasm-go/extensions/ai-search/README_EN.md b/plugins/wasm-go/extensions/ai-search/README_EN.md
new file mode 100644
index 0000000000..1afd955bd9
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/README_EN.md
@@ -0,0 +1,243 @@
+---
+title: AI Search Enhancement
+keywords: [higress, ai search]
+description: Higress supports enhancing the accuracy and timeliness of responses from models like DeepSeek-R1 by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.)
+---
+
+## Feature Description
+
+The `ai-search` plugin enhances the accuracy and timeliness of AI model responses by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.). The plugin automatically injects search results into the prompt template and determines whether to add reference sources in the final response based on configuration.
+
+## Runtime Properties
+
+Plugin execution stage: `Default stage`
+Plugin execution priority: `440`
+
+## Configuration Fields
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| needReference | bool | Optional | false | Whether to add reference sources in the response |
+| referenceFormat | string | Optional | `"**References:**\n%s"` | Reference content format, must include %s placeholder |
+| defaultLang | string | Optional | - | Default search language code (e.g. zh-CN/en-US) |
+| promptTemplate | string | Optional | Built-in template | Prompt template, must include `{search_results}` and `{question}` placeholders |
+| searchFrom | array of object | Required | - | Refer to search engine configuration below, at least one engine must be configured |
+| searchRewrite | object | Optional | - | Search rewrite configuration, used to optimize search queries using an LLM service |
+
+## Search Rewrite Description
+
+The search rewrite feature uses an LLM service to analyze and optimize the user's original query, which can:
+1. Convert natural language queries into keyword combinations better suited for search engines
+2. For Arxiv paper searches, automatically identify relevant paper categories and add category constraints
+3. For private knowledge base searches, break down long queries into multiple precise keyword combinations
+
+It is strongly recommended to enable this feature when using Arxiv or Elasticsearch engines. For Arxiv searches, it can accurately identify paper domains and optimize English keywords; for private knowledge base searches, it can provide more precise keyword matching, significantly improving search effectiveness.
+
+## Search Rewrite Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| llmServiceName | string | Required | - | LLM service name |
+| llmServicePort | number | Required | - | LLM service port |
+| llmApiKey | string | Required | - | LLM service API key |
+| llmUrl | string | Required | - | LLM service API URL |
+| llmModelName | string | Required | - | LLM model name |
+| timeoutMillisecond | number | Optional | 30000 | API call timeout (milliseconds) |
+
+## Search Engine Common Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) |
+| apiKey | string | Required for google/bing/quark | - | Search engine API key/Aliyun AccessKey (not needed for arxiv and elasticsearch) |
+| serviceName | string | Required | - | Backend service name |
+| servicePort | number | Required | - | Backend service port |
+| count | number | Optional | 10 | Number of results returned per search |
+| start | number | Optional | 0 | Search result offset (results are returned starting from the (start+1)-th result) |
+| timeoutMillisecond | number | Optional | 5000 | API call timeout (milliseconds) |
+| optionArgs | map | Optional | - | Search engine specific parameters (key-value format) |
+
+## Google Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| cx | string | Required | - | Google Custom Search Engine ID, used to specify search scope |
+
+## Arxiv Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| arxivCategory | string | Optional | - | Search paper [category](https://arxiv.org/category_taxonomy) (e.g. cs.AI, cs.CL etc.) |
+
+## Elasticsearch Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|-----------|-------------|---------------|-------------|
+| index | string | Required | - | Elasticsearch index name to search |
+| contentField | string | Required | - | Content field name to query |
+| linkField | string | Required | - | Result link field name |
+| titleField | string | Required | - | Result title field name |
+
+## Quark Specific Configuration
+
+| Name | Data Type | Requirement | Default Value | Description |
+|------|----------|----------|--------|------|
+| secretKey | string | Required | - | Aliyun SecretKey |
+| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint for accessing quark |
+
+## Configuration Examples
+
+### Basic Configuration (Single Search Engine)
+
+```yaml
+needReference: true
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ count: 5
+ optionArgs:
+ fileType: "pdf"
+```
+
+### Arxiv Search Configuration
+
+```yaml
+searchFrom:
+- type: arxiv
+ serviceName: "arxiv-svc.dns"
+ servicePort: 443
+ arxivCategory: "cs.AI"
+ count: 10
+```
+
+### Quark Search Configuration
+
+```yaml
+searchFrom:
+- type: quark
+ serviceName: "quark-svc.dns"
+ servicePort: 443
+ apiKey: "aliyun accessKey"
+ count: 10
+ secretKey: "aliyun secretKey"
+ endpoint: "iqs.cn-zhangjiakou.aliyuncs.com"
+```
+
+### Multiple Search Engines Configuration
+
+```yaml
+defaultLang: "en-US"
+promptTemplate: |
+ # Search Results:
+ {search_results}
+
+ # Please answer this question:
+ {question}
+searchFrom:
+- type: google
+ apiKey: "google-key"
+ cx: "github-search-id" # Search engine ID specifically for GitHub content
+ serviceName: "google-svc.dns"
+ servicePort: 443
+- type: google
+ apiKey: "google-key"
+ cx: "news-search-id" # Search engine ID specifically for Google News content
+ serviceName: "google-svc.dns"
+ servicePort: 443
+- type: bing
+ apiKey: "bing-key"
+ serviceName: "bing-svc.dns"
+ servicePort: 443
+ optionArgs:
+ answerCount: "5"
+```
+
+### Concurrent Query Configuration
+
+Since search engines limit the number of results per query (e.g. Google returns at most 10 results per query, and start + count cannot exceed 100), you can get more results by:
+1. Setting a smaller count value (e.g. 10)
+2. Specifying result offset with start parameter
+3. Concurrently initiating multiple query requests, with each request's start value incrementing by count
+
+For example, to get 30 results, configure count=10 and concurrently initiate 3 queries with start values 0,10,20 respectively:
+
+```yaml
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 0
+ count: 10
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 10
+ count: 10
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+ start: 20
+ count: 10
+```
+
+Note that excessive concurrency may trigger rate limiting, so adjust the number of concurrent queries to your actual situation.
+
+### Elasticsearch Configuration (For Private Knowledge Base Integration)
+
+```yaml
+searchFrom:
+- type: elasticsearch
+ serviceName: "es-svc.static"
+ # static ip service use 80 as default port
+ servicePort: 80
+ index: "knowledge_base"
+ contentField: "content"
+ linkField: "url"
+ titleField: "title"
+```
+
+### Custom Reference Format
+
+```yaml
+needReference: true
+referenceFormat: "### Data Sources\n%s"
+searchFrom:
+- type: bing
+ apiKey: "your-bing-key"
+ serviceName: "search-service.dns"
+ servicePort: 8080
+```
+
+### Search Rewrite Configuration
+
+```yaml
+searchFrom:
+- type: google
+ apiKey: "your-google-api-key"
+ cx: "search-engine-id"
+ serviceName: "google-svc.dns"
+ servicePort: 443
+searchRewrite:
+ llmServiceName: "llm-svc.dns"
+ llmServicePort: 443
+ llmApiKey: "your-llm-api-key"
+ llmUrl: "https://api.example.com/v1/chat/completions"
+ llmModelName: "gpt-3.5-turbo"
+ timeoutMillisecond: 15000
+```
+
+## Notes
+
+1. The prompt template must include `{search_results}` and `{question}` placeholders, optionally use `{cur_date}` to insert current date (format: January 2, 2006)
+2. The default template includes search result processing instructions and response specifications. If you have no special needs, use it as-is; otherwise modify it to fit your situation
+3. Multiple search engines query in parallel, total timeout = maximum timeoutMillisecond value among all search engine configurations + processing time
+4. Arxiv search doesn't require API key, but you can specify paper category (arxivCategory) to narrow search scope
diff --git a/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
new file mode 100644
index 0000000000..56a998ca33
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go
@@ -0,0 +1,134 @@
+package arxiv
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "net/http"
+ "net/url"
+ "strings"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/antchfx/xmlquery"
+ "github.com/tidwall/gjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+type ArxivSearch struct {
+ optionArgs map[string]string
+ start int
+ count int
+ timeoutMillisecond uint32
+ client wrapper.HttpClient
+ arxivCategory string
+}
+
+func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) {
+ engine := &ArxivSearch{}
+ serviceName := config.Get("serviceName").String()
+ if serviceName == "" {
+ return nil, errors.New("serviceName not found")
+ }
+ servicePort := config.Get("servicePort").Int()
+ if servicePort == 0 {
+ return nil, errors.New("servicePort not found")
+ }
+ engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+ FQDN: serviceName,
+ Port: servicePort,
+ })
+ engine.start = int(config.Get("start").Uint())
+ engine.count = int(config.Get("count").Uint())
+ if engine.count == 0 {
+ engine.count = 10
+ }
+ engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+ if engine.timeoutMillisecond == 0 {
+ engine.timeoutMillisecond = 5000
+ }
+ engine.optionArgs = map[string]string{}
+ for key, value := range config.Get("optionArgs").Map() {
+ valStr := value.String()
+ if valStr != "" {
+ engine.optionArgs[key] = value.String()
+ }
+ }
+ engine.arxivCategory = config.Get("arxivCategory").String()
+ return engine, nil
+}
+
+func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool {
+ return ctx.EngineType == "arxiv"
+}
+
+func (a ArxivSearch) Client() wrapper.HttpClient {
+ return a.client
+}
+
+// CallArgs builds the Arxiv export API GET request: every query becomes an
+// "all:" term, optionally AND-ed with a "cat:" term (the context category
+// takes precedence over the configured one); optionArgs become extra params.
+func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	var searchQueryItems []string
+	for _, q := range ctx.Querys {
+		searchQueryItems = append(searchQueryItems, "all:"+url.QueryEscape(q))
+	}
+	category := ctx.ArxivCategory
+	if category == "" {
+		category = a.arxivCategory
+	}
+	// Join the category with the other terms so that an empty query list does
+	// not yield a search_query starting with a dangling "+AND+".
+	if category != "" {
+		searchQueryItems = append(searchQueryItems, "cat:"+url.QueryEscape(category))
+	}
+	queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d",
+		strings.Join(searchQueryItems, "+AND+"), a.count, a.start)
+	for key, value := range a.optionArgs {
+		queryUrl += fmt.Sprintf("&%s=%s", key, url.QueryEscape(value))
+	}
+	return engine.CallArgs{
+		Method:             http.MethodGet,
+		Url:                queryUrl,
+		Headers:            [][2]string{{"Accept", "application/atom+xml"}},
+		TimeoutMillisecond: a.timeoutMillisecond,
+	}
+}
+
+// ParseResult converts an Arxiv Atom feed into search results. Entries with
+// no title or no text/html alternate link are dropped by the Valid() check.
+func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+	var results []engine.SearchResult
+	doc, err := xmlquery.Parse(bytes.NewReader(response))
+	if err != nil {
+		return results
+	}
+	// SelectElement returns nil for a missing child and InnerText would then
+	// dereference nil, so absent elements are read as empty text instead.
+	text := func(parent *xmlquery.Node, name string) string {
+		if node := parent.SelectElement(name); node != nil {
+			return node.InnerText()
+		}
+		return ""
+	}
+	for _, entry := range xmlquery.Find(doc, "//entry") {
+		link := ""
+		for _, l := range entry.SelectElements("link") {
+			if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" {
+				link = l.SelectAttr("href")
+				break
+			}
+		}
+		var authorNames []string
+		for _, author := range entry.SelectElements("author") {
+			authorNames = append(authorNames, text(author, "name"))
+		}
+		content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s", text(entry, "summary"), strings.Join(authorNames, ", "), text(entry, "published"))
+		result := engine.SearchResult{Title: text(entry, "title"), Link: link, Content: content}
+		if result.Valid() {
+			results = append(results, result)
+		}
+	}
+	return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
new file mode 100644
index 0000000000..b24fe33464
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go
@@ -0,0 +1,128 @@
+package bing
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+ "net/url"
+ "strings"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/tidwall/gjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+type BingSearch struct {
+ optionArgs map[string]string
+ apiKey string
+ start int
+ count int
+ timeoutMillisecond uint32
+ client wrapper.HttpClient
+}
+
+func NewBingSearch(config *gjson.Result) (*BingSearch, error) {
+ engine := &BingSearch{}
+ engine.apiKey = config.Get("apiKey").String()
+ if engine.apiKey == "" {
+ return nil, errors.New("apiKey not found")
+ }
+ serviceName := config.Get("serviceName").String()
+ if serviceName == "" {
+ return nil, errors.New("serviceName not found")
+ }
+ servicePort := config.Get("servicePort").Int()
+ if servicePort == 0 {
+ return nil, errors.New("servicePort not found")
+ }
+ engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+ FQDN: serviceName,
+ Port: servicePort,
+ })
+ engine.start = int(config.Get("start").Uint())
+ engine.count = int(config.Get("count").Uint())
+ if engine.count == 0 {
+ engine.count = 10
+ }
+ engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+ if engine.timeoutMillisecond == 0 {
+ engine.timeoutMillisecond = 5000
+ }
+ engine.optionArgs = map[string]string{}
+ for key, value := range config.Get("optionArgs").Map() {
+ valStr := value.String()
+ if valStr != "" {
+ engine.optionArgs[key] = value.String()
+ }
+ }
+ return engine, nil
+}
+
+func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool {
+ return ctx.EngineType == "" || ctx.EngineType == "internet"
+}
+
+func (b BingSearch) Client() wrapper.HttpClient {
+ return b.client
+}
+
+func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+ queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d",
+ url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start)
+ var extraArgs []string
+ for key, value := range b.optionArgs {
+ extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+ }
+ if ctx.Language != "" {
+ extraArgs = append(extraArgs, fmt.Sprintf("mkt=%s", ctx.Language))
+ }
+ if len(extraArgs) > 0 {
+ queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+ }
+ return engine.CallArgs{
+ Method: http.MethodGet,
+ Url: queryUrl,
+ Headers: [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}},
+ TimeoutMillisecond: b.timeoutMillisecond,
+ }
+}
+
+func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+ jsonObj := gjson.ParseBytes(response)
+ var results []engine.SearchResult
+ webPages := jsonObj.Get("webPages.value")
+ for _, page := range webPages.Array() {
+ result := engine.SearchResult{
+ Title: page.Get("name").String(),
+ Link: page.Get("url").String(),
+ Content: page.Get("snippet").String(),
+ }
+ if result.Valid() {
+ results = append(results, result)
+ }
+ deepLinks := page.Get("deepLinks")
+ for _, inner := range deepLinks.Array() {
+ innerResult := engine.SearchResult{
+ Title: inner.Get("name").String(),
+ Link: inner.Get("url").String(),
+ Content: inner.Get("snippet").String(),
+ }
+ if innerResult.Valid() {
+ results = append(results, innerResult)
+ }
+ }
+ }
+ news := jsonObj.Get("news.value")
+ for _, article := range news.Array() {
+ result := engine.SearchResult{
+ Title: article.Get("name").String(),
+ Link: article.Get("url").String(),
+ Content: article.Get("description").String(),
+ }
+ if result.Valid() {
+ results = append(results, result)
+ }
+ }
+ return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
new file mode 100644
index 0000000000..4290558c38
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go
@@ -0,0 +1,114 @@
+package elasticsearch
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+ "strings"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/tidwall/gjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+type ElasticsearchSearch struct {
+ client wrapper.HttpClient
+ index string
+ contentField string
+ linkField string
+ titleField string
+ start int
+ count int
+ timeoutMillisecond uint32
+}
+
+func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) {
+ engine := &ElasticsearchSearch{}
+ serviceName := config.Get("serviceName").String()
+ if serviceName == "" {
+ return nil, errors.New("serviceName not found")
+ }
+ servicePort := config.Get("servicePort").Int()
+ if servicePort == 0 {
+ return nil, errors.New("servicePort not found")
+ }
+ engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+ FQDN: serviceName,
+ Port: servicePort,
+ })
+ engine.index = config.Get("index").String()
+ if engine.index == "" {
+ return nil, errors.New("index not found")
+ }
+ engine.contentField = config.Get("contentField").String()
+ if engine.contentField == "" {
+ return nil, errors.New("contentField not found")
+ }
+ engine.linkField = config.Get("linkField").String()
+ if engine.linkField == "" {
+ return nil, errors.New("linkField not found")
+ }
+ engine.titleField = config.Get("titleField").String()
+ if engine.titleField == "" {
+ return nil, errors.New("titleField not found")
+ }
+ engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+ if engine.timeoutMillisecond == 0 {
+ engine.timeoutMillisecond = 5000
+ }
+ engine.start = int(config.Get("start").Uint())
+ engine.count = int(config.Get("count").Uint())
+ if engine.count == 0 {
+ engine.count = 10
+ }
+ return engine, nil
+}
+
+func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool {
+ return ctx.EngineType == "private"
+}
+
+func (e ElasticsearchSearch) Client() wrapper.HttpClient {
+ return e.client
+}
+
+// CallArgs builds the POST /<index>/_search request: a match query on
+// contentField that requires every term of the space-joined queries (AND).
+func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	// Encode both values as JSON strings so quotes, backslashes or control
+	// characters in a query cannot break or alter the request body.
+	field := gjson.AppendJSONString(nil, e.contentField)
+	query := gjson.AppendJSONString(nil, strings.Join(ctx.Querys, " "))
+	searchBody := fmt.Sprintf(`{
+		"query": {
+			"match": {
+				%s: {"query": %s, "operator": "AND"}
+			}
+		}
+	}`, field, query)
+	return engine.CallArgs{
+		Method:             http.MethodPost,
+		Url:                fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count),
+		Headers:            [][2]string{{"Content-Type", "application/json"}},
+		Body:               []byte(searchBody),
+		TimeoutMillisecond: e.timeoutMillisecond,
+	}
+}
+
+func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+ jsonObj := gjson.ParseBytes(response)
+ var results []engine.SearchResult
+ for _, hit := range jsonObj.Get("hits.hits").Array() {
+ source := hit.Get("_source")
+ result := engine.SearchResult{
+ Title: source.Get(e.titleField).String(),
+ Link: source.Get(e.linkField).String(),
+ Content: source.Get(e.contentField).String(),
+ }
+ if result.Valid() {
+ results = append(results, result)
+ }
+ }
+ return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/google/google.go b/plugins/wasm-go/extensions/ai-search/engine/google/google.go
new file mode 100644
index 0000000000..e189646b99
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/google/google.go
@@ -0,0 +1,120 @@
+package google
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+ "net/url"
+ "strings"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/tidwall/gjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+type GoogleSearch struct {
+ optionArgs map[string]string
+ apiKey string
+ cx string
+ start int
+ count int
+ timeoutMillisecond uint32
+ client wrapper.HttpClient
+}
+
+// NewGoogleSearch builds a Google Custom Search engine from plugin config.
+// apiKey, cx, serviceName and servicePort are required; count defaults to 10
+// and timeoutMillisecond to 5000. Returns an error on missing/invalid fields.
+func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) {
+	engine := &GoogleSearch{}
+	engine.apiKey = config.Get("apiKey").String()
+	if engine.apiKey == "" {
+		return nil, errors.New("apiKey not found")
+	}
+	engine.cx = config.Get("cx").String()
+	if engine.cx == "" {
+		return nil, errors.New("cx not found")
+	}
+	serviceName := config.Get("serviceName").String()
+	if serviceName == "" {
+		return nil, errors.New("serviceName not found")
+	}
+	servicePort := config.Get("servicePort").Int()
+	if servicePort == 0 {
+		return nil, errors.New("servicePort not found")
+	}
+	engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{FQDN: serviceName, Port: servicePort})
+	engine.start = int(config.Get("start").Uint())
+	engine.count = int(config.Get("count").Uint())
+	if engine.count == 0 {
+		engine.count = 10
+	}
+	// A count of exactly 10 is accepted, so the message says "or equal to".
+	if engine.count > 10 || engine.start+engine.count > 100 {
+		return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100.")
+	}
+	engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+	if engine.timeoutMillisecond == 0 {
+		engine.timeoutMillisecond = 5000
+	}
+	engine.optionArgs = map[string]string{}
+	for key, value := range config.Get("optionArgs").Map() {
+		if valStr := value.String(); valStr != "" {
+			engine.optionArgs[key] = valStr
+		}
+	}
+	return engine, nil
+}
+
+func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool {
+ return ctx.EngineType == "" || ctx.EngineType == "internet"
+}
+
+func (g GoogleSearch) Client() wrapper.HttpClient {
+ return g.client
+}
+
+func (g GoogleSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+ queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d",
+ g.cx, url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count, g.apiKey, g.start+1)
+ var extraArgs []string
+ for key, value := range g.optionArgs {
+ extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value)))
+ }
+ if ctx.Language != "" {
+ extraArgs = append(extraArgs, fmt.Sprintf("lr=lang_%s", ctx.Language))
+ }
+ if len(extraArgs) > 0 {
+ queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&"))
+ }
+ return engine.CallArgs{
+ Method: http.MethodGet,
+ Url: queryUrl,
+ Headers: [][2]string{
+ {"Accept", "application/json"},
+ },
+ TimeoutMillisecond: g.timeoutMillisecond,
+ }
+}
+
+func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+ jsonObj := gjson.ParseBytes(response)
+ var results []engine.SearchResult
+ for _, item := range jsonObj.Get("items").Array() {
+ content := item.Get("snippet").String()
+ metaDescription := item.Get("pagemap.metatags.0.og:description").String()
+ if metaDescription != "" {
+ content = fmt.Sprintf("%s\n...\n%s", content, metaDescription)
+ }
+ result := engine.SearchResult{
+ Title: item.Get("title").String(),
+ Link: item.Get("link").String(),
+ Content: content,
+ }
+ if result.Valid() {
+ results = append(results, result)
+ }
+ }
+ return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
new file mode 100644
index 0000000000..84273bb776
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go
@@ -0,0 +1,194 @@
+package quark
+
+import (
+ "crypto/hmac"
+ "crypto/rand"
+ "crypto/sha256"
+ "encoding/hex"
+ "errors"
+ "fmt"
+ "net/http"
+ "net/url"
+ "sort"
+ "strings"
+ "time"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+ "github.com/tidwall/gjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+)
+
+type QuarkSearch struct {
+ apiKey string
+ secretKey string
+ timeoutMillisecond uint32
+ client wrapper.HttpClient
+ count uint32
+ endpoint string
+}
+
+const (
+ Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch"
+ ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body
+ Action = "GenericSearch"
+ Version = "2024-11-11"
+ SignatureAlgorithm = "ACS3-HMAC-SHA256"
+ SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version"
+)
+
+func urlEncoding(rawStr string) string {
+ encodedStr := url.PathEscape(rawStr)
+ encodedStr = strings.ReplaceAll(encodedStr, "+", "%2B")
+ encodedStr = strings.ReplaceAll(encodedStr, ":", "%3A")
+ encodedStr = strings.ReplaceAll(encodedStr, "=", "%3D")
+ encodedStr = strings.ReplaceAll(encodedStr, "&", "%26")
+ encodedStr = strings.ReplaceAll(encodedStr, "$", "%24")
+ encodedStr = strings.ReplaceAll(encodedStr, "@", "%40")
+ // encodedStr := url.QueryEscape(rawStr)
+ return encodedStr
+}
+
+func getSignature(stringToSign, secret string) string {
+ h := hmac.New(sha256.New, []byte(secret))
+ h.Write([]byte(stringToSign))
+ hash := h.Sum(nil)
+ return hex.EncodeToString(hash)
+}
+
+// getCanonicalHeaders renders params as "key:value" lines in ascending
+// lexical order, each terminated by "\n", for use in the signed request.
+func getCanonicalHeaders(params map[string]string) string {
+	paramArray := make([]string, 0, len(params))
+	for k, v := range params {
+		paramArray = append(paramArray, k+":"+v)
+	}
+	sort.Strings(paramArray) // strict "<" ordering; "<=" is not a valid less func
+	return strings.Join(paramArray, "\n") + "\n"
+}
+
+func getHasedString(input string) string {
+ hash := sha256.Sum256([]byte(input))
+ hashHex := hex.EncodeToString(hash[:])
+ return hashHex
+}
+
+func generateHexID(length int) (string, error) {
+ bytes := make([]byte, length/2)
+ if _, err := rand.Read(bytes); err != nil {
+ return "", err
+ }
+ return hex.EncodeToString(bytes), nil
+}
+
+func NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) {
+ engine := &QuarkSearch{}
+ engine.apiKey = config.Get("apiKey").String()
+ if engine.apiKey == "" {
+ return nil, errors.New("apiKey not found")
+ }
+ engine.secretKey = config.Get("secretKey").String()
+ if engine.secretKey == "" {
+ return nil, errors.New("secretKey not found")
+ }
+ serviceName := config.Get("serviceName").String()
+ if serviceName == "" {
+ return nil, errors.New("serviceName not found")
+ }
+ servicePort := config.Get("servicePort").Int()
+ if servicePort == 0 {
+ return nil, errors.New("servicePort not found")
+ }
+ engine.endpoint = config.Get("endpoint").String()
+ if engine.endpoint == "" {
+ engine.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com"
+ }
+ engine.count = uint32(config.Get("count").Int())
+ if engine.count == 0 {
+ engine.count = 10
+ }
+ engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+ FQDN: serviceName,
+ Port: servicePort,
+ })
+ engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint())
+ if engine.timeoutMillisecond == 0 {
+ engine.timeoutMillisecond = 5000
+ }
+ return engine, nil
+}
+
+func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool {
+ return ctx.EngineType == "" || ctx.EngineType == "internet"
+}
+
+func (g QuarkSearch) Client() wrapper.HttpClient {
+ return g.client
+}
+
+// CallArgs builds a GET request to the Quark GenericSearch API signed with
+// the ACS3-HMAC-SHA256 scheme, using the space-joined queries as the keyword.
+func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs {
+	queryParams := map[string]string{
+		"query":     strings.Join(ctx.Querys, " "),
+		"timeRange": "NoLimit",
+	}
+	queryParamsStr := []string{}
+	for k, v := range queryParams {
+		queryParamsStr = append(queryParamsStr, k+"="+urlEncoding(v))
+	}
+	// Go map iteration order is unspecified; sort so the signed query string
+	// is stable and matches the key-sorted query produced by Encode() below.
+	sort.Strings(queryParamsStr)
+	canonicalQueryString := strings.Join(queryParamsStr, "&")
+	timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
+	randomID, _ := generateHexID(32)
+	params := map[string]string{
+		"host":                  g.endpoint,
+		"x-acs-action":          Action,
+		"x-acs-content-sha256":  ContentSha256,
+		"x-acs-date":            timeStamp,
+		"x-acs-signature-nonce": randomID,
+		"x-acs-version":         Version,
+	}
+	canonicalHeaders := getCanonicalHeaders(params)
+	canonicalRequest := http.MethodGet + "\n" + Path + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256
+	stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest)
+	authHeader := fmt.Sprintf("%s Credential=%s,SignedHeaders=%s,Signature=%s",
+		SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey))
+	reqParams := url.Values{}
+	for k, v := range queryParams {
+		reqParams.Add(k, v)
+	}
+	requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode())
+	return engine.CallArgs{
+		Method: http.MethodGet,
+		Url:    requestURL,
+		Headers: [][2]string{
+			{"x-acs-date", timeStamp},
+			{"x-acs-signature-nonce", randomID},
+			{"x-acs-content-sha256", ContentSha256},
+			{"x-acs-version", Version},
+			{"x-acs-action", Action},
+			{"Authorization", authHeader},
+		},
+		Body:               nil,
+		TimeoutMillisecond: g.timeoutMillisecond,
+	}
+}
+
+func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult {
+ jsonObj := gjson.ParseBytes(response)
+ var results []engine.SearchResult
+ for index, item := range jsonObj.Get("pageItems").Array() {
+ result := engine.SearchResult{
+ Title: item.Get("title").String(),
+ Link: item.Get("link").String(),
+ Content: item.Get("mainText").String(),
+ }
+ if result.Valid() && index < int(g.count) {
+ results = append(results, result)
+ }
+ }
+ return results
+}
diff --git a/plugins/wasm-go/extensions/ai-search/engine/types.go b/plugins/wasm-go/extensions/ai-search/engine/types.go
new file mode 100644
index 0000000000..a0d6780bae
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/engine/types.go
@@ -0,0 +1,37 @@
+package engine
+
+import (
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+)
+
+type SearchResult struct {
+ Title string
+ Link string
+ Content string
+}
+
+func (result SearchResult) Valid() bool {
+ return result.Title != "" && result.Link != "" && result.Content != ""
+}
+
+type SearchContext struct {
+ EngineType string
+ Querys []string
+ Language string
+ ArxivCategory string
+}
+
+type CallArgs struct {
+ Method string
+ Url string
+ Headers [][2]string
+ Body []byte
+ TimeoutMillisecond uint32
+}
+
+type SearchEngine interface {
+ NeedExectue(ctx SearchContext) bool
+ Client() wrapper.HttpClient
+ CallArgs(ctx SearchContext) CallArgs
+ ParseResult(ctx SearchContext, response []byte) []SearchResult
+}
diff --git a/plugins/wasm-go/extensions/ai-search/go.mod b/plugins/wasm-go/extensions/ai-search/go.mod
new file mode 100644
index 0000000000..17bd972c49
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/go.mod
@@ -0,0 +1,26 @@
+module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search
+
+go 1.18
+
+replace github.com/alibaba/higress/plugins/wasm-go => ../..
+
+require (
+ github.com/alibaba/higress/plugins/wasm-go v0.0.0
+ github.com/antchfx/xmlquery v1.4.4
+ github.com/higress-group/proxy-wasm-go-sdk v1.0.0
+ github.com/tidwall/gjson v1.18.0
+ github.com/tidwall/sjson v1.2.5
+)
+
+require (
+ github.com/antchfx/xpath v1.3.3 // indirect
+ github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+ github.com/google/uuid v1.3.0 // indirect
+ github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect
+ github.com/magefile/mage v1.14.0 // indirect
+ github.com/tidwall/match v1.1.1 // indirect
+ github.com/tidwall/pretty v1.2.0 // indirect
+ github.com/tidwall/resp v0.1.1 // indirect
+ golang.org/x/net v0.33.0 // indirect
+ golang.org/x/text v0.21.0 // indirect
+)
diff --git a/plugins/wasm-go/extensions/ai-search/go.sum b/plugins/wasm-go/extensions/ai-search/go.sum
new file mode 100644
index 0000000000..81d555f4bd
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/go.sum
@@ -0,0 +1,96 @@
+github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2dg=
+github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc=
+github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
+github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA=
+github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU=
+github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE=
+github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/plugins/wasm-go/extensions/ai-search/main.go b/plugins/wasm-go/extensions/ai-search/main.go
new file mode 100644
index 0000000000..720e688ccc
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/main.go
@@ -0,0 +1,568 @@
+// Copyright (c) 2022 Alibaba Group Holding Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ _ "embed"
+ "errors"
+ "fmt"
+ "net/http"
+ "strings"
+ "time"
+
+ "github.com/higress-group/proxy-wasm-go-sdk/proxywasm"
+ "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types"
+ "github.com/tidwall/gjson"
+ "github.com/tidwall/sjson"
+
+ "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper"
+
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine"
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/arxiv"
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing"
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch"
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google"
+ "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark"
+)
+
+// SearchRewrite holds the settings of the LLM service that rewrites the user
+// question into search queries before the engines are called.
+type SearchRewrite struct {
+	// client targets the cluster built from llmServiceName/llmServicePort.
+	client wrapper.HttpClient
+	// url is the request URL taken from llmUrl.
+	url string
+	// apiKey is sent as a Bearer token in the Authorization header.
+	apiKey string
+	// modelName is placed in the "model" field of the rewrite request.
+	modelName string
+	// timeoutMillisecond defaults to 30000 when not configured.
+	timeoutMillisecond uint32
+	// prompt is the embedded prompt whose {question} placeholder is replaced
+	// with the user query.
+	prompt string
+}
+
+// Config is the parsed plugin configuration.
+type Config struct {
+	// engine lists the search engines built from the searchFrom entries.
+	engine []engine.SearchEngine
+	// promptTemplate must contain {search_results} and {question}; {cur_date}
+	// is also substituted when present.
+	promptTemplate string
+	// referenceFormat is a format string with one %s for the reference list;
+	// it is only read when needReference is true.
+	referenceFormat string
+	// defaultLanguage comes from the defaultLang configuration key.
+	defaultLanguage string
+	// needReference enables numbered results and a reference list in the reply.
+	needReference bool
+	// searchRewrite is nil when the searchRewrite section is not configured.
+	searchRewrite *SearchRewrite
+}
+
+const (
+	// DEFAULT_MAX_BODY_BYTES (100 MiB) is the buffer limit applied to request
+	// bodies and to buffered (non-streaming) response bodies.
+	DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024
+)
+
+// fullSearchPrompts is the rewrite prompt used when both arxiv and private
+// engines are configured.
+//
+//go:embed prompts/full.md
+var fullSearchPrompts string
+
+// arxivSearchPrompts is the rewrite prompt used when an arxiv engine is
+// configured without a private one.
+//
+//go:embed prompts/arxiv.md
+var arxivSearchPrompts string
+
+// internetSearchPrompts is the rewrite prompt used when only internet engines
+// are configured.
+//
+//go:embed prompts/internet.md
+var internetSearchPrompts string
+
+// privateSearchPrompts is the rewrite prompt used when a private engine is
+// configured without an arxiv one.
+//
+//go:embed prompts/private.md
+var privateSearchPrompts string
+
+// main registers the "ai-search" plugin with the wasm-go wrapper, wiring the
+// config parser and the request/response header and body handlers.
+func main() {
+	wrapper.SetCtx(
+		"ai-search",
+		wrapper.ParseConfigBy(parseConfig),
+		wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders),
+		wrapper.ProcessRequestBodyBy(onHttpRequestBody),
+		wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders),
+		wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody),
+		wrapper.ProcessResponseBodyBy(onHttpResponseBody),
+	)
+}
+
+// parseConfig fills config from the plugin's JSON configuration.
+//
+// It reads needReference/referenceFormat, defaultLang and promptTemplate
+// (falling back to a built-in template), builds one search engine per entry of
+// searchFrom and, when a searchRewrite section exists, the LLM client used to
+// rewrite queries.
+//
+// An error is returned when referenceFormat lacks "%s", the prompt template
+// lacks {search_results} or {question}, an engine has an unknown type or fails
+// to initialise, a required searchRewrite field is missing, or no engine is
+// configured at all.
+func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error {
+	config.needReference = json.Get("needReference").Bool()
+	if config.needReference {
+		config.referenceFormat = json.Get("referenceFormat").String()
+		if config.referenceFormat == "" {
+			config.referenceFormat = "**References:**\n%s"
+		} else if !strings.Contains(config.referenceFormat, "%s") {
+			return fmt.Errorf("invalid referenceFormat:%s", config.referenceFormat)
+		}
+	}
+	config.defaultLanguage = json.Get("defaultLang").String()
+	config.promptTemplate = json.Get("promptTemplate").String()
+	if config.promptTemplate == "" {
+		// The built-in template differs depending on whether the model is asked
+		// to cite numbered references.
+		if config.needReference {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的,X代表每篇文章的数字索引。请在适当的情况下在句子末尾引用上下文。请按照引用编号[X]的格式在答案中对应部分引用上下文。如果一句话源自多个上下文,请列出所有相关的引用编号,例如[3][5],切记不要将引用集中在最后返回引用编号,而是在答案对应部分列出。
+在回答时,请注意以下几点:
+- 今天是北京时间:{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
+- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内,并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项;如非必要,不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题(如写论文),请务必在正文的段落中引用对应的参考编号,例如[3][5],不能只在文章末尾引用。你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
+- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。
+- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
+- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。
+- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为:
+{question}`
+		} else {
+			config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果:
+{search_results}
+在我给你的搜索结果中,每个结果都是[webpage begin]...[webpage end]格式的。
+在回答时,请注意以下几点:
+- 今天是北京时间:{cur_date}。
+- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。
+- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内。如非必要,不要主动告诉用户搜索结果未提供的内容。
+- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。
+- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。
+- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。
+- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。
+- 你的回答应该综合多个相关网页来回答,但回答中不要给出网页的引用来源。
+- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。
+
+# 用户消息为:
+{question}`
+		}
+	}
+	if !strings.Contains(config.promptTemplate, "{search_results}") ||
+		!strings.Contains(config.promptTemplate, "{question}") {
+		return fmt.Errorf("invalid promptTemplate, must contains {search_results} and {question}:%s", config.promptTemplate)
+	}
+	// Track which families of engines are configured; this decides which
+	// rewrite prompt is used below.
+	var internetExists, privateExists, arxivExists bool
+	for _, e := range json.Get("searchFrom").Array() {
+		switch e.Get("type").String() {
+		case "bing":
+			searchEngine, err := bing.NewBingSearch(&e)
+			if err != nil {
+				return fmt.Errorf("bing search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "google":
+			searchEngine, err := google.NewGoogleSearch(&e)
+			if err != nil {
+				return fmt.Errorf("google search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		case "arxiv":
+			searchEngine, err := arxiv.NewArxivSearch(&e)
+			if err != nil {
+				return fmt.Errorf("arxiv search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			arxivExists = true
+		case "elasticsearch":
+			searchEngine, err := elasticsearch.NewElasticsearchSearch(&e)
+			if err != nil {
+				return fmt.Errorf("elasticsearch search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			privateExists = true
+		case "quark":
+			searchEngine, err := quark.NewQuarkSearch(&e)
+			if err != nil {
+				return fmt.Errorf("quark search engine init failed:%s", err)
+			}
+			config.engine = append(config.engine, searchEngine)
+			internetExists = true
+		default:
+			return fmt.Errorf("unknown search engine:%s", e.Get("type").String())
+		}
+	}
+	searchRewriteJson := json.Get("searchRewrite")
+	if searchRewriteJson.Exists() {
+		searchRewrite := &SearchRewrite{}
+		llmServiceName := searchRewriteJson.Get("llmServiceName").String()
+		if llmServiceName == "" {
+			return errors.New("llmServiceName not found")
+		}
+		llmServicePort := searchRewriteJson.Get("llmServicePort").Int()
+		if llmServicePort == 0 {
+			return errors.New("llmServicePort not found")
+		}
+		searchRewrite.client = wrapper.NewClusterClient(wrapper.FQDNCluster{
+			FQDN: llmServiceName,
+			Port: llmServicePort,
+		})
+		llmApiKey := searchRewriteJson.Get("llmApiKey").String()
+		if llmApiKey == "" {
+			return errors.New("llmApiKey not found")
+		}
+		searchRewrite.apiKey = llmApiKey
+		llmUrl := searchRewriteJson.Get("llmUrl").String()
+		if llmUrl == "" {
+			return errors.New("llmUrl not found")
+		}
+		searchRewrite.url = llmUrl
+		llmModelName := searchRewriteJson.Get("llmModelName").String()
+		if llmModelName == "" {
+			return errors.New("llmModelName not found")
+		}
+		searchRewrite.modelName = llmModelName
+		llmTimeout := searchRewriteJson.Get("timeoutMillisecond").Uint()
+		if llmTimeout == 0 {
+			llmTimeout = 30000
+		}
+		searchRewrite.timeoutMillisecond = uint32(llmTimeout)
+		// The consideration here is that internet searches are generally available, but arxiv and private sources may not be.
+		if arxivExists {
+			if privateExists {
+				// private + internet + arxiv
+				searchRewrite.prompt = fullSearchPrompts
+			} else {
+				// internet + arxiv
+				searchRewrite.prompt = arxivSearchPrompts
+			}
+		} else if privateExists {
+			// private + internet
+			searchRewrite.prompt = privateSearchPrompts
+		} else if internetExists {
+			// only internet
+			searchRewrite.prompt = internetSearchPrompts
+		}
+		config.searchRewrite = searchRewrite
+	}
+	if len(config.engine) == 0 {
+		return fmt.Errorf("no available search engine found")
+	}
+	log.Debugf("ai search enabled, config: %#v", config)
+	return nil
+}
+
+// onHttpRequestHeaders decides from the content-type header how the request
+// body is handled: requests without a content-type pass through untouched,
+// non-JSON bodies are not read, and JSON bodies get the plugin's buffer limit
+// while the Accept-Encoding request header is removed. The request always
+// continues.
+func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
+	contentType, _ := proxywasm.GetHttpRequestHeader("content-type")
+	switch {
+	case contentType == "":
+		// The request does not have a body.
+	case !strings.Contains(contentType, "application/json"):
+		log.Warnf("content is not json, can't process: %s", contentType)
+		ctx.DontReadRequestBody()
+	default:
+		ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+		_ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding")
+	}
+	return types.ActionContinue
+}
+
+// splitSearchQueries splits a comma-separated query list, trims every entry
+// and drops the empty ones.
+func splitSearchQueries(queryStr string) []string {
+	var queries []string
+	for _, q := range strings.Split(queryStr, ",") {
+		if q = strings.TrimSpace(q); q != "" {
+			queries = append(queries, q)
+		}
+	}
+	return queries
+}
+
+// onHttpRequestBody takes the content of the last "user" message as the query
+// and starts the search.
+//
+// Without a searchRewrite configuration the query is searched as is. With one,
+// the query is first sent to the rewrite LLM; each non-empty line of its reply
+// is expected to look like "<type>: <queries>", where <type> is "internet",
+// "private" or otherwise treated as an arXiv category. A reply containing
+// "none" means that no search is needed.
+//
+// Returns ActionPause while an asynchronous call is outstanding (the request
+// is resumed from the callbacks) and ActionContinue otherwise.
+func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	var queryIndex int
+	var query string
+	messages := gjson.GetBytes(body, "messages").Array()
+	// Walk backwards so that the most recent user message wins.
+	for i := len(messages) - 1; i >= 0; i-- {
+		if messages[i].Get("role").String() == "user" {
+			queryIndex = i
+			query = messages[i].Get("content").String()
+			break
+		}
+	}
+	if query == "" {
+		log.Errorf("not found user query in body:%s", body)
+		return types.ActionContinue
+	}
+	searchRewrite := config.searchRewrite
+	if searchRewrite != nil {
+		startTime := time.Now()
+		rewritePrompt := strings.Replace(searchRewrite.prompt, "{question}", query, 1)
+		rewriteBody, _ := sjson.SetBytes([]byte(fmt.Sprintf(
+			`{"stream":false,"max_tokens":100,"model":"%s","messages":[{"role":"user","content":""}]}`,
+			searchRewrite.modelName)), "messages.0.content", rewritePrompt)
+		err := searchRewrite.client.Post(searchRewrite.url,
+			[][2]string{
+				{"Content-Type", "application/json"},
+				{"Authorization", fmt.Sprintf("Bearer %s", searchRewrite.apiKey)},
+			}, rewriteBody,
+			func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+				if statusCode != http.StatusOK {
+					log.Errorf("search rewrite failed, status: %d", statusCode)
+					// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+
+				content := gjson.GetBytes(responseBody, "choices.0.message.content").String()
+				log.Infof("LLM rewritten query response: %s (took %v), original search query:%s",
+					strings.ReplaceAll(content, "\n", `\n`), time.Since(startTime), query)
+				if strings.Contains(content, "none") {
+					log.Debugf("no search required")
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+
+				// Parse search queries from LLM response
+				var searchContexts []engine.SearchContext
+				for _, line := range strings.Split(content, "\n") {
+					line = strings.TrimSpace(line)
+					if line == "" {
+						continue
+					}
+
+					parts := strings.SplitN(line, ":", 2)
+					if len(parts) != 2 {
+						continue
+					}
+
+					engineType := strings.TrimSpace(parts[0])
+					queryStr := strings.TrimSpace(parts[1])
+
+					// Named searchCtx so it does not shadow the HTTP context ctx.
+					var searchCtx engine.SearchContext
+					searchCtx.Language = config.defaultLanguage
+
+					switch {
+					case engineType == "internet":
+						searchCtx.EngineType = engineType
+						// The whole remainder of the line is one query.
+						if queryStr != "" {
+							searchCtx.Querys = []string{queryStr}
+						}
+					case engineType == "private":
+						searchCtx.EngineType = engineType
+						searchCtx.Querys = splitSearchQueries(queryStr)
+					default:
+						// Arxiv category
+						searchCtx.EngineType = "arxiv"
+						searchCtx.ArxivCategory = engineType
+						searchCtx.Querys = splitSearchQueries(queryStr)
+					}
+
+					// strings.Split never yields an empty slice, so empty
+					// entries are filtered above to keep this guard meaningful.
+					if len(searchCtx.Querys) > 0 {
+						searchContexts = append(searchContexts, searchCtx)
+						if searchCtx.ArxivCategory != "" {
+							// Conduct inquiries in all areas to increase recall.
+							backupCtx := searchCtx
+							backupCtx.ArxivCategory = ""
+							searchContexts = append(searchContexts, backupCtx)
+						}
+					}
+				}
+
+				if len(searchContexts) == 0 {
+					log.Errorf("no valid search contexts found")
+					proxywasm.ResumeHttpRequest()
+					return
+				}
+				// executeSearch resumes the request itself once it has paused
+				// for outstanding calls; resume here only if nothing was sent.
+				if types.ActionContinue == executeSearch(ctx, config, queryIndex, body, searchContexts, log) {
+					proxywasm.ResumeHttpRequest()
+				}
+			}, searchRewrite.timeoutMillisecond)
+		if err != nil {
+			log.Errorf("search rewrite call llm service failed:%s", err)
+			// After a rewrite failure, no further search is performed, thus quickly identifying the failure.
+			return types.ActionContinue
+		}
+		return types.ActionPause
+	}
+
+	// Execute search without rewrite
+	return executeSearch(ctx, config, queryIndex, body, []engine.SearchContext{{
+		Querys:   []string{query},
+		Language: config.defaultLanguage,
+	}}, log)
+}
+
+// executeSearch dispatches every search context to every configured engine
+// that accepts it and, once the last outstanding call has returned, rewrites
+// the user message at messages[queryIndex] with the filled prompt template.
+//
+// Results are grouped per engine, merged in engine order and de-duplicated by
+// link. When references are enabled the reference list is stored in the HTTP
+// context under "References".
+//
+// Returns ActionPause when at least one call was sent (the request is resumed
+// from the final callback) and ActionContinue when nothing was sent.
+func executeSearch(ctx wrapper.HttpContext, config Config, queryIndex int, body []byte, searchContexts []engine.SearchContext, log wrapper.Log) types.Action {
+	searchResultGroups := make([][]engine.SearchResult, len(config.engine))
+	// {question} must carry the user's own message. The first search query is
+	// only equal to it when no rewrite took place, so read it from the body
+	// and fall back to the first query if the message cannot be read.
+	question := gjson.GetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex)).String()
+	if question == "" && len(searchContexts) > 0 && len(searchContexts[0].Querys) > 0 {
+		question = searchContexts[0].Querys[0]
+	}
+	var finished, searching int
+	for i := 0; i < len(config.engine); i++ {
+		configEngine := config.engine[i]
+		index := i
+		for _, searchCtx := range searchContexts {
+			if !configEngine.NeedExectue(searchCtx) {
+				continue
+			}
+			// The callback below runs after this loop has moved on; take a
+			// per-iteration copy so it does not observe a later range value
+			// (the module targets go 1.18, where range variables are shared).
+			searchCtx := searchCtx
+			args := configEngine.CallArgs(searchCtx)
+			err := configEngine.Client().Call(args.Method, args.Url, args.Headers, args.Body,
+				func(statusCode int, responseHeaders http.Header, responseBody []byte) {
+					defer func() {
+						finished++
+						if finished != searching {
+							return
+						}
+						// Merge search results from all engines with deduplication
+						var mergedResults []engine.SearchResult
+						seenLinks := make(map[string]bool)
+						for _, results := range searchResultGroups {
+							for _, result := range results {
+								if !seenLinks[result.Link] {
+									seenLinks[result.Link] = true
+									mergedResults = append(mergedResults, result)
+								}
+							}
+						}
+						// Format search results for prompt template
+						var formattedResults []string
+						var formattedReferences []string
+						for j, result := range mergedResults {
+							if config.needReference {
+								formattedResults = append(formattedResults,
+									fmt.Sprintf("[webpage %d begin]\n%s\n[webpage %d end]", j+1, result.Content, j+1))
+								formattedReferences = append(formattedReferences,
+									fmt.Sprintf("[%d] [%s](%s)", j+1, result.Title, result.Link))
+							} else {
+								formattedResults = append(formattedResults,
+									fmt.Sprintf("[webpage begin]\n%s\n[webpage end]", result.Content))
+							}
+						}
+						// Prepare template variables
+						curDate := time.Now().In(time.FixedZone("CST", 8*3600)).Format("2006年1月2日")
+						searchResults := strings.Join(formattedResults, "\n")
+						log.Debugf("searchResults: %s", searchResults)
+						// Fill prompt template
+						prompt := strings.Replace(config.promptTemplate, "{search_results}", searchResults, 1)
+						prompt = strings.Replace(prompt, "{question}", question, 1)
+						prompt = strings.Replace(prompt, "{cur_date}", curDate, 1)
+						// Update request body with processed prompt
+						modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt)
+						if err != nil {
+							log.Errorf("modify request message content failed, err:%v, body:%s", err, body)
+						} else {
+							log.Debugf("modified body:%s", modifiedBody)
+							proxywasm.ReplaceHttpRequestBody(modifiedBody)
+							if config.needReference {
+								ctx.SetContext("References", strings.Join(formattedReferences, "\n"))
+							}
+						}
+						proxywasm.ResumeHttpRequest()
+					}()
+					if statusCode != http.StatusOK {
+						log.Errorf("search call failed, status: %d, engine: %#v", statusCode, configEngine)
+						return
+					}
+					// Append results to existing slice for this engine
+					searchResultGroups[index] = append(searchResultGroups[index], configEngine.ParseResult(searchCtx, responseBody)...)
+				}, args.TimeoutMillisecond)
+			if err != nil {
+				log.Errorf("search call failed, engine: %#v, err: %v", configEngine, err)
+				continue
+			}
+			// Only calls that were actually sent are awaited by the callback.
+			searching++
+		}
+	}
+	if searching > 0 {
+		return types.ActionPause
+	}
+	return types.ActionContinue
+}
+
+// onHttpResponseHeaders prepares the response for reference injection. When
+// references are disabled the response body is not read at all. Otherwise the
+// content-length header is removed and, unless the response is an event
+// stream, the body is buffered with the plugin's size limit.
+func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action {
+	if !config.needReference {
+		ctx.DontReadResponseBody()
+		return types.ActionContinue
+	}
+	proxywasm.RemoveHttpResponseHeader("content-length")
+	contentType, err := proxywasm.GetHttpResponseHeader("Content-Type")
+	if err != nil {
+		log.Errorf("unable to load content-type header from response: %v", err)
+	} else if strings.HasPrefix(contentType, "text/event-stream") {
+		// Streaming responses are handled chunk by chunk, no buffering needed.
+		return types.ActionContinue
+	}
+	ctx.BufferResponseBody()
+	ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES)
+	return types.ActionContinue
+}
+
+// onHttpResponseBody prepends the formatted reference list to
+// choices.0.message.content of a buffered response. Nothing is changed when
+// no references were stored for this request or when the body cannot be
+// modified.
+func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action {
+	references := ctx.GetStringContext("References", "")
+	if references == "" {
+		return types.ActionContinue
+	}
+	content := gjson.GetBytes(body, "choices.0.message.content")
+	modifiedContent := fmt.Sprintf("%s\n\n%s", fmt.Sprintf(config.referenceFormat, references), content)
+	// Use a separate variable so that the error log below still shows the
+	// original body rather than whatever SetBytes returned on failure.
+	modifiedBody, err := sjson.SetBytes(body, "choices.0.message.content", modifiedContent)
+	if err != nil {
+		log.Errorf("modify response message content failed, err:%v, body:%s", err, body)
+		return types.ActionContinue
+	}
+	proxywasm.ReplaceHttpResponseBody(modifiedBody)
+	return types.ActionContinue
+}
+
+// onStreamingResponseBody injects the reference list into the first message of
+// a streamed response. Chunks are withheld (an empty chunk is returned) until
+// the first complete message is available; after the references have been
+// added, or when there are none, chunks pass through unchanged.
+//
+// If the stream ends before a complete first message was seen, the buffered
+// bytes are flushed unmodified instead of being dropped.
+func onStreamingResponseBody(ctx wrapper.HttpContext, config Config, chunk []byte, isLastChunk bool, log wrapper.Log) []byte {
+	if ctx.GetBoolContext("ReferenceAppended", false) {
+		return chunk
+	}
+	references := ctx.GetStringContext("References", "")
+	if references == "" {
+		return chunk
+	}
+	modifiedChunk, responseReady := setReferencesToFirstMessage(ctx, chunk, fmt.Sprintf(config.referenceFormat, references), log)
+	if responseReady {
+		ctx.SetContext("ReferenceAppended", true)
+		return modifiedChunk
+	}
+	if isLastChunk {
+		// No further chunk will arrive: hand over whatever has been buffered.
+		if buffered, ok := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY).([]byte); ok {
+			return buffered
+		}
+	}
+	return []byte("")
+}
+
+// PARTIAL_MESSAGE_CONTEXT_KEY is the HTTP-context key under which streamed
+// bytes are buffered until the first complete message has arrived.
+const PARTIAL_MESSAGE_CONTEXT_KEY = "partialMessage"
+
+// setReferencesToFirstMessage prepends references to choices.0.delta.content
+// of the first message of a streamed response.
+//
+// The chunk is appended to any bytes buffered earlier under
+// PARTIAL_MESSAGE_CONTEXT_KEY. Messages are separated by a blank line
+// ("\n\n"); until one separator has been seen the data is buffered and
+// (nil, false) is returned. Once the first message is complete the buffered
+// data, with that message rewritten, is returned together with true. If the
+// message cannot be rewritten the buffered data is returned unchanged, also
+// with true.
+func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, references string, log wrapper.Log) ([]byte, bool) {
+	if len(chunk) == 0 {
+		log.Debugf("chunk is empty")
+		return nil, false
+	}
+
+	var partialMessage []byte
+	partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY)
+	if partialMessageI != nil {
+		if pMsg, ok := partialMessageI.([]byte); ok {
+			partialMessage = append(pMsg, chunk...)
+		} else {
+			log.Warnf("invalid partial message type: %T", partialMessageI)
+			partialMessage = chunk
+		}
+	} else {
+		partialMessage = chunk
+	}
+
+	if len(partialMessage) == 0 {
+		log.Debugf("partial message is empty")
+		return nil, false
+	}
+	messages := strings.Split(string(partialMessage), "\n\n")
+	if len(messages) > 1 {
+		firstMessage := messages[0]
+		log.Debugf("first message: %s", firstMessage)
+		// Strip the "data:" framing to get at the JSON payload.
+		firstMessage = strings.TrimPrefix(firstMessage, "data:")
+		firstMessage = strings.TrimPrefix(firstMessage, " ")
+		firstMessage = strings.TrimSuffix(firstMessage, "\n")
+		deltaContent := gjson.Get(firstMessage, "choices.0.delta.content")
+		modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent))
+		if err != nil {
+			log.Errorf("modify response delta content failed, err:%v", err)
+			return partialMessage, true
+		}
+		modifiedMessage = fmt.Sprintf("data: %s", modifiedMessage)
+		log.Debugf("modified message: %s", modifiedMessage)
+		messages[0] = modifiedMessage
+		return []byte(strings.Join(messages, "\n\n")), true
+	}
+	// No complete message yet: keep everything for the next chunk.
+	ctx.SetContext(PARTIAL_MESSAGE_CONTEXT_KEY, partialMessage)
+	return nil, false
+}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
new file mode 100644
index 0000000000..34aeefa413
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md
@@ -0,0 +1,214 @@
+# 目标
+你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv),并按照如下情况回复相应内容:
+
+## 情况一:不需要查询搜索引擎/论文资料
+### 情况举例:
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
+
+### 回复内容示例:
+none
+
+## 情况二:需要查询搜索引擎/论文资料
+### 情况举例:
+1. 答复**用户发送的消息**,需依赖互联网上最新的资料
+2. 答复**用户发送的消息**,需依赖论文等专业资料
+3. 通过查询资料,可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向Arxiv论文资料库进行查询,或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
+ 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
+ 4.2. 向Arxiv论文资料库提问:
+ 4.2.1. 明确问题所属领域,然后确定Arxiv的Category值,Category可选的枚举如下:
+ - cs.AI: Artificial Intelligence
+ - cs.AR: Hardware Architecture
+ - cs.CC: Computational Complexity
+ - cs.CE: Computational Engineering, Finance, and Science
+ - cs.CG: Computational Geometry
+ - cs.CL: Computation and Language
+ - cs.CR: Cryptography and Security
+ - cs.CV: Computer Vision and Pattern Recognition
+ - cs.CY: Computers and Society
+ - cs.DB: Databases
+ - cs.DC: Distributed, Parallel, and Cluster Computing
+ - cs.DL: Digital Libraries
+ - cs.DM: Discrete Mathematics
+ - cs.DS: Data Structures and Algorithms
+ - cs.ET: Emerging Technologies
+ - cs.FL: Formal Languages and Automata Theory
+ - cs.GL: General Literature
+ - cs.GR: Graphics
+ - cs.GT: Computer Science and Game Theory
+ - cs.HC: Human-Computer Interaction
+ - cs.IR: Information Retrieval
+ - cs.IT: Information Theory
+ - cs.LG: Machine Learning
+ - cs.LO: Logic in Computer Science
+ - cs.MA: Multiagent Systems
+ - cs.MM: Multimedia
+ - cs.MS: Mathematical Software
+ - cs.NA: Numerical Analysis
+ - cs.NE: Neural and Evolutionary Computing
+ - cs.NI: Networking and Internet Architecture
+ - cs.OH: Other Computer Science
+ - cs.OS: Operating Systems
+ - cs.PF: Performance
+ - cs.PL: Programming Languages
+ - cs.RO: Robotics
+ - cs.SC: Symbolic Computation
+ - cs.SD: Sound
+ - cs.SE: Software Engineering
+ - cs.SI: Social and Information Networks
+ - cs.SY: Systems and Control
+ - econ.EM: Econometrics
+ - econ.GN: General Economics
+ - econ.TH: Theoretical Economics
+ - eess.AS: Audio and Speech Processing
+ - eess.IV: Image and Video Processing
+ - eess.SP: Signal Processing
+ - eess.SY: Systems and Control
+ - math.AC: Commutative Algebra
+ - math.AG: Algebraic Geometry
+ - math.AP: Analysis of PDEs
+ - math.AT: Algebraic Topology
+ - math.CA: Classical Analysis and ODEs
+ - math.CO: Combinatorics
+ - math.CT: Category Theory
+ - math.CV: Complex Variables
+ - math.DG: Differential Geometry
+ - math.DS: Dynamical Systems
+ - math.FA: Functional Analysis
+ - math.GM: General Mathematics
+ - math.GN: General Topology
+ - math.GR: Group Theory
+ - math.GT: Geometric Topology
+ - math.HO: History and Overview
+ - math.IT: Information Theory
+ - math.KT: K-Theory and Homology
+ - math.LO: Logic
+ - math.MG: Metric Geometry
+ - math.MP: Mathematical Physics
+ - math.NA: Numerical Analysis
+ - math.NT: Number Theory
+ - math.OA: Operator Algebras
+ - math.OC: Optimization and Control
+ - math.PR: Probability
+ - math.QA: Quantum Algebra
+ - math.RA: Rings and Algebras
+ - math.RT: Representation Theory
+ - math.SG: Symplectic Geometry
+ - math.SP: Spectral Theory
+ - math.ST: Statistics Theory
+ - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+ - astro-ph.EP: Earth and Planetary Astrophysics
+ - astro-ph.GA: Astrophysics of Galaxies
+ - astro-ph.HE: High Energy Astrophysical Phenomena
+ - astro-ph.IM: Instrumentation and Methods for Astrophysics
+ - astro-ph.SR: Solar and Stellar Astrophysics
+ - cond-mat.dis-nn: Disordered Systems and Neural Networks
+ - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+ - cond-mat.mtrl-sci: Materials Science
+ - cond-mat.other: Other Condensed Matter
+ - cond-mat.quant-gas: Quantum Gases
+ - cond-mat.soft: Soft Condensed Matter
+ - cond-mat.stat-mech: Statistical Mechanics
+ - cond-mat.str-el: Strongly Correlated Electrons
+ - cond-mat.supr-con: Superconductivity
+ - gr-qc: General Relativity and Quantum Cosmology
+ - hep-ex: High Energy Physics - Experiment
+ - hep-lat: High Energy Physics - Lattice
+ - hep-ph: High Energy Physics - Phenomenology
+ - hep-th: High Energy Physics - Theory
+ - math-ph: Mathematical Physics
+ - nlin.AO: Adaptation and Self-Organizing Systems
+ - nlin.CD: Chaotic Dynamics
+ - nlin.CG: Cellular Automata and Lattice Gases
+ - nlin.PS: Pattern Formation and Solitons
+ - nlin.SI: Exactly Solvable and Integrable Systems
+ - nucl-ex: Nuclear Experiment
+ - nucl-th: Nuclear Theory
+ - physics.acc-ph: Accelerator Physics
+ - physics.ao-ph: Atmospheric and Oceanic Physics
+ - physics.app-ph: Applied Physics
+ - physics.atm-clus: Atomic and Molecular Clusters
+ - physics.atom-ph: Atomic Physics
+ - physics.bio-ph: Biological Physics
+ - physics.chem-ph: Chemical Physics
+ - physics.class-ph: Classical Physics
+ - physics.comp-ph: Computational Physics
+ - physics.data-an: Data Analysis, Statistics and Probability
+ - physics.ed-ph: Physics Education
+ - physics.flu-dyn: Fluid Dynamics
+ - physics.gen-ph: General Physics
+ - physics.geo-ph: Geophysics
+ - physics.hist-ph: History and Philosophy of Physics
+ - physics.ins-det: Instrumentation and Detectors
+ - physics.med-ph: Medical Physics
+ - physics.optics: Optics
+ - physics.plasm-ph: Plasma Physics
+ - physics.pop-ph: Popular Physics
+ - physics.soc-ph: Physics and Society
+ - physics.space-ph: Space Physics
+ - quant-ph: Quantum Physics
+ - q-bio.BM: Biomolecules
+ - q-bio.CB: Cell Behavior
+ - q-bio.GN: Genomics
+ - q-bio.MN: Molecular Networks
+ - q-bio.NC: Neurons and Cognition
+ - q-bio.OT: Other Quantitative Biology
+ - q-bio.PE: Populations and Evolution
+ - q-bio.QM: Quantitative Methods
+ - q-bio.SC: Subcellular Processes
+ - q-bio.TO: Tissues and Organs
+ - q-fin.CP: Computational Finance
+ - q-fin.EC: Economics
+ - q-fin.GN: General Finance
+ - q-fin.MF: Mathematical Finance
+ - q-fin.PM: Portfolio Management
+ - q-fin.PR: Pricing of Securities
+ - q-fin.RM: Risk Management
+ - q-fin.ST: Statistical Finance
+ - q-fin.TR: Trading and Market Microstructure
+ - stat.AP: Applications
+ - stat.CO: Computation
+ - stat.ME: Methodology
+ - stat.ML: Machine Learning
+ - stat.OT: Other Statistics
+ - stat.TH: Statistics Theory
+ 4.2.2. 根据问题所属领域,将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复,注意:
+ - 不要输出思考过程
+ - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内
+ - 查询搜索引擎时,需要以"internet:"开头
+ - 查询Arxiv论文时,需要以Arxiv的Category值开头,例如"cs.AI:"
+ - 查询Arxiv论文时,优先用英文表述关键词进行搜索
+ - 当用多个关键词查询时,关键词之间用","分隔
+ - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
+ - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
+ - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例:
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+
+# 用户发送的消息为:
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/full.md b/plugins/wasm-go/extensions/ai-search/prompts/full.md
new file mode 100644
index 0000000000..aec605d1b8
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/full.md
@@ -0,0 +1,221 @@
+# 目标
+你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)/私有知识库,并按照如下情况回复相应内容:
+
+## 情况一:不需要查询搜索引擎/论文资料/私有知识库
+### 情况举例:
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
+
+### 回复内容示例:
+none
+
+## 情况二:需要查询搜索引擎/论文资料/私有知识库
+### 情况举例:
+1. 答复**用户发送的消息**,需依赖互联网上最新的资料
+2. 答复**用户发送的消息**,需依赖论文等专业资料
+3. 通过查询资料,可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向Arxiv论文资料库进行查询,还是向私有知识库进行查询,或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
+ 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
+ 4.2. 向私有知识库提问:将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个
+ 4.3. 向Arxiv论文资料库提问:
+ 4.3.1. 明确问题所属领域,然后确定Arxiv的Category值,Category可选的枚举如下:
+ - cs.AI: Artificial Intelligence
+ - cs.AR: Hardware Architecture
+ - cs.CC: Computational Complexity
+ - cs.CE: Computational Engineering, Finance, and Science
+ - cs.CG: Computational Geometry
+ - cs.CL: Computation and Language
+ - cs.CR: Cryptography and Security
+ - cs.CV: Computer Vision and Pattern Recognition
+ - cs.CY: Computers and Society
+ - cs.DB: Databases
+ - cs.DC: Distributed, Parallel, and Cluster Computing
+ - cs.DL: Digital Libraries
+ - cs.DM: Discrete Mathematics
+ - cs.DS: Data Structures and Algorithms
+ - cs.ET: Emerging Technologies
+ - cs.FL: Formal Languages and Automata Theory
+ - cs.GL: General Literature
+ - cs.GR: Graphics
+ - cs.GT: Computer Science and Game Theory
+ - cs.HC: Human-Computer Interaction
+ - cs.IR: Information Retrieval
+ - cs.IT: Information Theory
+ - cs.LG: Machine Learning
+ - cs.LO: Logic in Computer Science
+ - cs.MA: Multiagent Systems
+ - cs.MM: Multimedia
+ - cs.MS: Mathematical Software
+ - cs.NA: Numerical Analysis
+ - cs.NE: Neural and Evolutionary Computing
+ - cs.NI: Networking and Internet Architecture
+ - cs.OH: Other Computer Science
+ - cs.OS: Operating Systems
+ - cs.PF: Performance
+ - cs.PL: Programming Languages
+ - cs.RO: Robotics
+ - cs.SC: Symbolic Computation
+ - cs.SD: Sound
+ - cs.SE: Software Engineering
+ - cs.SI: Social and Information Networks
+ - cs.SY: Systems and Control
+ - econ.EM: Econometrics
+ - econ.GN: General Economics
+ - econ.TH: Theoretical Economics
+ - eess.AS: Audio and Speech Processing
+ - eess.IV: Image and Video Processing
+ - eess.SP: Signal Processing
+ - eess.SY: Systems and Control
+ - math.AC: Commutative Algebra
+ - math.AG: Algebraic Geometry
+ - math.AP: Analysis of PDEs
+ - math.AT: Algebraic Topology
+ - math.CA: Classical Analysis and ODEs
+ - math.CO: Combinatorics
+ - math.CT: Category Theory
+ - math.CV: Complex Variables
+ - math.DG: Differential Geometry
+ - math.DS: Dynamical Systems
+ - math.FA: Functional Analysis
+ - math.GM: General Mathematics
+ - math.GN: General Topology
+ - math.GR: Group Theory
+ - math.GT: Geometric Topology
+ - math.HO: History and Overview
+ - math.IT: Information Theory
+ - math.KT: K-Theory and Homology
+ - math.LO: Logic
+ - math.MG: Metric Geometry
+ - math.MP: Mathematical Physics
+ - math.NA: Numerical Analysis
+ - math.NT: Number Theory
+ - math.OA: Operator Algebras
+ - math.OC: Optimization and Control
+ - math.PR: Probability
+ - math.QA: Quantum Algebra
+ - math.RA: Rings and Algebras
+ - math.RT: Representation Theory
+ - math.SG: Symplectic Geometry
+ - math.SP: Spectral Theory
+ - math.ST: Statistics Theory
+ - astro-ph.CO: Cosmology and Nongalactic Astrophysics
+ - astro-ph.EP: Earth and Planetary Astrophysics
+ - astro-ph.GA: Astrophysics of Galaxies
+ - astro-ph.HE: High Energy Astrophysical Phenomena
+ - astro-ph.IM: Instrumentation and Methods for Astrophysics
+ - astro-ph.SR: Solar and Stellar Astrophysics
+ - cond-mat.dis-nn: Disordered Systems and Neural Networks
+ - cond-mat.mes-hall: Mesoscale and Nanoscale Physics
+ - cond-mat.mtrl-sci: Materials Science
+ - cond-mat.other: Other Condensed Matter
+ - cond-mat.quant-gas: Quantum Gases
+ - cond-mat.soft: Soft Condensed Matter
+ - cond-mat.stat-mech: Statistical Mechanics
+ - cond-mat.str-el: Strongly Correlated Electrons
+ - cond-mat.supr-con: Superconductivity
+ - gr-qc: General Relativity and Quantum Cosmology
+ - hep-ex: High Energy Physics - Experiment
+ - hep-lat: High Energy Physics - Lattice
+ - hep-ph: High Energy Physics - Phenomenology
+ - hep-th: High Energy Physics - Theory
+ - math-ph: Mathematical Physics
+ - nlin.AO: Adaptation and Self-Organizing Systems
+ - nlin.CD: Chaotic Dynamics
+ - nlin.CG: Cellular Automata and Lattice Gases
+ - nlin.PS: Pattern Formation and Solitons
+ - nlin.SI: Exactly Solvable and Integrable Systems
+ - nucl-ex: Nuclear Experiment
+ - nucl-th: Nuclear Theory
+ - physics.acc-ph: Accelerator Physics
+ - physics.ao-ph: Atmospheric and Oceanic Physics
+ - physics.app-ph: Applied Physics
+ - physics.atm-clus: Atomic and Molecular Clusters
+ - physics.atom-ph: Atomic Physics
+ - physics.bio-ph: Biological Physics
+ - physics.chem-ph: Chemical Physics
+ - physics.class-ph: Classical Physics
+ - physics.comp-ph: Computational Physics
+ - physics.data-an: Data Analysis, Statistics and Probability
+ - physics.ed-ph: Physics Education
+ - physics.flu-dyn: Fluid Dynamics
+ - physics.gen-ph: General Physics
+ - physics.geo-ph: Geophysics
+ - physics.hist-ph: History and Philosophy of Physics
+ - physics.ins-det: Instrumentation and Detectors
+ - physics.med-ph: Medical Physics
+ - physics.optics: Optics
+ - physics.plasm-ph: Plasma Physics
+ - physics.pop-ph: Popular Physics
+ - physics.soc-ph: Physics and Society
+ - physics.space-ph: Space Physics
+ - quant-ph: Quantum Physics
+ - q-bio.BM: Biomolecules
+ - q-bio.CB: Cell Behavior
+ - q-bio.GN: Genomics
+ - q-bio.MN: Molecular Networks
+ - q-bio.NC: Neurons and Cognition
+ - q-bio.OT: Other Quantitative Biology
+ - q-bio.PE: Populations and Evolution
+ - q-bio.QM: Quantitative Methods
+ - q-bio.SC: Subcellular Processes
+ - q-bio.TO: Tissues and Organs
+ - q-fin.CP: Computational Finance
+ - q-fin.EC: Economics
+ - q-fin.GN: General Finance
+ - q-fin.MF: Mathematical Finance
+ - q-fin.PM: Portfolio Management
+ - q-fin.PR: Pricing of Securities
+ - q-fin.RM: Risk Management
+ - q-fin.ST: Statistical Finance
+ - q-fin.TR: Trading and Market Microstructure
+ - stat.AP: Applications
+ - stat.CO: Computation
+ - stat.ME: Methodology
+ - stat.ML: Machine Learning
+ - stat.OT: Other Statistics
+ - stat.TH: Statistics Theory
+ 4.3.2. 根据问题所属领域,将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复,注意:
+ - 不要输出思考过程
+ - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内
+ - 查询搜索引擎时,需要以"internet:"开头
+ - 查询私有知识库时,需要以"private:"开头
+ - 查询Arxiv论文时,需要以Arxiv的Category值开头,例如"cs.AI:"
+ - 查询Arxiv论文时,优先用英文表述关键词进行搜索
+ - 当用多个关键词查询时,关键词之间用","分隔
+ - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
+ - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
+ - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例:
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向Arxiv的多个类目查询多次
+cs.AI: attention mechanism
+cs.AI: neuron
+q-bio.NC: brain,attention mechanism
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+econ.TH: policy, real estate
+private: 财务状况
+
+# 用户发送的消息为:
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/internet.md b/plugins/wasm-go/extensions/ai-search/prompts/internet.md
new file mode 100644
index 0000000000..f12836fc62
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/internet.md
@@ -0,0 +1,41 @@
+# 目标
+你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing),并按照如下情况回复相应内容:
+
+## 情况一:不需要查询搜索引擎
+### 情况举例:
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
+
+### 回复内容示例:
+none
+
+## 情况二:需要查询搜索引擎
+### 情况举例:
+1. 答复**用户发送的消息**,需依赖互联网上最新的资料
+2. 答复**用户发送的消息**,需依赖论文等专业资料
+3. 通过查询资料,可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
+2. How: 分析对于要查询的知识和资料,应该提出什么样的问题
+3. Adjust: 明确查询什么问题后,用一句话概括问题,并且针对搜索引擎做问题优化
+4. Final: 按照下面**回复内容示例**进行回复,注意:
+ - 不要输出思考过程
+ - 可以查询多次,多个查询用换行分隔,总查询次数控制在5次以内
+ - 需要以"internet:"开头
+ - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
+ - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
+ - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例:
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+# 用户发送的消息为:
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/private.md b/plugins/wasm-go/extensions/ai-search/prompts/private.md
new file mode 100644
index 0000000000..4ba0fc62ce
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/private.md
@@ -0,0 +1,55 @@
+# 目标
+你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/私有知识库,并按照如下情况回复相应内容:
+
+## 情况一:不需要查询搜索引擎/私有知识库
+### 情况举例:
+1. **用户发送的消息**不是在提问或寻求帮助
+2. **用户发送的消息**是要求翻译文字
+
+### 思考过程
+根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程
+
+### 回复内容示例:
+none
+
+## 情况二:需要查询搜索引擎/私有知识库
+### 情况举例:
+1. 答复**用户发送的消息**,需依赖互联网上最新的资料
+2. 答复**用户发送的消息**,需依赖论文等专业资料
+3. 通过查询资料,可以更好地答复**用户发送的消息**
+
+### 思考过程
+根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程:
+1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料
+2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向私有知识库进行查询,或者需要同时查询多个地方
+3. How: 分析对于要查询的知识和资料,应该提出什么样的问题
+4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整
+ 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化
+ 4.2. 向私有知识库提问:将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个
+5. Final: 按照下面**回复内容示例**进行回复,注意:
+ - 不要输出思考过程
+ - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内
+ - 查询搜索引擎时,需要以"internet:"开头
+ - 查询私有知识库时,需要以"private:"开头
+ - 当用多个关键词查询时,关键词之间用","分隔
+ - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词
+ - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词
+ - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题
+
+### 回复内容示例:
+
+#### 用不同语言查询多次搜索引擎
+internet: 黄金价格走势
+internet: The trend of gold prices
+
+#### 向私有知识库查询多次
+private: 电子钱包,密码
+private: 张三,身份证号
+
+#### 向多个查询目标查询多次
+internet: 中国未来房价趋势
+internet: 最新中国经济政策
+private: 财务状况
+
+# 用户发送的消息为:
+{question}
diff --git a/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
new file mode 100644
index 0000000000..64fbce9545
--- /dev/null
+++ b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py
@@ -0,0 +1,56 @@
+import argparse
+import requests
+import time
+import json
+
+def main():
+ # 解析命令行参数
+ parser = argparse.ArgumentParser(description='AI Search Test Script')
+ parser.add_argument('--question', required=True, help='The question to analyze')
+ parser.add_argument('--prompt', required=True, help='The prompt file to analyze')
+ args = parser.parse_args()
+
+ # 读取并解析prompts.md模板
+ # 这里假设prompts.md已经复制到当前目录
+ with open(args.prompt, 'r', encoding='utf-8') as f:
+ prompt_template = f.read()
+
+ # 替换模板中的{question}变量
+ prompt = prompt_template.replace('{question}', args.question)
+
+ # 准备请求数据
+ headers = {
+ 'Content-Type': 'application/json',
+ }
+ data = {
+ "model": "deepseek-v3",
+ "max_tokens": 100,
+ "messages": [
+ {
+ "role": "user",
+ "content": prompt
+ }
+ ]
+ }
+
+ # 发送请求并计时
+ start_time = time.time()
+ try:
+ response = requests.post(
+ 'http://localhost:8080/v1/chat/completions',
+ headers=headers,
+ data=json.dumps(data)
+ )
+ response.raise_for_status()
+ end_time = time.time()
+
+ # 处理响应
+ result = response.json()
+ print("Response:")
+ print(result['choices'][0]['message']['content'])
+ print(f"\nRequest took {end_time - start_time:.2f} seconds")
+ except requests.exceptions.RequestException as e:
+ print(f"Request failed: {e}")
+
+if __name__ == '__main__':
+ main()
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
index a1d6a2fe36..c3c0c23340 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md
@@ -51,14 +51,14 @@ description: AI Token限流插件配置参考
`redis`中每一项的配置字段说明
-| 配置项 | 类型 | 必填 | 默认值 | 说明 |
-| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
-| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
-| username | string | 否 | - | redis用户名 |
-| password | string | 否 | - | redis密码 |
-| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 |
-
+| 配置项 | 类型 | 必填 | 默认值 | 说明 |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- |
+| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
+| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
+| username | string | 否 | - | redis用户名 |
+| password | string | 否 | - | redis密码 |
+| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 |
+| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` |
## 配置示例
@@ -258,24 +258,12 @@ spec:
'*': "qwen-turbo"
ingress:
- qwen
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
+ url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE
priority: 100
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
-metadata:
- name: ai-statistics
- namespace: higress-system
-spec:
- defaultConfig:
- enable: true
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
- phase: UNSPECIFIED_PHASE
- priority: 200
----
-apiVersion: extensions.higress.io/v1alpha1
-kind: WasmPlugin
metadata:
name: ai-token-ratelimit
namespace: higress-system
@@ -294,7 +282,7 @@ spec:
# service_name: redis.default.svc.cluster.local
service_name: redis.dns
service_port: 6379
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
+ url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE
priority: 600
```
@@ -370,10 +358,19 @@ spec:
pathType: Prefix
```
+转发 higress-gateway 的流量到本地,方便进行测试。
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
触发限流效果如下:
```bash
-curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json" \
+-d '{
"model": "gpt-3",
"messages": [
{
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
index c07e7aa2f6..cf502198e2 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md
@@ -43,13 +43,14 @@ Field descriptions for each item in `limit_keys`
| token_per_day | int | No, optionally select one in `token_per_second`, `token_per_minute`, `token_per_hour`, `token_per_day` | - | Allowed number of token requests per day |
Field descriptions for each item in `redis`
-| Configuration Item | Type | Required | Default Value | Description |
-| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
-| service_name | string | Required | - | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
-| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service |
-| username | string | No | - | Redis username |
-| password | string | No | - | Redis password |
-| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
+| Configuration Item | Type | Required | Default Value | Description |
+| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- |
+| service_name | string | Required | - | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local |
+| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service |
+| username | string | No | - | Redis username |
+| password | string | No | - | Redis password |
+| timeout | int | No | 1000 | Redis connection timeout in milliseconds |
+| database | int | No | 0 | ID of the Redis database to use; for example, `1` corresponds to `SELECT 1`. |
## Configuration Examples
### Identify request parameter apikey for differentiated rate limiting
@@ -233,24 +234,12 @@ spec:
'*': "qwen-turbo"
ingress:
- qwen
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0
+ url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0
phase: UNSPECIFIED_PHASE
priority: 100
---
apiVersion: extensions.higress.io/v1alpha1
kind: WasmPlugin
-metadata:
- name: ai-statistics
- namespace: higress-system
-spec:
- defaultConfig:
- enable: true
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0
- phase: UNSPECIFIED_PHASE
- priority: 200
----
-apiVersion: extensions.higress.io/v1alpha1
-kind: WasmPlugin
metadata:
name: ai-token-ratelimit
namespace: higress-system
@@ -269,7 +258,7 @@ spec:
# service_name: redis.default.svc.cluster.local
service_name: redis.dns
service_port: 6379
- url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0
+ url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0
phase: UNSPECIFIED_PHASE
priority: 600
```
@@ -346,10 +335,19 @@ spec:
pathType: Prefix
```
+Forward higress-gateway traffic to a local port to make testing easier.
+
+```bash
+kubectl port-forward svc/higress-gateway -n higress-system 18000:80
+```
+
The rate limiting effect is triggered as follows:
```bash
-curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{
+curl "http://localhost:18000/v1/chat/completions?apikey=123456" \
+-H "Host: qwen-test.com" \
+-H "Content-Type: application/json" \
+-d '{
"model": "gpt-3",
"messages": [
{
diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
index 9668f18617..743f2925f5 100644
--- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
+++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
FQDN: serviceName,
Port: int64(servicePort),
})
- return config.redisClient.Init(username, password, int64(timeout))
+ database := int(redisConfig.Get("database").Int())
+ return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
index 883e2535c2..f75ea01bcb 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md
@@ -52,13 +52,14 @@ description: 基于 Key 集群限流插件配置参考
`redis` 中每一项的配置字段说明。
-| 配置项 | 类型 | 必填 | 默认值 | 说明 |
-| ------------ | ------ | ---- | ---------------------------------------------------------- |---------------------------------------------------------------------------|
+| 配置项 | 类型 | 必填 | 默认值 | 说明 |
+| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------------------------------------------------------- |
| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local |
-| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
-| username | string | 否 | - | redis 用户名 |
-| password | string | 否 | - | redis 密码 |
-| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 |
+| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 |
+| username | string | 否 | - | redis 用户名 |
+| password | string | 否 | - | redis 密码 |
+| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 |
+| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` |
## 配置示例
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
index 4a4dcf8633..83e0935d91 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md
@@ -46,13 +46,15 @@ Description of configuration fields for each item in `limit_keys`.
| query_per_day | int | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | - | Allowed number of requests per day. |
Description of configuration fields for each item in `redis`.
-| Configuration Item | Type | Required | Default Value | Description |
-|---------------------------|---------------|----------|------------------------------------------------------------|---------------------------------------------------------------------------|
-| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
-| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. |
-| username | string | No | - | Redis username. |
-| password | string | No | - | Redis password. |
-| timeout | int | No | 1000 | Redis connection timeout in milliseconds. |
+| Configuration Item | Type | Required | Default Value | Description |
+|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------|
+| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. |
+| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. |
+| username | string | No | - | Redis username. |
+| password | string | No | - | Redis password. |
+| timeout | int | No | 1000 | Redis connection timeout in milliseconds. |
+| database | int | No | 0 | ID of the Redis database to use; for example, `1` corresponds to `SELECT 1`. |
+
## Configuration Examples
diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
index 3689c36561..00d84b21fc 100644
--- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
+++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go
@@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig
FQDN: serviceName,
Port: int64(servicePort),
})
- return config.redisClient.Init(username, password, int64(timeout))
+ database := int(redisConfig.Get("database").Int())
+ return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database))
}
func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error {
diff --git a/plugins/wasm-go/extensions/ext-auth/README.md b/plugins/wasm-go/extensions/ext-auth/README.md
index cca0f655c8..de7e2feb83 100644
--- a/plugins/wasm-go/extensions/ext-auth/README.md
+++ b/plugins/wasm-go/extensions/ext-auth/README.md
@@ -77,6 +77,7 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule`
| 名称 | 数据类型 | 必填 | 默认值 | 描述 |
| ------------------- | -------- | ---- | ------ | ------------------------------------------------------------ |
| `match_rule_domain` | string | 否 | - | 匹配规则域名,支持通配符模式,例如 `*.bar.com` |
+| `match_rule_method` | []string | 否 | - | 匹配请求方法 |
| `match_rule_path` | string | 否 | - | 匹配请求路径的规则 |
| `match_rule_type` | string | 否 | - | 匹配请求路径的规则类型,可选 `exact` , `prefix` , `suffix`, `contains`, `regex` |
@@ -100,27 +101,41 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule`
**白名单模式**
```yaml
+# 白名单模式配置,符合白名单规则的请求无需验证
match_type: 'whitelist'
match_list:
- - match_rule_domain: '*.bar.com'
- match_rule_path: '/foo'
- match_rule_type: 'prefix'
+ # 所有以 api.example.com 为域名,且路径前缀为 /public 的请求无需验证
+ - match_rule_domain: 'api.example.com'
+ match_rule_path: '/public'
+ match_rule_type: 'prefix'
+ # 针对图片资源服务器 images.example.com,所有 GET 请求无需验证
+ - match_rule_domain: 'images.example.com'
+ match_rule_method: ["GET"]
+ # 所有域名下,路径精确匹配 /health-check 的 HEAD 请求无需验证
+ - match_rule_method: ["HEAD"]
+ match_rule_path: '/health-check'
+ match_rule_type: 'exact'
```
-泛域名 `*.bar.com` 下前缀匹配 `/foo` 的请求无需验证
-
**黑名单模式**
```yaml
+# 黑名单模式配置,符合黑名单规则的请求需要验证
match_type: 'blacklist'
match_list:
- - match_rule_domain: '*.bar.com'
- match_rule_path: '/headers'
- match_rule_type: 'prefix'
+ # 所有以 admin.example.com 为域名,且路径前缀为 /sensitive 的请求需要验证
+ - match_rule_domain: 'admin.example.com'
+ match_rule_path: '/sensitive'
+ match_rule_type: 'prefix'
+ # 所有域名下,路径精确匹配 /user 的 DELETE 请求需要验证
+ - match_rule_method: ["DELETE"]
+ match_rule_path: '/user'
+ match_rule_type: 'exact'
+ # 所有以 legacy.example.com 为域名的 POST 请求需要验证
+ - match_rule_domain: 'legacy.example.com'
+ match_rule_method: ["POST"]
```
-只有泛域名 `*.bar.com` 下前缀匹配 `/header` 的请求需要验证
-
## 配置示例
下面假设 `ext-auth` 服务在 Kubernetes 中 serviceName 为 `ext-auth`,端口 `8090`,路径为 `/auth`,命名空间为 `backend`
@@ -185,13 +200,13 @@ content-length: 0
http_service:
authorization_request:
allowed_headers:
- - exact: x-auth-version
+ - exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- - exact: x-user-id
- - exact: x-auth-version
+ - exact: x-user-id
+ - exact: x-auth-version
endpoint_mode: envoy
endpoint:
service_name: ext-auth.backend.svc.cluster.local
@@ -287,13 +302,13 @@ content-length: 0
http_service:
authorization_request:
allowed_headers:
- - exact: x-auth-version
+ - exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- - exact: x-user-id
- - exact: x-auth-version
+ - exact: x-user-id
+ - exact: x-auth-version
endpoint_mode: forward_auth
endpoint:
service_name: ext-auth.backend.svc.cluster.local
diff --git a/plugins/wasm-go/extensions/ext-auth/README_EN.md b/plugins/wasm-go/extensions/ext-auth/README_EN.md
index a095690cf6..8a01216009 100644
--- a/plugins/wasm-go/extensions/ext-auth/README_EN.md
+++ b/plugins/wasm-go/extensions/ext-auth/README_EN.md
@@ -77,6 +77,7 @@ Configuration fields for each item of `MatchRule` type. When using `array of Mat
| Name | Data Type | Required | Default Value | Description |
| --- | --- | --- | --- | --- |
| `match_rule_domain` | string | No | - | The domain of the matching rule, supports wildcard patterns, e.g., `*.bar.com` |
+| `match_rule_method` | []string | No | - | List of HTTP request methods to match (case-insensitive); if omitted or empty, any method matches |
| `match_rule_path` | string | No | - | The rule for matching the request path |
| `match_rule_type` | string | No | - | The type of the rule for matching the request path, can be `exact`, `prefix`, `suffix`, `contains`, `regex` |
@@ -100,27 +101,41 @@ Supports blacklist and whitelist mode configuration. The default is the whitelis
**Whitelist Mode**
```yaml
+# Configuration for the whitelist mode. Requests that match the whitelist rules do not need verification.
match_type: 'whitelist'
match_list:
- - match_rule_domain: '*.bar.com'
- match_rule_path: '/foo'
- match_rule_type: 'prefix'
+ # Requests with the domain name api.example.com and a path prefixed with /public do not need verification.
+ - match_rule_domain: 'api.example.com'
+ match_rule_path: '/public'
+ match_rule_type: 'prefix'
+ # For the image resource server images.example.com, all GET requests do not need verification.
+ - match_rule_domain: 'images.example.com'
+ match_rule_method: ["GET"]
+ # For all domains, HEAD requests with an exact path match of /health-check do not need verification.
+ - match_rule_method: ["HEAD"]
+ match_rule_path: '/health-check'
+ match_rule_type: 'exact'
```
-Requests with a prefix match of `/foo` under the wildcard domain `*.bar.com` do not need to be verified.
-
**Blacklist Mode**
```yaml
+# Configuration for the blacklist mode. Requests that match the blacklist rules need verification.
match_type: 'blacklist'
match_list:
- - match_rule_domain: '*.bar.com'
- match_rule_path: '/headers'
- match_rule_type: 'prefix'
+ # Requests with the domain name admin.example.com and a path prefixed with /sensitive need verification.
+ - match_rule_domain: 'admin.example.com'
+ match_rule_path: '/sensitive'
+ match_rule_type: 'prefix'
+ # For all domains, DELETE requests with an exact path match of /user need verification.
+ - match_rule_method: ["DELETE"]
+ match_rule_path: '/user'
+ match_rule_type: 'exact'
+ # For the domain legacy.example.com, all POST requests need verification.
+ - match_rule_domain: 'legacy.example.com'
+ match_rule_method: ["POST"]
```
-Only requests with a prefix match of `/header` under the wildcard domain `*.bar.com` need to be verified.
-
## Configuration Examples
@@ -186,13 +201,13 @@ Configuration of the `ext-auth` plugin:
http_service:
authorization_request:
allowed_headers:
- - exact: x-auth-version
+ - exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- - exact: x-user-id
- - exact: x-auth-version
+ - exact: x-user-id
+ - exact: x-auth-version
endpoint_mode: envoy
endpoint:
service_name: ext-auth.backend.svc.cluster.local
@@ -286,13 +301,13 @@ Configuration of the `ext-auth` plugin:
http_service:
authorization_request:
allowed_headers:
- - exact: x-auth-version
+ - exact: x-auth-version
headers_to_add:
x-envoy-header: true
authorization_response:
allowed_upstream_headers:
- - exact: x-user-id
- - exact: x-auth-version
+ - exact: x-user-id
+ - exact: x-auth-version
endpoint_mode: forward_auth
endpoint:
service_name: ext-auth.backend.svc.cluster.local
diff --git a/plugins/wasm-go/extensions/ext-auth/config/config.go b/plugins/wasm-go/extensions/ext-auth/config/config.go
index 5709bbf9b0..def0955cea 100644
--- a/plugins/wasm-go/extensions/ext-auth/config/config.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config.go
@@ -260,19 +260,28 @@ func parseMatchRules(json gjson.Result, config *ExtAuthConfig) error {
var err error
matchListConfig.ForEach(func(key, value gjson.Result) bool {
- pathMatcher, buildErr := expr.BuildStringMatcher(
- value.Get("match_rule_type").Str,
- value.Get("match_rule_path").Str, false)
- if buildErr != nil {
- err = fmt.Errorf("failed to build string matcher for rule with domain %q, path %q, type %q: %w",
- value.Get("match_rule_domain").Str,
- value.Get("match_rule_path").Str,
- value.Get("match_rule_type").Str,
- buildErr)
- return false // stop iterating
+ domain := value.Get("match_rule_domain").Str
+ methodArray := value.Get("match_rule_method").Array()
+ matchRuleType := value.Get("match_rule_type").Str
+ matchRulePath := value.Get("match_rule_path").Str
+
+ var pathMatcher expr.Matcher
+ var buildErr error
+
+ if matchRuleType == "" && matchRulePath == "" {
+ pathMatcher = nil
+ } else {
+ pathMatcher, buildErr = expr.BuildStringMatcher(matchRuleType, matchRulePath, false)
+ if buildErr != nil {
+ err = fmt.Errorf("failed to build string matcher for rule with domain %q, method %v, path %q, type %q: %w",
+ domain, methodArray, matchRulePath, matchRuleType, buildErr)
+ return false // stop iterating
+ }
}
+
ruleList = append(ruleList, expr.Rule{
- Domain: value.Get("match_rule_domain").Str,
+ Domain: domain,
+ Method: convertToStringList(methodArray),
Path: pathMatcher,
})
return true // keep iterating
@@ -297,3 +306,11 @@ func convertToStringMap(result gjson.Result) map[string]string {
})
return m
}
+
+func convertToStringList(results []gjson.Result) []string { // maps each gjson array item to its String() value, preserving order
+ interfaces := make([]string, len(results)) // make() yields a non-nil slice even when results is empty
+ for i, result := range results {
+ interfaces[i] = result.String()
+ }
+ return interfaces
+}
diff --git a/plugins/wasm-go/extensions/ext-auth/config/config_test.go b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
index 02750356e7..299035f458 100644
--- a/plugins/wasm-go/extensions/ext-auth/config/config_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/config/config_test.go
@@ -218,6 +218,7 @@ func TestParseConfig(t *testing.T) {
RuleList: []expr.Rule{
{
Domain: "*.bar.com",
+ Method: []string{},
Path: func() expr.Matcher {
pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternPrefix, "/headers", false)
if err != nil {
@@ -248,6 +249,7 @@ func TestParseConfig(t *testing.T) {
"match_list": [
{
"match_rule_domain": "*.foo.com",
+ "match_rule_method": ["GET"],
"match_rule_path": "/api",
"match_rule_type": "exact"
}
@@ -269,6 +271,7 @@ func TestParseConfig(t *testing.T) {
RuleList: []expr.Rule{
{
Domain: "*.foo.com",
+ Method: []string{"GET"},
Path: func() expr.Matcher {
pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternExact, "/api", false)
if err != nil {
@@ -284,6 +287,50 @@ func TestParseConfig(t *testing.T) {
StatusOnError: 403,
},
},
+ {
+ name: "Valid Match Rules with Whitelist - Only Method",
+ json: `{
+ "http_service": {
+ "endpoint_mode": "envoy",
+ "endpoint": {
+ "service_name": "example.com",
+ "service_port": 80,
+ "path_prefix": "/auth"
+ }
+ },
+ "match_type": "whitelist",
+ "match_list": [
+ {
+ "match_rule_method": ["GET"]
+ }
+ ]
+ }`,
+ expected: ExtAuthConfig{
+ HttpService: HttpService{
+ EndpointMode: "envoy",
+ Client: wrapper.NewClusterClient(wrapper.FQDNCluster{
+ FQDN: "example.com",
+ Port: 80,
+ Host: "",
+ }),
+ PathPrefix: "/auth",
+ Timeout: 1000,
+ },
+ MatchRules: expr.MatchRules{
+ Mode: "whitelist",
+ RuleList: []expr.Rule{
+ {
+ Domain: "",
+ Method: []string{"GET"},
+ Path: nil,
+ },
+ },
+ },
+ FailureModeAllow: false,
+ FailureModeAllowHeaderAdd: false,
+ StatusOnError: 403,
+ },
+ },
{
name: "Missing Match Type",
json: `{
@@ -342,12 +389,13 @@ func TestParseConfig(t *testing.T) {
"match_list": [
{
"match_rule_domain": "*.bar.com",
+ "match_rule_method": ["POST","PUT","DELETE"],
"match_rule_path": "/headers",
"match_rule_type": "invalid_type"
}
]
}`,
- expectedErr: `failed to build string matcher for rule with domain "*.bar.com", path "/headers", type "invalid_type": unknown string matcher type`,
+ expectedErr: `failed to build string matcher for rule with domain "*.bar.com", method [POST PUT DELETE], path "/headers", type "invalid_type": unknown string matcher type`,
},
}
diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
index c4c89fe385..bc74cd9bff 100644
--- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
+++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go
@@ -3,6 +3,7 @@ package expr
import (
"strings"
+ "ext-auth/util"
regexp "github.com/wasilibs/go-re2"
)
@@ -18,6 +19,7 @@ type MatchRules struct {
type Rule struct {
Domain string
+ Method []string
Path Matcher
}
@@ -28,19 +30,19 @@ func MatchRulesDefaults() MatchRules {
}
}
-// IsAllowedByMode checks if the given domain and path are allowed based on the configuration mode.
-func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
+// IsAllowedByMode checks if the given domain, method and path are allowed based on the configuration mode.
+func (config *MatchRules) IsAllowedByMode(domain, method, path string) bool {
switch config.Mode {
case ModeWhitelist:
for _, rule := range config.RuleList {
- if rule.matchDomainAndPath(domain, path) {
+ if rule.matchesAllConditions(domain, method, path) {
return true
}
}
return false
case ModeBlacklist:
for _, rule := range config.RuleList {
- if rule.matchDomainAndPath(domain, path) {
+ if rule.matchesAllConditions(domain, method, path) {
return false
}
}
@@ -50,17 +52,21 @@ func (config *MatchRules) IsAllowedByMode(domain, path string) bool {
}
}
-// matchDomainAndPath checks if the given domain and path match the rule.
-// If rule.Domain is empty, it only checks rule.Path.
-// If rule.Path is empty, it only checks rule.Domain.
-// If both are empty, it returns false.
-func (rule *Rule) matchDomainAndPath(domain, path string) bool {
- if rule.Domain == "" && rule.Path == nil {
+// matchesAllConditions reports whether domain, method and path all satisfy the rule; an empty Domain, nil Path or empty Method list leaves that field unconstrained.
+func (rule *Rule) matchesAllConditions(domain, method, path string) bool {
+ // A rule with no conditions at all never matches
+ if rule.Domain == "" && rule.Path == nil && len(rule.Method) == 0 {
return false
}
+
+ // Domain and path: an unset condition matches anything
domainMatch := rule.Domain == "" || matchDomain(domain, rule.Domain)
pathMatch := rule.Path == nil || rule.Path.Match(path)
- return domainMatch && pathMatch
+
+ // HTTP method: an empty list allows any method; names are compared case-insensitively (util.ContainsString uses strings.EqualFold)
+ methodMatch := len(rule.Method) == 0 || util.ContainsString(rule.Method, method)
+
+ return domainMatch && pathMatch && methodMatch
}
// matchDomain checks if the given domain matches the pattern.
diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
index 5d041262ac..f6ab9a542f 100644
--- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
+++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go
@@ -6,11 +6,20 @@ import (
"github.com/stretchr/testify/assert"
)
+func createMatcher(pattern string, caseSensitive bool) Matcher { // test helper: builds an exact-match Matcher for pattern
+ pathMatcher, err := newStringExactMatcher(pattern, caseSensitive)
+ if err != nil {
+ panic(err) // panics rather than calling t.Fatalf because no *testing.T is passed in
+ }
+ return pathMatcher
+}
+
func TestIsAllowedByMode(t *testing.T) {
tests := []struct {
name string
config MatchRules
domain string
+ method string
path string
expected bool
}{
@@ -21,17 +30,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: true,
},
@@ -42,18 +47,14 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
- path: "/bar",
+ method: "POST",
+ path: "/foo",
expected: false,
},
{
@@ -63,17 +64,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: false,
},
@@ -84,18 +81,14 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
- path: "/bar",
+ method: "POST",
+ path: "/foo",
expected: true,
},
{
@@ -107,6 +100,7 @@ func TestIsAllowedByMode(t *testing.T) {
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: true,
},
@@ -117,29 +111,25 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: true,
},
{
- name: "Both Domain and Path are empty",
+ name: "All fields (Domain, Method, Path) are empty",
config: MatchRules{
Mode: ModeWhitelist,
RuleList: []Rule{
- {Domain: "", Path: nil},
+ {Domain: "", Method: []string{}, Path: nil},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: false,
},
@@ -150,17 +140,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: false,
},
@@ -171,17 +157,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "sub.example.com",
+ method: "GET",
path: "/foo",
expected: true,
},
@@ -192,20 +174,48 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: false,
},
+ {
+ name: "Whitelist mode, only method matches",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Method: []string{"GET"},
+ Path: nil,
+ },
+ },
+ },
+ domain: "example.com",
+ method: "GET",
+ path: "/foo",
+ expected: true,
+ },
+ {
+ name: "Whitelist mode, only domain matches",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Domain: "example.com",
+ Path: nil,
+ },
+ },
+ },
+ domain: "example.com",
+ method: "GET",
+ path: "/foo",
+ expected: true,
+ },
{
name: "Blacklist mode, generic domain matches",
config: MatchRules{
@@ -213,17 +223,13 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "sub.example.com",
+ method: "GET",
path: "/foo",
expected: false,
},
@@ -234,25 +240,89 @@ func TestIsAllowedByMode(t *testing.T) {
RuleList: []Rule{
{
Domain: "*.example.com",
- Path: func() Matcher {
- pathMatcher, err := newStringExactMatcher("/foo", true)
- if err != nil {
- t.Fatalf("Failed to create Matcher: %v", err)
- }
- return pathMatcher
- }(),
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
},
},
},
domain: "example.com",
+ method: "GET",
path: "/foo",
expected: true,
},
+ {
+ name: "Domain with special characters",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Domain: "example-*.com",
+ Method: []string{"GET"},
+ Path: createMatcher("/foo", true),
+ },
+ },
+ },
+ domain: "example-test.com",
+ method: "GET",
+ path: "/foo",
+ expected: true,
+ },
+ {
+ name: "Path with special characters",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Domain: "example.com",
+ Method: []string{"GET"},
+ Path: createMatcher("/foo-bar", true),
+ },
+ },
+ },
+ domain: "example.com",
+ method: "GET",
+ path: "/foo-bar",
+ expected: true,
+ },
+ {
+ name: "Multiple methods, one matches",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Domain: "example.com",
+ Method: []string{"GET", "POST"},
+ Path: createMatcher("/foo", true),
+ },
+ },
+ },
+ domain: "example.com",
+ method: "POST",
+ path: "/foo",
+ expected: true,
+ },
+ {
+ name: "Multiple methods, none match",
+ config: MatchRules{
+ Mode: ModeWhitelist,
+ RuleList: []Rule{
+ {
+ Domain: "example.com",
+ Method: []string{"GET", "POST"},
+ Path: createMatcher("/foo", true),
+ },
+ },
+ },
+ domain: "example.com",
+ method: "PUT",
+ path: "/foo",
+ expected: false,
+ },
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- result := tt.config.IsAllowedByMode(tt.domain, tt.path)
+ result := tt.config.IsAllowedByMode(tt.domain, tt.method, tt.path)
assert.Equal(t, tt.expected, result)
})
}
diff --git a/plugins/wasm-go/extensions/ext-auth/main.go b/plugins/wasm-go/extensions/ext-auth/main.go
index 8cc8c05952..7d3ce54b4a 100644
--- a/plugins/wasm-go/extensions/ext-auth/main.go
+++ b/plugins/wasm-go/extensions/ext-auth/main.go
@@ -51,9 +51,8 @@ const (
)
func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig, log wrapper.Log) types.Action {
- path := wrapper.GetRequestPathWithoutQuery()
- // If the request's domain and path match the MatchRules, skip authentication
+ // If the request's domain, method and path are allowed by the MatchRules, skip authentication
- if config.MatchRules.IsAllowedByMode(ctx.Host(), path) {
+ if config.MatchRules.IsAllowedByMode(ctx.Host(), ctx.Method(), wrapper.GetRequestPathWithoutQuery()) {
ctx.DontReadRequestBody()
return types.ActionContinue
}
diff --git a/plugins/wasm-go/extensions/ext-auth/util/utils.go b/plugins/wasm-go/extensions/ext-auth/util/utils.go
index eef1852878..2f6d8586a9 100644
--- a/plugins/wasm-go/extensions/ext-auth/util/utils.go
+++ b/plugins/wasm-go/extensions/ext-auth/util/utils.go
@@ -37,3 +37,12 @@ func ExtractFromHeader(headers [][2]string, headerKey string) string {
}
return ""
}
+
+func ContainsString(slice []string, s string) bool { // reports whether slice contains s, ignoring case
+ for _, item := range slice {
+ if strings.EqualFold(item, s) { // case-folding comparison, so "get" matches "GET"
+ return true
+ }
+ }
+ return false // also the result for a nil or empty slice
+}
diff --git a/plugins/wasm-rust/Dockerfile b/plugins/wasm-rust/Dockerfile
index b2c6725b14..ffdea8c537 100644
--- a/plugins/wasm-rust/Dockerfile
+++ b/plugins/wasm-rust/Dockerfile
@@ -3,10 +3,10 @@ WORKDIR /workspace
RUN rustup target add wasm32-wasip1
ARG PLUGIN_NAME="say-hello"
ARG BUILD_OPTS="--release"
-ARG BUILDRC=".buildrc"
+ARG PREBUILD=".prebuild"
COPY . .
WORKDIR /workspace/extensions/$PLUGIN_NAME
-RUN if [ -f $BUILDRC ]; then sh $BUILDRC; fi
+RUN if [ -f "$PREBUILD" ]; then sh -e "$PREBUILD"; fi
RUN cargo build --target wasm32-wasip1 $BUILD_OPTS \
&& cp target/wasm32-wasip1/release/*.wasm /main.wasm
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc b/plugins/wasm-rust/extensions/ai-data-masking/.buildrc
deleted file mode 100644
index bd317b8605..0000000000
--- a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc
+++ /dev/null
@@ -1 +0,0 @@
-apt update && apt-get install gcc gcc-multilib llvm clang -y && apt clean
\ No newline at end of file
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.prebuild b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild
new file mode 100644
index 0000000000..ba1b9b5d6d
--- /dev/null
+++ b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild
@@ -0,0 +1,3 @@
+apt-get update
+apt-get install gcc gcc-multilib llvm clang -y
+apt-get clean
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
index ca2db3da42..dc10bc3715 100644
--- a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
+++ b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs
@@ -13,8 +13,10 @@
// limitations under the License.
mod deny_word;
+mod msg_window;
use crate::deny_word::DenyWord;
+use crate::msg_window::MsgWindow;
use fancy_regex::Regex;
use grok::patterns;
use higress_wasm_rust::log::Log;
@@ -27,8 +29,8 @@ use proxy_wasm::traits::{Context, HttpContext, RootContext};
use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel};
use rust_embed::Embed;
use serde::de::Error;
-use serde::Deserialize;
use serde::Deserializer;
+use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap, VecDeque};
@@ -66,9 +68,12 @@ struct AiDataMasking {
config: Option>,
mask_map: HashMap>,
is_openai: bool,
+ is_openai_stream: Option,
stream: bool,
- res_body: Bytes,
log: Log,
+ msg_window: MsgWindow,
+ char_window_size: usize,
+ byte_window_size: usize,
}
fn deserialize_regexp<'de, D>(deserializer: D) -> Result
where
@@ -213,10 +218,33 @@ struct ResMessage {
#[serde(default)]
delta: Option,
}
+
+#[derive(Default, Debug, Deserialize, Serialize, Clone)]
+struct Usage { // token counters read from the `usage` object of a response chunk (see Res.usage)
+ completion_tokens: i32, // NOTE(review): none of the three fields has a serde default, so a `usage` object missing one presumably fails to parse — confirm
+ prompt_tokens: i32,
+ total_tokens: i32,
+}
+
+impl Usage {
+ pub fn add(&mut self, usage: &Usage) { // field-wise accumulation of another Usage into self
+ self.completion_tokens += usage.completion_tokens;
+ self.prompt_tokens += usage.prompt_tokens;
+ self.total_tokens += usage.total_tokens;
+ }
+ pub fn reset(&mut self) { // zeroes all three counters
+ self.completion_tokens = 0;
+ self.prompt_tokens = 0;
+ self.total_tokens = 0;
+ }
+}
+
#[derive(Default, Debug, Deserialize)]
struct Res {
#[serde(default)]
choices: Vec,
+ #[serde(default)]
+ usage: Usage, // NOTE(review): `default` only covers a missing key; an explicit `"usage": null` would presumably fail to deserialize the whole chunk — confirm against real upstream payloads
}
static SYSTEM_PATTERNS: &[(&str, &str)] = &[
@@ -334,9 +362,12 @@ impl RootContextWrapper for AiDataMaskingRoot {
mask_map: HashMap::new(),
config: None,
is_openai: false,
+ is_openai_stream: None,
stream: false,
- res_body: Bytes::new(),
+ msg_window: MsgWindow::new(),
log: Log::new(PLUGIN_NAME.to_string()),
+ char_window_size: 0,
+ byte_window_size: 0,
}))
}
}
@@ -416,32 +447,6 @@ impl AiDataMasking {
DataAction::StopIterationAndBuffer
}
- fn process_sse_message(&mut self, sse_message: &str) -> Vec {
- let mut messages = Vec::new();
- for msg in sse_message.split('\n') {
- if !msg.starts_with("data:") {
- continue;
- }
- let res: Res = if let Some(m) = msg.strip_prefix("data:") {
- match serde_json::from_str(m) {
- Ok(r) => r,
- Err(_) => continue,
- }
- } else {
- continue;
- };
-
- if res.choices.is_empty() {
- continue;
- }
- for choice in &res.choices {
- if let Some(delta) = &choice.delta {
- messages.push(delta.content.clone());
- }
- }
- }
- messages
- }
fn replace_request_msg(&mut self, message: &str) -> String {
let config = self.config.as_ref().unwrap();
let mut msg = message.to_string();
@@ -464,6 +469,13 @@ impl AiDataMasking {
}
Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(),
};
+ if to_word.len() > self.byte_window_size {
+ self.byte_window_size = to_word.len();
+ }
+ if to_word.chars().count() > self.char_window_size {
+ self.char_window_size = to_word.chars().count();
+ }
+
replace_pair.push((from_word.to_string(), to_word.clone()));
if rule.restore && !to_word.is_empty() {
@@ -499,6 +511,7 @@ impl HttpContext for AiDataMasking {
_end_of_stream: bool,
) -> HeaderAction {
if has_request_body() {
+ self.set_http_request_header("Content-Length", None);
HeaderAction::StopIteration
} else {
HeaderAction::Continue
@@ -512,58 +525,41 @@ impl HttpContext for AiDataMasking {
self.set_http_response_header("Content-Length", None);
HeaderAction::Continue
}
- fn on_http_response_body(&mut self, body_size: usize, _end_of_stream: bool) -> DataAction {
+ fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction { // streams the response through msg_window: deny-check, restore masked words, release all but a trailing window
if !self.stream {
return DataAction::Continue;
}
- if let Some(body) = self.get_http_response_body(0, body_size) {
- self.res_body.extend(&body);
-
- if let Ok(body_str) = String::from_utf8(self.res_body.clone()) {
- if self.is_openai {
- let messages = self.process_sse_message(&body_str);
-
- if self.check_message(&messages.join("")) {
+ if body_size > 0 {
+ if let Some(body) = self.get_http_response_body(0, body_size) {
+ if self.is_openai && self.is_openai_stream.is_none() {
+ self.is_openai_stream = Some(body.starts_with(b"data:")); // decided once, from the first non-empty chunk
+ }
+ self.msg_window.push(&body, self.is_openai_stream.unwrap_or(false)); // still None for non-OpenAI traffic: treat as raw bytes instead of panicking
+ if let Ok(mut msg) = String::from_utf8(self.msg_window.message.clone()) {
if self.check_message(&msg) {
return self.deny(true);
}
- } else if self.check_message(&body_str) {
- return self.deny(true);
- }
- }
- if self.mask_map.is_empty() {
- return DataAction::Continue;
- }
- if let Ok(body_str) = std::str::from_utf8(&body) {
- let mut new_str = body_str.to_string();
- if self.is_openai {
- let messages = self.process_sse_message(body_str);
-
- for message in messages {
- let mut new_message = message.clone();
+ if !self.mask_map.is_empty() {
for (from_word, to_word) in self.mask_map.iter() {
if let Some(to) = to_word {
- new_message = new_message.replace(from_word, to);
+ msg = msg.replace(from_word, to);
}
}
- if new_message != message {
- new_str = new_str.replace(
- &json!(message).to_string(),
- &json!(new_message).to_string(),
- );
- }
- }
- } else {
- for (from_word, to_word) in self.mask_map.iter() {
- if let Some(to) = to_word {
- new_str = new_str.replace(from_word, to);
- }
}
- }
- if new_str != body_str {
- self.replace_http_response_body(new_str.as_bytes());
+ self.msg_window.message = msg.as_bytes().to_vec();
}
}
}
+ let new_body = if end_of_stream {
+ self.msg_window.finish(self.is_openai_stream.unwrap_or(false)) // may be None if the stream ended before any non-empty chunk arrived
+ } else {
+ self.msg_window.pop(
+ self.char_window_size * 2,
+ self.byte_window_size * 2,
+ self.is_openai_stream.unwrap_or(false),
+ )
+ };
+ self.replace_http_response_body(&new_body);
DataAction::Continue
}
}
@@ -586,7 +582,6 @@ impl HttpContextWrapper for AiDataMasking {
return DataAction::Continue;
}
let config = self.config.as_ref().unwrap();
-
let mut req_body = match String::from_utf8(req_body.clone()) {
Ok(r) => r,
Err(_) => return DataAction::Continue,
diff --git a/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs
new file mode 100644
index 0000000000..b8b33aacb0
--- /dev/null
+++ b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs
@@ -0,0 +1,338 @@
+use higress_wasm_rust::event_stream::EventStream;
+use serde_json::json;
+
+use crate::{Res, Usage};
+
+/// Buffer that sits between the upstream response stream and the client.
+///
+/// Incoming bytes are accumulated in `message` so the caller can inspect and
+/// rewrite the buffered text before `pop` / `finish` release it downstream.
+#[derive(Default)]
+pub(crate) struct MsgWindow {
+    /// Splits pushed bytes into individual stream events (used only when the
+    /// caller pushes with `is_openai == true`).
+    stream_parser: EventStream,
+    /// Bytes currently held back. Callers read and overwrite this directly to
+    /// apply replacements before the next `pop`.
+    pub(crate) message: Vec<u8>,
+    /// Running usage total folded in from every parsed chunk; emitted and
+    /// reset by `pop`, emitted by `finish`.
+    usage: Usage,
+}
+
+impl MsgWindow {
+    /// Creates an empty window; equivalent to `MsgWindow::default()`.
+    pub fn new() -> Self {
+        MsgWindow::default()
+    }
+
+    /// Consumes one event produced by the stream parser.
+    ///
+    /// Returns `Some(event)` when the event is passed through untouched
+    /// (empty, not a `data:` event, or a `data:` event that is neither a
+    /// parseable `Res` nor `[DONE]`). Returns `None` when the event was
+    /// absorbed: a parseable chunk has every `delta.content` appended to
+    /// `message` and its usage added to the running total, and
+    /// `data: [DONE]` is dropped because `finish` writes its own terminator.
+    fn update_event(&mut self, event: Vec<u8>) -> Option<Vec<u8>> {
+        if event.is_empty() || !event.starts_with(b"data:") {
+            Some(event)
+        } else if let Ok(res) = serde_json::from_slice::<Res>(&event[b"data:".len()..]) {
+            for choice in &res.choices {
+                if let Some(delta) = &choice.delta {
+                    self.message.extend(delta.content.as_bytes());
+                }
+            }
+            self.usage.add(&res.usage);
+            None
+        } else if event.starts_with(b"data: [DONE]") {
+            None
+        } else {
+            Some(event)
+        }
+    }
+
+    /// Appends newly received bytes to the window.
+    ///
+    /// With `is_openai` set, `data` is fed to the event parser and every
+    /// complete event is run through `update_event`; events it does not
+    /// absorb are appended to `message` verbatim. Otherwise `data` is
+    /// appended to `message` as-is.
+    pub fn push(&mut self, data: &[u8], is_openai: bool) {
+        if is_openai {
+            self.stream_parser.update(data.to_vec());
+            while let Some(event) = self.stream_parser.next() {
+                if let Some(msg) = self.update_event(event) {
+                    self.message.extend(msg);
+                }
+            }
+        } else {
+            self.message.extend(data);
+        }
+    }
+
+    /// Releases everything except the trailing window and returns it.
+    ///
+    /// * If `message` is valid UTF-8, the last `char_window_size` characters
+    ///   are kept and the rest is returned. With `is_openai` set, the released
+    ///   text is wrapped in a single `data: {...}\n\n` chunk carrying the
+    ///   accumulated usage, which is then reset.
+    /// * Otherwise the last `byte_window_size` bytes are kept and the rest is
+    ///   returned raw.
+    ///
+    /// In both cases an empty vector is returned, and nothing is consumed,
+    /// when the buffer does not exceed the window.
+    pub fn pop(
+        &mut self,
+        char_window_size: usize,
+        byte_window_size: usize,
+        is_openai: bool,
+    ) -> Vec<u8> {
+        if let Ok(message) = String::from_utf8(self.message.clone()) {
+            let total = message.chars().count();
+            if total <= char_window_size {
+                return Vec::new();
+            }
+            // Byte offset of the first retained character; `None` only when
+            // the window is zero, in which case everything is released.
+            let split = message
+                .char_indices()
+                .nth(total - char_window_size)
+                .map_or(message.len(), |(idx, _)| idx);
+            let (ret, held) = message.split_at(split);
+            self.message = held.as_bytes().to_vec();
+
+            if is_openai {
+                let usage = self.usage.clone();
+                self.usage.reset();
+                format!(
+                    "data: {}\n\n",
+                    json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": ret}}], "usage": usage})
+                ).as_bytes().to_vec()
+            } else {
+                ret.as_bytes().to_vec()
+            }
+        } else if self.message.len() <= byte_window_size {
+            // Fewer bytes than the window: subtracting would underflow and
+            // panic, so hold everything back like the character branch does.
+            Vec::new()
+        } else {
+            let split = self.message.len() - byte_window_size;
+            let held = self.message.split_off(split);
+            std::mem::replace(&mut self.message, held)
+        }
+    }
+
+    /// Flushes the parser and returns whatever is still buffered.
+    ///
+    /// With `is_openai` set, the remaining text is emitted as one final chunk
+    /// followed by `data: [DONE]`; otherwise the raw bytes are returned.
+    /// `message` itself is left untouched.
+    pub fn finish(&mut self, is_openai: bool) -> Vec<u8> {
+        if let Some(event) = self.stream_parser.flush() {
+            // NOTE(review): unlike `push`, an event that `update_event` hands
+            // back is discarded here - confirm this is intentional.
+            self.update_event(event);
+        }
+        if self.message.is_empty() {
+            // NOTE(review): no `data: [DONE]` is written on this path even
+            // when `is_openai` is set - confirm callers do not rely on it.
+            Vec::new()
+        } else if is_openai {
+            format!(
+                "data: {}\n\ndata: [DONE]\n\n",
+                json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&self.message)}}], "usage": self.usage})
+            ).as_bytes().to_vec()
+        } else {
+            self.message.clone()
+        }
+    }
+}
+
+// Unit tests for `MsgWindow`.
+#[cfg(test)]
+mod tests {
+
+ use super::*;
+
+ /// Streams an OpenAI-style SSE transcript through the window one event at
+ /// a time, rewrites "Higress" in the buffered text between pushes, and
+ /// checks that the content reassembled from the emitted chunks equals the
+ /// original text with both occurrences replaced.
+ #[test]
+ fn test_msg() {
+ let mut msg_win = MsgWindow::default();
+ // Fixture: the word "Higress" arrives split across three consecutive
+ // chunks ("H", "ig", "ress"), twice in the stream.
+ let data = r#"data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":",并"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"("},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":")。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]}
+
+data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]}
+
+data: [DONE]
+
+"#;
+ // Collects every byte the window releases downstream.
+ let mut buffer = Vec::new();
+ // Feed one event per iteration, re-adding the blank-line separator that
+ // `split` removed so the parser sees a complete event each time.
+ for line in data.split("\n\n") {
+ msg_win.push(line.as_bytes(), true);
+ msg_win.push(b"\n\n", true);
+ // Rewrite the held-back text in place, as a caller would between chunks.
+ if let Ok(mut msg) = String::from_utf8(msg_win.message.clone()) {
+ msg = msg.replace("Higress", "***higress***");
+ msg_win.message = msg.as_bytes().to_vec();
+ }
+ // Keep the last 7 characters buffered ("Higress" is 7 characters long)
+ // and release the rest.
+ buffer.extend(msg_win.pop(7, 7, true));
+ }
+ // Drain whatever is still held after the last event.
+ buffer.extend(msg_win.finish(true));
+ // Reassemble the content from the emitted chunks, checking on the way that
+ // every non-empty output line is a `data:` line with a parseable payload.
+ let mut message = String::new();
+ for line in buffer.split(|&x| x == b'\n') {
+ if line.is_empty() {
+ continue;
+ }
+ assert!(line.starts_with(b"data:"));
+ if line.starts_with(b"data: [DONE]") {
+ continue;
+ }
+ let des = serde_json::from_slice(&line[b"data:".len()..]);
+ assert!(des.is_ok());
+ let res: Res = des.unwrap();
+ for choice in &res.choices {
+ if let Some(delta) = &choice.delta {
+ message.push_str(&delta.content);
+ }
+ }
+ }
+ // Both occurrences must be replaced even though each arrived split across
+ // three chunks.
+ assert_eq!(message, "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目,旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能,如负载均衡、熔断、限流等,并支持多协议代理(包括 HTTP/1.1, HTTP/2, gRPC)。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现,满足高并发场景下的需求。");
+ }
+}
diff --git a/plugins/wasm-rust/src/event_stream.rs b/plugins/wasm-rust/src/event_stream.rs
index 97715dcac1..cb12a35f00 100644
--- a/plugins/wasm-rust/src/event_stream.rs
+++ b/plugins/wasm-rust/src/event_stream.rs
@@ -108,10 +108,7 @@ impl EventStream {
}
fn is_2eol(&self, i: usize) -> Option {
- let size1 = match self.is_eol(i) {
- None => return None,
- Some(size1) => size1,
- };
+ let size1 = self.is_eol(i)?;
if i + size1 < self.buffer.len() {
match self.is_eol(i + size1) {
None => {
diff --git a/tools/hack/build-envoy.sh b/tools/hack/build-envoy.sh
index c07f24cb1f..931d0f5b1f 100755
--- a/tools/hack/build-envoy.sh
+++ b/tools/hack/build-envoy.sh
@@ -30,9 +30,11 @@ fi
CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,destination=/home/package "
CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/envoy,destination=/home/envoy "
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
+
BUILD_WITH_CONTAINER=1 \
CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
BUILD_ENVOY_BINARY_ONLY=1 \
DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
- IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools-proxy:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+ IMG=${BUILD_TOOLS_IMG} \
make test_release
diff --git a/tools/hack/build-istio-image.sh b/tools/hack/build-istio-image.sh
index 2cb46578be..5c46753827 100755
--- a/tools/hack/build-istio-image.sh
+++ b/tools/hack/build-istio-image.sh
@@ -25,14 +25,34 @@ CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,dest
DOCKER_RUN_OPTIONS+="-e HTTP_PROXY -e HTTPS_PROXY"
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
+
+ORIGINAL_HUB=${HUB}
+
+echo "IMG_URL=$IMG_URL"
+
+if [ -n "$IMG_URL" ]; then
+ TAG=${IMG_URL#*:}
+ HUB=${IMG_URL%:*}
+ HUB=${HUB%/*}
+ if [ "$TAG" == "${IMG_URL}" ]; then
+ TAG=latest
+ fi
+fi
+
+echo "HUB=$HUB"
+echo "TAG=$TAG"
+
GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
BUILD_WITH_CONTAINER=1 \
USE_REAL_USER=${USE_REAL_USER:-0} \
CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \
DOCKER_BUILD_VARIANTS=default DOCKER_TARGETS="${DOCKER_TARGETS}" \
- ISTIO_BASE_REGISTRY="${HUB}" \
+ ISTIO_BASE_REGISTRY="${ORIGINAL_HUB}" \
BASE_VERSION="${HIGRESS_BASE_VERSION}" \
DOCKER_RUN_OPTIONS=${DOCKER_RUN_OPTIONS} \
- IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+ HUB="${HUB}" \
+ TAG="${TAG}" \
+ IMG=${BUILD_TOOLS_IMG} \
make "$@"
diff --git a/tools/hack/build-istio-pilot.sh b/tools/hack/build-istio-pilot.sh
index 351ac8962a..7acf9d1231 100755
--- a/tools/hack/build-istio-pilot.sh
+++ b/tools/hack/build-istio-pilot.sh
@@ -19,7 +19,9 @@ set -euo pipefail
source "$(dirname -- "$0")/setup-istio-env.sh"
cd ${ROOT}/external/istio
-rm -rf out/linux_${TARGET_ARCH};
+rm -rf out/linux_${TARGET_ARCH};
+
+BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"}
GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \
@@ -28,5 +30,5 @@ GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \
ISTIO_BASE_REGISTRY="${HUB}" \
BASE_VERSION="${HIGRESS_BASE_VERSION}" \
DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \
- IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \
+ IMG=${BUILD_TOOLS_IMG} \
make build-linux