diff --git a/.github/workflows/build-and-push-wasm-plugin-image.yaml b/.github/workflows/build-and-push-wasm-plugin-image.yaml index 41935de210..2406582296 100644 --- a/.github/workflows/build-and-push-wasm-plugin-image.yaml +++ b/.github/workflows/build-and-push-wasm-plugin-image.yaml @@ -133,6 +133,11 @@ jobs: command=" set -e cd /workspace/plugins/wasm-rust/extensions/${PLUGIN_NAME} + if [ -f ./.prebuild ]; then + echo 'Found .prebuild file, sourcing it...' + . ./.prebuild + fi + rustup target add wasm32-wasip1 cargo build --target wasm32-wasip1 --release cp target/wasm32-wasip1/release/*.wasm plugin.wasm tar czvf plugin.tar.gz plugin.wasm diff --git a/.github/workflows/build-image-and-push.yaml b/.github/workflows/build-image-and-push.yaml index 938b041f30..4d789ddef2 100644 --- a/.github/workflows/build-image-and-push.yaml +++ b/.github/workflows/build-image-and-push.yaml @@ -1,229 +1,258 @@ -name: Build Docker Images and Push to Image Registry - -on: - push: - tags: - - "v*.*.*" - workflow_dispatch: ~ - -jobs: - build-controller-image: - runs-on: ubuntu-latest - environment: - name: image-registry-controller - env: - CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} - CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }} - steps: - - name: "Checkout ${{ github.ref }}" - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: "Setup Go" - uses: actions/setup-go@v5 - with: - go-version: 1.21.5 - - - name: Setup Golang Caches - uses: actions/cache@v4 - with: - path: |- - ~/.cache/go-build - ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ github.run_id }} - restore-keys: ${{ runner.os }}-go - - - name: Calculate Docker metadata - id: docker-meta - uses: 
docker/metadata-action@v5 - with: - images: | - ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }} - tags: | - type=sha - type=ref,event=tag - type=semver,pattern={{version}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - - - name: Login to Docker Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }} - username: ${{ secrets.REGISTRY_USERNAME }} - password: ${{ secrets.REGISTRY_PASSWORD }} - - - name: Build Docker Image and Push - run: | - GOPROXY="https://proxy.golang.org,direct" make docker-buildx-push - BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/higress" - readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" - for image in ${IMAGES[@]}; do - echo "Image: $image" - docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image - done - - build-pilot-image: - runs-on: ubuntu-latest - environment: - name: image-registry-pilot - env: - PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} - PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }} - steps: - - name: "Checkout ${{ github.ref }}" - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: "Setup Go" - uses: actions/setup-go@v5 - with: - go-version: 1.21.5 - - - name: Setup Golang Caches - uses: actions/cache@v4 - with: - path: |- - ~/.cache/go-build - ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ github.run_id }} - restore-keys: ${{ runner.os }}-go - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Cache Docker layers - uses: actions/cache@v2 - with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os 
}}-buildx- - - - name: Calculate Docker metadata - id: docker-meta - uses: docker/metadata-action@v5 - with: - images: | - ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }} - tags: | - type=sha - type=ref,event=tag - type=semver,pattern={{version}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - - - name: Login to Docker Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.PILOT_IMAGE_REGISTRY }} - username: ${{ secrets.REGISTRY_USERNAME }} - password: ${{ secrets.REGISTRY_PASSWORD }} - - - name: Build Pilot-Discovery Image and Push - run: | - GOPROXY="https://proxy.golang.org,direct" make build-istio - BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/pilot" - readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" - for image in ${IMAGES[@]}; do - echo "Image: $image" - docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image - done - - - build-gateway-image: - runs-on: ubuntu-latest - environment: - name: image-registry-pilot - env: - GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} - GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }} - steps: - - name: "Checkout ${{ github.ref }}" - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: true - - - name: "Setup Go" - uses: actions/setup-go@v5 - with: - go-version: 1.21.5 - - - name: Setup Golang Caches - uses: actions/cache@v4 - with: - path: |- - ~/.cache/go-build - ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ github.run_id }} - restore-keys: ${{ runner.os }}-go - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Cache Docker layers - uses: actions/cache@v2 - with: - path: /tmp/.buildx-cache - key: ${{ 
runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - - name: Calculate Docker metadata - id: docker-meta - uses: docker/metadata-action@v5 - with: - images: | - ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }} - tags: | - type=sha - type=ref,event=tag - type=semver,pattern={{version}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} - - - name: Login to Docker Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.GATEWAY_IMAGE_REGISTRY }} - username: ${{ secrets.REGISTRY_USERNAME }} - password: ${{ secrets.REGISTRY_PASSWORD }} - - - name: Build Gateway Image and Push - run: | - GOPROXY="https://proxy.golang.org,direct" make build-gateway - BUILT_IMAGE="higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/proxyv2" - readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" - for image in ${IMAGES[@]}; do - echo "Image: $image" - docker buildx imagetools create $BUILT_IMAGE:$GITHUB_SHA --tag $image - done +name: Build Docker Images and Push to Image Registry + +on: + push: + tags: + - "v*.*.*" + workflow_dispatch: ~ + +jobs: + build-controller-image: + runs-on: ubuntu-latest + environment: + name: image-registry-controller + env: + CONTROLLER_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} + CONTROLLER_IMAGE_NAME: ${{ vars.CONTROLLER_IMAGE_NAME || 'higress/higress' }} + steps: + - name: "Checkout ${{ github.ref }}" + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: "Setup Go" + uses: actions/setup-go@v5 + with: + go-version: 1.21.5 + + - name: Setup Golang Caches + uses: actions/cache@v4 + with: + path: |- + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ github.run_id }} + 
restore-keys: ${{ runner.os }}-go + + - name: Calculate Docker metadata + id: docker-meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ env.CONTROLLER_IMAGE_REGISTRY }}/${{ env.CONTROLLER_IMAGE_NAME }} + tags: | + type=sha + type=ref,event=tag + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.CONTROLLER_IMAGE_REGISTRY }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Build Docker Image and Push + run: | + BUILT_IMAGE="" + readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" + for image in ${IMAGES[@]}; do + echo "Image: $image" + if [ "$BUILT_IMAGE" == "" ]; then + GOPROXY="https://proxy.golang.org,direct" IMG_URL="$image" make docker-buildx-push + BUILT_IMAGE="$image" + else + docker buildx imagetools create $BUILT_IMAGE --tag $image + fi + done + + build-pilot-image: + runs-on: ubuntu-latest + environment: + name: image-registry-pilot + env: + PILOT_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} + PILOT_IMAGE_NAME: ${{ vars.PILOT_IMAGE_NAME || 'higress/pilot' }} + steps: + - name: "Checkout ${{ github.ref }}" + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: "Setup Go" + uses: actions/setup-go@v5 + with: + go-version: 1.21.5 + + - name: Setup Golang Caches + uses: actions/cache@v4 + with: + path: |- + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ github.run_id }} + restore-keys: ${{ runner.os }}-go + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + image: tonistiigi/binfmt:qemu-v7.0.0 + + - name: Set up Docker 
Buildx + uses: docker/setup-buildx-action@v3 + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: Calculate Docker metadata + id: docker-meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ env.PILOT_IMAGE_REGISTRY }}/${{ env.PILOT_IMAGE_NAME }} + tags: | + type=sha + type=ref,event=tag + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.PILOT_IMAGE_REGISTRY }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Build Pilot-Discovery Image and Push + run: | + BUILT_IMAGE="" + readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" + for image in ${IMAGES[@]}; do + echo "Image: $image" + if [ "$BUILT_IMAGE" == "" ]; then + TAG=${image##*:} # strip through the LAST colon so a registry port (host:5000/...) is not taken as part of the tag + HUB=${image%:*} + HUB=${HUB%/*} + BUILT_IMAGE="$HUB/pilot:$TAG" + GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-istio + fi + if [ "$BUILT_IMAGE" != "$image" ]; then + docker buildx imagetools create "$BUILT_IMAGE" --tag "$image" + fi + done + + build-gateway-image: + runs-on: ubuntu-latest + environment: + name: image-registry-gateway + env: + GATEWAY_IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY || 'higress-registry.cn-hangzhou.cr.aliyuncs.com' }} + GATEWAY_IMAGE_NAME: ${{ vars.GATEWAY_IMAGE_NAME || 'higress/gateway' }} + steps: + - name: "Checkout ${{ github.ref }}" + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Free Up GitHub Actions Ubuntu Runner Disk Space 🔧 + uses: jlumbroso/free-disk-space@main + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + + - name: "Setup Go" + uses: actions/setup-go@v5 + with: + go-version: 1.21.5 + + - name: Setup Golang
Caches + uses: actions/cache@v4 + with: + path: |- + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ github.run_id }} + restore-keys: ${{ runner.os }}-go + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + image: tonistiigi/binfmt:qemu-v7.0.0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: Calculate Docker metadata + id: docker-meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ env.GATEWAY_IMAGE_REGISTRY }}/${{ env.GATEWAY_IMAGE_NAME }} + tags: | + type=sha + type=ref,event=tag + type=semver,pattern={{version}} + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.GATEWAY_IMAGE_REGISTRY }} + username: ${{ secrets.REGISTRY_USERNAME }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - name: Build Gateway Image and Push + run: | + BUILT_IMAGE="" + readarray -t IMAGES <<< "${{ steps.docker-meta.outputs.tags }}" + for image in ${IMAGES[@]}; do + echo "Image: $image" + if [ "$BUILT_IMAGE" == "" ]; then + TAG=${image##*:} # strip through the LAST colon so a registry port (host:5000/...) is not taken as part of the tag + HUB=${image%:*} + HUB=${HUB%/*} + BUILT_IMAGE="$HUB/proxyv2:$TAG" + GOPROXY="https://proxy.golang.org,direct" IMG_URL="$BUILT_IMAGE" make build-gateway + fi + if [ "$BUILT_IMAGE" != "$image" ]; then + docker buildx imagetools create "$BUILT_IMAGE" --tag "$image" + fi + done \ No newline at end of file diff --git a/.github/workflows/helm-docs.yaml b/.github/workflows/helm-docs.yaml index d4637dbe1b..6ed5937fe3 100644 --- a/.github/workflows/helm-docs.yaml +++ b/.github/workflows/helm-docs.yaml @@ -10,7 +10,7 @@ on: push: branches: [ main ] paths: - - 'helm/**' + - 'helm/**' jobs: helm: @@ -39,6 +39,7 @@ jobs: rm -f ./helm-docs translate-readme: + if: ${{ !
always() }} needs: helm runs-on: ubuntu-latest diff --git a/Makefile.core.mk b/Makefile.core.mk index 93aff0df81..2d84c0b118 100644 --- a/Makefile.core.mk +++ b/Makefile.core.mk @@ -162,13 +162,13 @@ buildx-prepare: build-gateway: prebuild buildx-prepare USE_REAL_USER=1 TARGET_ARCH=amd64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init USE_REAL_USER=1 TARGET_ARCH=arm64 DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh init - DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker.buildx + DOCKER_TARGETS="docker.proxyv2" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx build-gateway-local: prebuild TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.proxyv2" ./tools/hack/build-istio-image.sh docker build-istio: prebuild buildx-prepare - DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker.buildx + DOCKER_TARGETS="docker.pilot" IMG_URL="${IMG_URL}" ./tools/hack/build-istio-image.sh docker.buildx build-istio-local: prebuild TARGET_ARCH=${TARGET_ARCH} DOCKER_TARGETS="docker.pilot" ./tools/hack/build-istio-image.sh docker diff --git a/docker/docker.mk b/docker/docker.mk index b572176508..f9315a3271 100644 --- a/docker/docker.mk +++ b/docker/docker.mk @@ -35,6 +35,8 @@ DOCKER_ALL_VARIANTS ?= debug distroless INCLUDE_UNTAGGED_DEFAULT ?= false DEFAULT_DISTRIBUTION=debug -HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . 
--push ); ) -HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(HUB)/higress:$(TAG)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); ) +IMG ?= higress +IMG_URL ?= $(HUB)/$(IMG):$(TAG) +HIGRESS_DOCKER_BUILDX_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker buildx create --name higress --node higress0 --platform linux/amd64,linux/arm64 --use && docker buildx build --no-cache --platform linux/amd64,linux/arm64 $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . --push ); ) +HIGRESS_DOCKER_RULE ?= $(foreach VARIANT,$(DOCKER_BUILD_VARIANTS), time (mkdir -p $(HIGRESS_DOCKER_BUILD_TOP)/$@ && TARGET_ARCH=$(TARGET_ARCH) ./docker/docker-copy.sh $^ $(HIGRESS_DOCKER_BUILD_TOP)/$@ && cd $(HIGRESS_DOCKER_BUILD_TOP)/$@ $(BUILD_PRE) && docker build $(BUILD_ARGS) --build-arg BASE_DISTRIBUTION=$(call normalize-tag,$(VARIANT)) -t $(IMG_URL)$(call variant-tag,$(VARIANT)) -f Dockerfile.higress . ); ) diff --git a/helm/core/README.md b/helm/core/README.md index fdd3e61a79..0ccad6dfb4 100644 --- a/helm/core/README.md +++ b/helm/core/README.md @@ -2,4 +2,4 @@ Installs the core components of cloud-native gateway [Higress](http://higress.io/) -**Note:** It is highly recommended to install the whole package of Higress. Please visit https://higress.io/docs/user/quickstart/ for details. \ No newline at end of file +**Note:** It is highly recommended to install the whole package of Higress. 
Please visit https://higress.io/docs/user/quickstart/ for details. diff --git a/helm/core/templates/_pod.tpl b/helm/core/templates/_pod.tpl index 3e883d248b..c87e4d3eff 100644 --- a/helm/core/templates/_pod.tpl +++ b/helm/core/templates/_pod.tpl @@ -45,9 +45,9 @@ template: - router - --domain - $(POD_NAMESPACE).svc.cluster.local - - --proxyLogLevel=warning - - --proxyComponentLogLevel=misc:error - - --log_output_level=all:info + - --proxyLogLevel={{- default "warning" .Values.global.proxy.logLevel }} + - --proxyComponentLogLevel={{- default "misc:error" .Values.global.proxy.componentLogLevel }} + - --log_output_level={{- default "default:info" .Values.global.logging.level }} - --serviceCluster=higress-gateway securityContext: {{- if .Values.gateway.containerSecurityContext }} diff --git a/helm/core/values.yaml b/helm/core/values.yaml index 6186654a05..d4fdff6eb1 100644 --- a/helm/core/values.yaml +++ b/helm/core/values.yaml @@ -491,6 +491,7 @@ gateway: externalTrafficPolicy: "" rollingMaxSurge: 100% + # -- If global.local is true, the default value is 100%, otherwise it is 25% rollingMaxUnavailable: 25% resources: diff --git a/helm/higress/README.md b/helm/higress/README.md index 3c23f42532..2cecaa91f4 100644 --- a/helm/higress/README.md +++ b/helm/higress/README.md @@ -130,7 +130,7 @@ The command removes all the Kubernetes components associated with the chart and | gateway.resources.requests.memory | string | `"2048Mi"` | | | gateway.revision | string | `""` | revision declares which revision this gateway is a part of | | gateway.rollingMaxSurge | string | `"100%"` | | -| gateway.rollingMaxUnavailable | string | `"25%"` | | +| gateway.rollingMaxUnavailable | string | `"25%"` | If global.local is true, the default value is 100%, otherwise it is 25% | | gateway.securityContext | string | `nil` | Define the security context for the pod. If unset, this will be automatically set to the minimum privileges required to bind to port 80 and 443. 
On Kubernetes 1.22+, this only requires the `net.ipv4.ip_unprivileged_port_start` sysctl. | | gateway.service.annotations | object | `{}` | | | gateway.service.externalTrafficPolicy | string | `""` | | diff --git a/plugins/wasm-go/extensions/ai-cache/README.md b/plugins/wasm-go/extensions/ai-cache/README.md index 999f472270..70f3e1b9d4 100644 --- a/plugins/wasm-go/extensions/ai-cache/README.md +++ b/plugins/wasm-go/extensions/ai-cache/README.md @@ -86,7 +86,8 @@ LLM 结果缓存插件,默认配置方式可以直接用于 openai 协议的 | cache.password | string | optional | "" | 缓存服务密码 | | cache.timeout | uint32 | optional | 10000 | 缓存服务的超时时间,单位为毫秒。默认值是10000,即10秒 | | cache.cacheTTL | int | optional | 0 | 缓存过期时间,单位为秒。默认值是 0,即 永不过期| -| cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" | +| cache.cacheKeyPrefix | string | optional | "higress-ai-cache:" | 缓存 Key 的前缀,默认值为 "higress-ai-cache:" | +| cache.database | int | optional | 0 | 使用的数据库id,仅限redis,例如配置为1,对应`SELECT 1` | ## 其他配置 @@ -168,6 +169,7 @@ redis: serviceName: my_redis.dns servicePort: 6379 timeout: 100 + database: 1 ``` ## 进阶用法 diff --git a/plugins/wasm-go/extensions/ai-cache/README_EN.md b/plugins/wasm-go/extensions/ai-cache/README_EN.md index 7544995999..d48f9f71b9 100644 --- a/plugins/wasm-go/extensions/ai-cache/README_EN.md +++ b/plugins/wasm-go/extensions/ai-cache/README_EN.md @@ -15,26 +15,29 @@ Plugin Execution Phase: `Authentication Phase` Plugin Execution Priority: `10` ## Configuration Description -| Name | Type | Requirement | Default | Description | -| -------- | -------- | -------- | -------- | -------- | -| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax | -| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON 
PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax | +| Name | Type | Requirement | Default | Description | +| -------- | -------- | -------- | -------- | -------- | +| cacheKeyFrom.requestBody | string | optional | "messages.@reverse.0.content" | Extracts a string from the request Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax | +| cacheValueFrom.responseBody | string | optional | "choices.0.message.content" | Extracts a string from the response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax | | cacheStreamValueFrom.responseBody | string | optional | "choices.0.delta.content" | Extracts a string from the streaming response Body based on [GJSON PATH](https://github.com/tidwall/gjson/blob/master/SYNTAX.md) syntax | -| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key | -| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire | -| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | -| redis.servicePort | integer | optional | 6379 | Redis service port | -| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds | -| redis.username | string | optional | - | Username for logging into Redis | -| redis.password | string | optional | - | Password for logging into Redis | -| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value | -| returnStreamResponseTemplate | string | optional | 
`data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value | +| cacheKeyPrefix | string | optional | "higress-ai-cache:" | Prefix for the Redis cache key | +| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, which means never expire | +| redis.serviceName | string | required | - | The complete FQDN name of the Redis service, including the service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | +| redis.servicePort | integer | optional | 6379 | Redis service port | +| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds | +| redis.username | string | optional | - | Username for logging into Redis | +| redis.database | integer | optional | 0 | ID of the Redis database to use (Redis only); for example, a value of 1 corresponds to `SELECT 1`.
| +| redis.password | string | optional | - | Password for logging into Redis | +| returnResponseTemplate | string | optional | `{"id":"from-cache","choices":[%s],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}` | Template for returning HTTP response, with %s marking the part to be replaced by cache value | +| returnStreamResponseTemplate | string | optional | `data:{"id":"from-cache","choices":[{"index":0,"delta":{"role":"assistant","content":"%s"},"finish_reason":"stop"}],"model":"gpt-4o","object":"chat.completion","usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}\n\ndata:[DONE]\n\n` | Template for returning streaming HTTP response, with %s marking the part to be replaced by cache value | ## Configuration Example ```yaml redis: serviceName: my-redis.dns timeout: 2000 + servicePort: 6379 + database: 1 ``` ## Advanced Usage diff --git a/plugins/wasm-go/extensions/ai-cache/cache/provider.go b/plugins/wasm-go/extensions/ai-cache/cache/provider.go index d68acd5099..9afca2c12e 100644 --- a/plugins/wasm-go/extensions/ai-cache/cache/provider.go +++ b/plugins/wasm-go/extensions/ai-cache/cache/provider.go @@ -52,6 +52,9 @@ type ProviderConfig struct { // @Title 缓存 Key 前缀 // @Description 缓存 Key 的前缀,默认值为 "higressAiCache:" cacheKeyPrefix string + // @Title redis database + // @Description 指定 redis 的 database,默认使用0 + database int } func (c *ProviderConfig) GetProviderType() string { @@ -79,6 +82,7 @@ func (c *ProviderConfig) FromJson(json gjson.Result) { if !json.Get("password").Exists() { c.password = "" } + c.database = int(json.Get("database").Int()) c.timeout = uint32(json.Get("timeout").Int()) if !json.Get("timeout").Exists() { c.timeout = 10000 diff --git a/plugins/wasm-go/extensions/ai-cache/cache/redis.go b/plugins/wasm-go/extensions/ai-cache/cache/redis.go index 4cb69744e1..b4a116ab89 100644 --- a/plugins/wasm-go/extensions/ai-cache/cache/redis.go +++ 
b/plugins/wasm-go/extensions/ai-cache/cache/redis.go @@ -38,7 +38,7 @@ func (rp *redisProvider) GetProviderType() string { } func (rp *redisProvider) Init(username string, password string, timeout uint32) error { - return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout)) + return rp.client.Init(rp.config.username, rp.config.password, int64(rp.config.timeout), wrapper.WithDataBase(rp.config.database)) } func (rp *redisProvider) Get(key string, cb wrapper.RedisResponseCallback) error { diff --git a/plugins/wasm-go/extensions/ai-cache/config/config.go b/plugins/wasm-go/extensions/ai-cache/config/config.go index 80c6147374..bc1093a567 100644 --- a/plugins/wasm-go/extensions/ai-cache/config/config.go +++ b/plugins/wasm-go/extensions/ai-cache/config/config.go @@ -28,9 +28,9 @@ type PluginConfig struct { embeddingProvider embedding.Provider vectorProvider vector.Provider - embeddingProviderConfig embedding.ProviderConfig - vectorProviderConfig vector.ProviderConfig - cacheProviderConfig cache.ProviderConfig + embeddingProviderConfig *embedding.ProviderConfig + vectorProviderConfig *vector.ProviderConfig + cacheProviderConfig *cache.ProviderConfig CacheKeyFrom string CacheValueFrom string @@ -47,7 +47,9 @@ type PluginConfig struct { } func (c *PluginConfig) FromJson(json gjson.Result, log wrapper.Log) { - + c.embeddingProviderConfig = &embedding.ProviderConfig{} + c.vectorProviderConfig = &vector.ProviderConfig{} + c.cacheProviderConfig = &cache.ProviderConfig{} c.vectorProviderConfig.FromJson(json.Get("vector")) c.embeddingProviderConfig.FromJson(json.Get("embedding")) c.cacheProviderConfig.FromJson(json.Get("cache")) @@ -142,7 +144,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error { var err error if c.embeddingProviderConfig.GetProviderType() != "" { log.Debugf("embedding provider is set to %s", c.embeddingProviderConfig.GetProviderType()) - c.embeddingProvider, err = embedding.CreateProvider(c.embeddingProviderConfig) + 
c.embeddingProvider, err = embedding.CreateProvider(*c.embeddingProviderConfig) if err != nil { return err } @@ -152,7 +154,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error { } if c.cacheProviderConfig.GetProviderType() != "" { log.Debugf("cache provider is set to %s", c.cacheProviderConfig.GetProviderType()) - c.cacheProvider, err = cache.CreateProvider(c.cacheProviderConfig) + c.cacheProvider, err = cache.CreateProvider(*c.cacheProviderConfig) if err != nil { return err } @@ -162,7 +164,7 @@ func (c *PluginConfig) Complete(log wrapper.Log) error { } if c.vectorProviderConfig.GetProviderType() != "" { log.Debugf("vector provider is set to %s", c.vectorProviderConfig.GetProviderType()) - c.vectorProvider, err = vector.CreateProvider(c.vectorProviderConfig) + c.vectorProvider, err = vector.CreateProvider(*c.vectorProviderConfig) if err != nil { return err } @@ -182,7 +184,7 @@ func (c *PluginConfig) GetVectorProvider() vector.Provider { } func (c *PluginConfig) GetVectorProviderConfig() vector.ProviderConfig { - return c.vectorProviderConfig + return *c.vectorProviderConfig } func (c *PluginConfig) GetCacheProvider() cache.Provider { diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go new file mode 100644 index 0000000000..a61bf77827 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-cache/embedding/ollama.go @@ -0,0 +1,151 @@ +package embedding + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + "net/http" + "strconv" +) + +const ( + OLLAMA_DOMAIN = "localhost" + OLLAMA_PORT = 11434 + OLLAMA_DEFAULT_MODEL_NAME = "llama3.2" + OLLAMA_ENDPOINT = "/api/embed" +) + +type ollamaProviderInitializer struct { +} + +func (c *ollamaProviderInitializer) InitConfig(json gjson.Result) {} + +func (c *ollamaProviderInitializer) ValidateConfig() error { + return nil +} + +type ollamaProvider struct { + config 
ProviderConfig + client *wrapper.ClusterClient[wrapper.FQDNCluster] +} + +func (t *ollamaProviderInitializer) CreateProvider(c ProviderConfig) (Provider, error) { + if c.servicePort == 0 { + c.servicePort = OLLAMA_PORT + } + if c.serviceHost == "" { + c.serviceHost = OLLAMA_DOMAIN + } + if c.model == "" { + c.model = OLLAMA_DEFAULT_MODEL_NAME + } + + return &ollamaProvider{ + config: c, + client: wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: c.serviceName, + Host: c.serviceHost, + Port: c.servicePort, + }), + }, nil +} + +func (t *ollamaProvider) GetProviderType() string { + return PROVIDER_TYPE_OLLAMA +} + +type ollamaResponse struct { + Model string `json:"model"` + Embeddings [][]float64 `json:"embeddings"` + TotalDuration int64 `json:"total_duration"` + LoadDuration int64 `json:"load_duration"` + PromptEvalCount int64 `json:"prompt_eval_count"` +} + +type ollamaEmbeddingRequest struct { + Input string `json:"input"` + Model string `json:"model"` +} + +func (t *ollamaProvider) constructParameters(text string, log wrapper.Log) (string, [][2]string, []byte, error) { + if text == "" { + err := errors.New("queryString text cannot be empty") + return "", nil, nil, err + } + + data := ollamaEmbeddingRequest{ + Input: text, + Model: t.config.model, + } + + requestBody, err := json.Marshal(data) + if err != nil { + log.Errorf("failed to marshal request data: %v", err) + return "", nil, nil, err + } + + headers := [][2]string{ + {"Content-Type", "application/json"}, + } + log.Debugf("constructParameters: %s", string(requestBody)) + + return OLLAMA_ENDPOINT, headers, requestBody, nil // the only possible error (json.Marshal) was already returned above +} + +func (t *ollamaProvider) parseTextEmbedding(responseBody []byte) (*ollamaResponse, error) { + var resp ollamaResponse + if err := json.Unmarshal(responseBody, &resp); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + return &resp, nil +} + +func (t *ollamaProvider) GetEmbedding( + queryString string, + ctx wrapper.HttpContext, + log wrapper.Log, +
callback func(emb []float64, err error)) error { + embUrl, embHeaders, embRequestBody, err := t.constructParameters(queryString, log) + if err != nil { + log.Errorf("failed to construct parameters: %v", err) + return err + } + + var resp *ollamaResponse + + defer func() { + if err != nil { + callback(nil, err) + } + }() + err = t.client.Post(embUrl, embHeaders, embRequestBody, + func(statusCode int, responseHeaders http.Header, responseBody []byte) { + + if statusCode != http.StatusOK { + err = errors.New("failed to get embedding due to status code: " + strconv.Itoa(statusCode)) + callback(nil, err) + return + } + + resp, err = t.parseTextEmbedding(responseBody) + if err != nil { + err = fmt.Errorf("failed to get embedding: %w", err) // parseTextEmbedding already adds the "failed to parse response" prefix; %w keeps the error chain + callback(nil, err) + return + } + + log.Debugf("get embedding response: %d, %s", statusCode, responseBody) + + if len(resp.Embeddings) == 0 { + err = errors.New("no embedding found in response") + callback(nil, err) + return + } + + callback(resp.Embeddings[0], nil) + + }, t.config.timeout) + return err // NOTE(review): if Post itself returns an error, the deferred func above also passes it to callback - confirm callers tolerate both +} diff --git a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go index 608f50ad54..7f0e14b269 100644 --- a/plugins/wasm-go/extensions/ai-cache/embedding/provider.go +++ b/plugins/wasm-go/extensions/ai-cache/embedding/provider.go @@ -12,6 +12,7 @@ const ( PROVIDER_TYPE_TEXTIN = "textin" PROVIDER_TYPE_COHERE = "cohere" PROVIDER_TYPE_OPENAI = "openai" + PROVIDER_TYPE_OLLAMA = "ollama" ) type providerInitializer interface { @@ -26,6 +27,7 @@ var ( PROVIDER_TYPE_TEXTIN: &textInProviderInitializer{}, PROVIDER_TYPE_COHERE: &cohereProviderInitializer{}, PROVIDER_TYPE_OPENAI: &openAIProviderInitializer{}, + PROVIDER_TYPE_OLLAMA: &ollamaProviderInitializer{}, } ) diff --git a/plugins/wasm-go/extensions/ai-cache/main.go b/plugins/wasm-go/extensions/ai-cache/main.go index 4bb3f2bad1..41014c5ebd 100644 --- a/plugins/wasm-go/extensions/ai-cache/main.go +++
b/plugins/wasm-go/extensions/ai-cache/main.go @@ -23,7 +23,7 @@ const ( SKIP_CACHE_HEADER = "x-higress-skip-ai-cache" ERROR_PARTIAL_MESSAGE_KEY = "errorPartialMessage" - DEFAULT_MAX_BODY_BYTES uint32 = 10 * 1024 * 1024 + DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024 ) func main() { diff --git a/plugins/wasm-go/extensions/ai-history/README.md b/plugins/wasm-go/extensions/ai-history/README.md index d4684d292d..b8462345c5 100644 --- a/plugins/wasm-go/extensions/ai-history/README.md +++ b/plugins/wasm-go/extensions/ai-history/README.md @@ -20,17 +20,18 @@ description: AI 历史对话插件配置参考 ## 配置字段 -| 名称 | 数据类型 | 填写要求 | 默认值 | Description | -|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------| -| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 | -| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 | -| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 | -| cacheTTL | integer | optional | 0 | 缓存的过期时间,单位是秒,默认值为0,即永不过期 | -| redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | -| redis.servicePort | integer | optional | 6379 | redis 服务端口 | -| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 | -| redis.username | string | optional | - | 登陆 redis 的用户名 | -| redis.password | string | optional | - | 登陆 redis 的密码 | +| 名称 | 数据类型 | 填写要求 | 默认值 | Description | +|-------------------|----------|----------|-----------------------|----------------------------------------------------------------------------------------------| +| identityHeader | string | optional | "Authorization" | 身份解析对应的请求头,可用 Authorization,X-Mse-Consumer等 | +| fillHistoryCnt | integer | optional | 3 | 默认填充历史对话轮次 | +| cacheKeyPrefix | string | optional | "higress-ai-history:" | Redis缓存Key的前缀 | +| cacheTTL | integer | optional | 0 | 缓存的过期时间,单位是秒,默认值为0,即永不过期 | +| 
redis.serviceName | string | required | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | +| redis.servicePort | integer | optional | 6379 | redis 服务端口 | +| redis.timeout | integer | optional | 1000 | 请求 redis 的超时时间,单位为毫秒 | +| redis.username | string | optional | - | 登陆 redis 的用户名 | +| redis.password | string | optional | - | 登陆 redis 的密码 | +| redis.database | int | optional | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` | ## 用法示例 diff --git a/plugins/wasm-go/extensions/ai-history/README_EN.md b/plugins/wasm-go/extensions/ai-history/README_EN.md index 1fc6144d40..7d0149a019 100644 --- a/plugins/wasm-go/extensions/ai-history/README_EN.md +++ b/plugins/wasm-go/extensions/ai-history/README_EN.md @@ -15,17 +15,19 @@ Plugin Execution Phase: `Default Phase` Plugin Execution Priority: `650` ## Configuration Fields -| Name | Data Type | Required | Default Value | Description | -|-------------------|---------|----------|-----------------------|---------------------------------------------------------------------------| -| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. | -| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. | -| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. | -| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. | -| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | -| redis.servicePort | integer | optional | 6379 | Redis service port. | -| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. | -| redis.username | string | optional | - | Username for logging into Redis. | -| redis.password | string | optional | - | Password for logging into Redis. 
| +| Name | Data Type | Required | Default Value | Description | +|-------------------|-----------|----------|-----------------------|---------------------------------------------------------------------------------------------------------| +| identityHeader | string | optional | "Authorization" | The request header for identity resolution, can be Authorization, X-Mse-Consumer, etc. | +| fillHistoryCnt | integer | optional | 3 | Default number of historical dialogues to be filled. | +| cacheKeyPrefix | string | optional | "higress-ai-history:" | Prefix for Redis cache key. | +| cacheTTL | integer | optional | 0 | Cache expiration time in seconds, default value is 0, meaning it never expires. | +| redis.serviceName | string | required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | +| redis.servicePort | integer | optional | 6379 | Redis service port. | +| redis.timeout | integer | optional | 1000 | Timeout for requests to Redis, in milliseconds. | +| redis.username | string | optional | - | Username for logging into Redis. | +| redis.password | string | optional | - | Password for logging into Redis. | +| redis.database | int | optional | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. 
| + ## Usage Example ### Configuration Information diff --git a/plugins/wasm-go/extensions/ai-history/main.go b/plugins/wasm-go/extensions/ai-history/main.go index 3f728dd96d..f0fabaaa4c 100644 --- a/plugins/wasm-go/extensions/ai-history/main.go +++ b/plugins/wasm-go/extensions/ai-history/main.go @@ -76,6 +76,9 @@ type RedisInfo struct { // @Title zh-CN 请求超时 // @Description zh-CN 请求 redis 的超时时间,单位为毫秒。默认值是1000,即1秒 Timeout int `required:"false" yaml:"timeout" json:"timeout"` + // @Title zh-CN Database + // @Description zh-CN redis database + Database int `required:"false" yaml:"database" json:"database"` } type KVExtractor struct { @@ -138,6 +141,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error { if c.RedisInfo.Timeout == 0 { c.RedisInfo.Timeout = 1000 } + c.RedisInfo.Database = int(json.Get("redis.database").Int()) c.QuestionFrom.RequestBody = "messages.@reverse.0.content" c.AnswerValueFrom.ResponseBody = "choices.0.message.content" c.AnswerStreamValueFrom.ResponseBody = "choices.0.delta.content" @@ -159,7 +163,7 @@ func parseConfig(json gjson.Result, c *PluginConfig, log wrapper.Log) error { FQDN: c.RedisInfo.ServiceName, Port: int64(c.RedisInfo.ServicePort), }) - return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout)) + return c.redisClient.Init(c.RedisInfo.Username, c.RedisInfo.Password, int64(c.RedisInfo.Timeout), wrapper.WithDataBase(c.RedisInfo.Database)) } func onHttpRequestHeaders(ctx wrapper.HttpContext, config PluginConfig, log wrapper.Log) types.Action { diff --git a/plugins/wasm-go/extensions/ai-proxy/README.md b/plugins/wasm-go/extensions/ai-proxy/README.md index 8f281ffd2b..cb685e6e03 100644 --- a/plugins/wasm-go/extensions/ai-proxy/README.md +++ b/plugins/wasm-go/extensions/ai-proxy/README.md @@ -31,18 +31,19 @@ description: AI 代理插件配置参考 `provider`的配置字段说明如下: -| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | -|------------------| --------------- | -------- | ------ 
|-----------------------------------------------------------------------------------------------------------------------------------------------------------| -| `type` | string | 必填 | - | AI 服务提供商名称 | -| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 | -| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 | -| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 | -| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) | -| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 | -| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 | -| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 | -| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 | -| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key表示的是采用的厂商协议能力,values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank | +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------------------| --------------- | -------- | ------ |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `type` | string | 必填 | - | AI 服务提供商名称 | +| `apiTokens` | array of string | 非必填 | - | 用于在访问 AI 服务时进行认证的令牌。如果配置了多个 token,插件会在请求时随机进行选择。部分服务提供商只支持配置一个 token。 | +| `timeout` | number | 非必填 | - | 访问 AI 服务的超时时间。单位为毫秒。默认值为 120000,即 2 分钟 | +| `modelMapping` | map of string | 非必填 | - | AI 模型映射表,用于将请求中的模型名称映射为服务提供商支持模型名称。
1. 支持前缀匹配。例如用 "gpt-3-*" 匹配所有名称以“gpt-3-”开头的模型;
2. 支持使用 "*" 为键来配置通用兜底映射关系;
3. 如果映射的目标名称为空字符串 "",则表示保留原模型名称。 | +| `protocol` | string | 非必填 | - | 插件对外提供的 API 接口契约。目前支持以下取值:openai(默认值,使用 OpenAI 的接口契约)、original(使用目标服务提供商的原始接口契约) | +| `context` | object | 非必填 | - | 配置 AI 对话上下文信息 | +| `customSettings` | array of customSetting | 非必填 | - | 为AI请求指定覆盖或者填充参数 | +| `failover` | object | 非必填 | - | 配置 apiToken 的 failover 策略,当 apiToken 不可用时,将其移出 apiToken 列表,待健康检测通过后重新添加回 apiToken 列表 | +| `retryOnFailure` | object | 非必填 | - | 当请求失败时立即进行重试 | +| `reasoningContentMode` | string | 非必填 | - | 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。 | +| `capabilities` | map of string | 非必填 | - | 部分provider的部分ai能力原生兼容openai/v1格式,不需要重写,可以直接转发,通过此配置项指定来开启转发, key表示的是采用的厂商协议能力,values表示的真实的厂商该能力的api path, 厂商协议能力当前支持: openai/v1/chatcompletions, openai/v1/embeddings, openai/v1/imagegeneration, openai/v1/audiospeech, cohere/v1/rerank | `context`的配置字段说明如下: diff --git a/plugins/wasm-go/extensions/ai-proxy/config/config.go b/plugins/wasm-go/extensions/ai-proxy/config/config.go index 48f08dd9e4..f0b820345a 100644 --- a/plugins/wasm-go/extensions/ai-proxy/config/config.go +++ b/plugins/wasm-go/extensions/ai-proxy/config/config.go @@ -80,13 +80,16 @@ func (c *PluginConfig) Complete(log wrapper.Log) error { c.activeProvider = nil return nil } + var err error + c.activeProvider, err = provider.CreateProvider(*c.activeProviderConfig) + if err != nil { + return err + } providerConfig := c.GetProviderConfig() - err = providerConfig.SetApiTokensFailover(log, c.activeProvider) - - return err + return providerConfig.SetApiTokensFailover(log, c.activeProvider) } func (c *PluginConfig) GetProvider() provider.Provider { diff --git a/plugins/wasm-go/extensions/ai-proxy/main.go b/plugins/wasm-go/extensions/ai-proxy/main.go index dc6bc123ce..35d06b9502 100644 --- a/plugins/wasm-go/extensions/ai-proxy/main.go +++ b/plugins/wasm-go/extensions/ai-proxy/main.go @@ -15,12 +15,13 @@ import ( 
"github.com/higress-group/proxy-wasm-go-sdk/proxywasm" "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) const ( pluginName = "ai-proxy" - defaultMaxBodyBytes uint32 = 10 * 1024 * 1024 + defaultMaxBodyBytes uint32 = 100 * 1024 * 1024 ) func main() { @@ -40,9 +41,11 @@ func parseGlobalConfig(json gjson.Result, pluginConfig *config.PluginConfig, log pluginConfig.FromJson(json) if err := pluginConfig.Validate(); err != nil { + log.Errorf("global rule config is invalid: %v", err) return err } if err := pluginConfig.Complete(log); err != nil { + log.Errorf("failed to apply global rule config: %v", err) return err } @@ -56,9 +59,11 @@ func parseOverrideRuleConfig(json gjson.Result, global config.PluginConfig, plug pluginConfig.FromJson(json) if err := pluginConfig.Validate(); err != nil { + log.Errorf("overriden rule config is invalid: %v", err) return err } if err := pluginConfig.Complete(log); err != nil { + log.Errorf("failed to apply overriden rule config: %v", err) return err } @@ -98,21 +103,23 @@ func onHttpRequestHeader(ctx wrapper.HttpContext, pluginConfig config.PluginConf // Always remove the Accept-Encoding header to prevent the LLM from sending compressed responses, // allowing plugins to inspect or modify the response correctly - proxywasm.RemoveHttpRequestHeader("Accept-Encoding") + _ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding") if handler, ok := activeProvider.(provider.RequestHeadersHandler); ok { // Set the apiToken for the current request. 
providerConfig.SetApiTokenInUse(ctx, log) + // Set available apiTokens of current request in the context, will be used in the retryOnFailure + providerConfig.SetAvailableApiTokens(ctx, log) err := handler.OnRequestHeaders(ctx, apiName, log) if err != nil { - util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err)) + _ = util.ErrorHandler("ai-proxy.proc_req_headers_failed", fmt.Errorf("failed to process request headers: %v", err)) return types.ActionContinue } hasRequestBody := wrapper.HasRequestBody() if hasRequestBody { - proxywasm.RemoveHttpRequestHeader("Content-Length") + _ = proxywasm.RemoveHttpRequestHeader("Content-Length") ctx.SetRequestBodyBufferLimit(defaultMaxBodyBytes) // Delay the header processing to allow changing in OnRequestBody return types.HeaderStopIteration @@ -136,23 +143,21 @@ func onHttpRequestBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfig if handler, ok := activeProvider.(provider.RequestBodyHandler); ok { apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName) - - newBody, settingErr := pluginConfig.GetProviderConfig().ReplaceByCustomSettings(body) + providerConfig := pluginConfig.GetProviderConfig() + newBody, settingErr := providerConfig.ReplaceByCustomSettings(body) if settingErr != nil { - util.ErrorHandler( - "ai-proxy.proc_req_body_failed", - fmt.Errorf("failed to replace request body by custom settings: %v", settingErr), - ) - return types.ActionContinue + log.Errorf("failed to replace request body by custom settings: %v", settingErr) + } + if providerConfig.IsOpenAIProtocol() { + newBody = normalizeOpenAiRequestBody(newBody, log) } - log.Debugf("[onHttpRequestBody] newBody=%s", newBody) body = newBody action, err := handler.OnRequestBody(ctx, apiName, body, log) if err == nil { return action } - util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err)) + _ = 
util.ErrorHandler("ai-proxy.proc_req_body_failed", fmt.Errorf("failed to process request body: %v", err)) } return types.ActionContinue } @@ -176,6 +181,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo providerConfig := pluginConfig.GetProviderConfig() apiTokenInUse := providerConfig.GetApiTokenInUse(ctx) + apiTokens := providerConfig.GetAvailableApiToken(ctx) status, err := proxywasm.GetHttpResponseHeader(":status") if err != nil || status != "200" { @@ -183,7 +189,7 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo log.Errorf("unable to load :status header from response: %v", err) } ctx.DontReadResponseBody() - return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, log) + return providerConfig.OnRequestFailed(activeProvider, ctx, apiTokenInUse, apiTokens, log) } // Reset ctxApiTokenRequestFailureCount if the request is successful, @@ -201,7 +207,11 @@ func onHttpResponseHeaders(ctx wrapper.HttpContext, pluginConfig config.PluginCo checkStream(ctx, log) _, needHandleBody := activeProvider.(provider.TransformResponseBodyHandler) - _, needHandleStreamingBody := activeProvider.(provider.StreamingResponseBodyHandler) + var needHandleStreamingBody bool + _, needHandleStreamingBody = activeProvider.(provider.StreamingResponseBodyHandler) + if !needHandleStreamingBody { + _, needHandleStreamingBody = activeProvider.(provider.StreamingEventHandler) + } if !needHandleBody && !needHandleStreamingBody { ctx.DontReadResponseBody() } else if !needHandleStreamingBody { @@ -220,7 +230,7 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin } log.Debugf("[onStreamingResponseBody] provider=%s", activeProvider.GetProviderType()) - log.Debugf("isLastChunk=%v chunk: %s", isLastChunk, string(chunk)) + log.Debugf("[onStreamingResponseBody] isLastChunk=%v chunk: %s", isLastChunk, string(chunk)) if handler, ok := activeProvider.(provider.StreamingResponseBodyHandler); 
ok { apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName) @@ -230,6 +240,38 @@ func onStreamingResponseBody(ctx wrapper.HttpContext, pluginConfig config.Plugin } return chunk } + if handler, ok := activeProvider.(provider.StreamingEventHandler); ok { + apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName) + events := provider.ExtractStreamingEvents(ctx, chunk, log) + log.Debugf("[onStreamingResponseBody] %d events received", len(events)) + if len(events) == 0 { + // No events are extracted, return the original chunk + return chunk + } + var responseBuilder strings.Builder + for _, event := range events { + log.Debugf("processing event: %v", event) + + if event.IsEndData() { + responseBuilder.WriteString(event.ToHttpString()) + continue + } + + outputEvents, err := handler.OnStreamingEvent(ctx, apiName, event, log) + if err != nil { + log.Errorf("[onStreamingResponseBody] failed to process streaming event: %v\n%s", err, chunk) + return chunk + } + if outputEvents == nil || len(outputEvents) == 0 { + responseBuilder.WriteString(event.ToHttpString()) + } else { + for _, outputEvent := range outputEvents { + responseBuilder.WriteString(outputEvent.ToHttpString()) + } + } + } + return []byte(responseBuilder.String()) + } return chunk } @@ -247,16 +289,28 @@ func onHttpResponseBody(ctx wrapper.HttpContext, pluginConfig config.PluginConfi apiName, _ := ctx.GetContext(provider.CtxKeyApiName).(provider.ApiName) body, err := handler.TransformResponseBody(ctx, apiName, body, log) if err != nil { - util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err)) + _ = util.ErrorHandler("ai-proxy.proc_resp_body_failed", fmt.Errorf("failed to process response body: %v", err)) return types.ActionContinue } if err = provider.ReplaceResponseBody(body, log); err != nil { - util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err)) + _ = 
util.ErrorHandler("ai-proxy.replace_resp_body_failed", fmt.Errorf("failed to replace response body: %v", err)) } } return types.ActionContinue } +func normalizeOpenAiRequestBody(body []byte, log wrapper.Log) []byte { + var err error + // Default setting include_usage. + if gjson.GetBytes(body, "stream").Bool() { + body, err = sjson.SetBytes(body, "stream_options.include_usage", true) + if err != nil { + log.Errorf("set include_usage failed, err:%s", err) + } + } + return body +} + func checkStream(ctx wrapper.HttpContext, log wrapper.Log) { contentType, err := proxywasm.GetHttpResponseHeader("Content-Type") if err != nil || !strings.HasPrefix(contentType, "text/event-stream") { diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go index 6c8259949b..9644693f5e 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/failover.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/failover.go @@ -32,6 +32,8 @@ type failover struct { healthCheckModel string `required:"false" yaml:"healthCheckModel" json:"healthCheckModel"` // @Title zh-CN 本次请求使用的 apiToken ctxApiTokenInUse string + // @Title zh-CN 记录本次请求时所有可用的 apiToken + ctxAvailableApiTokensInRequest string // @Title zh-CN 记录 apiToken 请求失败的次数,key 为 apiToken,value 为失败次数 ctxApiTokenRequestFailureCount string // @Title zh-CN 记录 apiToken 健康检测成功的次数,key 为 apiToken,value 为成功次数 @@ -527,6 +529,22 @@ func (c *ProviderConfig) GetGlobalRandomToken(log wrapper.Log) string { } } +func (c *ProviderConfig) GetAvailableApiToken(ctx wrapper.HttpContext) []string { + apiTokens, _ := ctx.GetContext(c.failover.ctxAvailableApiTokensInRequest).([]string) + return apiTokens +} + +// SetAvailableApiTokens set available apiTokens of current request in the context, will be used in the retryOnFailure +func (c *ProviderConfig) SetAvailableApiTokens(ctx wrapper.HttpContext, log wrapper.Log) { + var apiTokens []string + if c.isFailoverEnabled() { + apiTokens, _, _ = 
getApiTokens(c.failover.ctxApiTokens) + } else { + apiTokens = c.apiTokens + } + ctx.SetContext(c.failover.ctxAvailableApiTokensInRequest, apiTokens) +} + func (c *ProviderConfig) isFailoverEnabled() bool { return c.failover.enabled } @@ -539,12 +557,12 @@ func (c *ProviderConfig) resetSharedData() { _ = proxywasm.SetSharedData(c.failover.ctxApiTokenRequestFailureCount, nil, 0) } -func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, log wrapper.Log) types.Action { +func (c *ProviderConfig) OnRequestFailed(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) types.Action { if c.isFailoverEnabled() { c.handleUnavailableApiToken(ctx, apiTokenInUse, log) } if c.isRetryOnFailureEnabled() && ctx.GetContext(ctxKeyIsStreaming) != nil && !ctx.GetContext(ctxKeyIsStreaming).(bool) { - c.retryFailedRequest(activeProvider, ctx, log) + c.retryFailedRequest(activeProvider, ctx, apiTokenInUse, apiTokens, log) return types.HeaderStopAllIterationAndWatermark } return types.ActionContinue diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/model.go b/plugins/wasm-go/extensions/ai-proxy/provider/model.go index 726a18fca6..7de9cfe2fa 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/model.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/model.go @@ -1,6 +1,9 @@ package provider -import "strings" +import ( + "fmt" + "strings" +) const ( streamEventIdItemKey = "id:" @@ -110,9 +113,16 @@ type chatCompletionChoice struct { } type usage struct { - PromptTokens int `json:"prompt_tokens,omitempty"` - CompletionTokens int `json:"completion_tokens,omitempty"` - TotalTokens int `json:"total_tokens,omitempty"` + PromptTokens int `json:"prompt_tokens,omitempty"` + CompletionTokens int `json:"completion_tokens,omitempty"` + TotalTokens int `json:"total_tokens,omitempty"` + CompletionTokensDetails *completionTokensDetails `json:"completion_tokens_details,omitempty"` 
+} + +type completionTokensDetails struct { + ReasoningTokens int `json:"reasoning_tokens,omitempty"` + AcceptedPredictionTokens int `json:"accepted_prediction_tokens,omitempty"` + RejectedPredictionTokens int `json:"rejected_prediction_tokens,omitempty"` } type chatMessage struct { @@ -126,6 +136,24 @@ type chatMessage struct { Refusal string `json:"refusal,omitempty"` } +func (m *chatMessage) handleReasoningContent(reasoningContentMode string) { + if m.ReasoningContent == "" { + return + } + switch reasoningContentMode { + case reasoningBehaviorIgnore: + m.ReasoningContent = "" + break + case reasoningBehaviorConcat: + m.Content = fmt.Sprintf("%v\n%v", m.ReasoningContent, m.Content) + m.ReasoningContent = "" + break + case reasoningBehaviorPassThrough: + default: + break + } +} + type messageContent struct { Type string `json:"type,omitempty"` Text string `json:"text"` @@ -138,6 +166,9 @@ type imageUrl struct { } func (m *chatMessage) IsEmpty() bool { + if m.ReasoningContent != "" { + return false + } if m.IsStringContent() && m.Content != "" { return false } @@ -247,14 +278,18 @@ func (m *functionCall) IsEmpty() bool { return m.Name == "" && m.Arguments == "" } -type streamEvent struct { +type StreamEvent struct { Id string `json:"id"` Event string `json:"event"` Data string `json:"data"` HttpStatus string `json:"http_status"` } -func (e *streamEvent) setValue(key, value string) { +func (e *StreamEvent) IsEndData() bool { + return e.Data == streamEndDataValue +} + +func (e *StreamEvent) SetValue(key, value string) { switch key { case streamEventIdItemKey: e.Id = value @@ -269,6 +304,10 @@ func (e *streamEvent) setValue(key, value string) { } } +func (e *StreamEvent) ToHttpString() string { + return fmt.Sprintf("%s %s\n\n", streamDataItemKey, e.Data) +} + // https://platform.openai.com/docs/guides/images type imageGenerationRequest struct { Model string `json:"model"` diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go 
b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go index f0f63cf792..46fa68c734 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/moonshot.go @@ -102,12 +102,12 @@ func (m *moonshotProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiNam }() if err != nil { log.Errorf("failed to load context file: %v", err) - util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err)) + _ = util.ErrorHandler("ai-proxy.moonshot.load_ctx_failed", fmt.Errorf("failed to load context file: %v", err)) return } err = m.performChatCompletion(ctx, content, request, log) if err != nil { - util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err)) + _ = util.ErrorHandler("ai-proxy.moonshot.insert_ctx_failed", fmt.Errorf("failed to perform chat completion: %v", err)) } }, log) if err == nil { @@ -161,79 +161,9 @@ func (m *moonshotProvider) sendRequest(method, path, body, apiKey string, callba } } -func (m *moonshotProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) { +func (m *moonshotProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) { if name != ApiNameChatCompletion { - return chunk, nil - } - receivedBody := chunk - if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has { - receivedBody = append(bufferedStreamingBody, chunk...) - } - - eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1 - - defer func() { - if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) { - // Just in case the received chunk is not a complete event. 
- ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:]) - } else { - ctx.SetContext(ctxKeyStreamingBody, nil) - } - }() - - var responseBuilder strings.Builder - currentKey := "" - currentEvent := &streamEvent{} - i, length := 0, len(receivedBody) - for i = 0; i < length; i++ { - ch := receivedBody[i] - if ch != '\n' { - if lineStartIndex == -1 { - if eventStartIndex == -1 { - eventStartIndex = i - } - lineStartIndex = i - valueStartIndex = -1 - } - if valueStartIndex == -1 { - if ch == ':' { - valueStartIndex = i + 1 - currentKey = string(receivedBody[lineStartIndex:valueStartIndex]) - } - } else if valueStartIndex == i && ch == ' ' { - // Skip leading spaces in data. - valueStartIndex = i + 1 - } - continue - } - - if lineStartIndex != -1 { - value := string(receivedBody[valueStartIndex:i]) - currentEvent.setValue(currentKey, value) - } else { - // Extra new line. The current event is complete. - log.Debugf("processing event: %v", currentEvent) - m.convertStreamEvent(&responseBuilder, currentEvent, log) - // Reset event parsing state. - eventStartIndex = -1 - currentEvent = &streamEvent{} - } - - // Reset line parsing state. 
- lineStartIndex = -1 - valueStartIndex = -1 - currentKey = "" - } - - modifiedResponseChunk := responseBuilder.String() - log.Debugf("=== modified response chunk: %s", modifiedResponseChunk) - return []byte(modifiedResponseChunk), nil -} - -func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, event *streamEvent, log wrapper.Log) error { - if event.Data == streamEndDataValue { - m.appendStreamEvent(responseBuilder, event) - return nil + return nil, nil } if gjson.Get(event.Data, "choices.0.usage").Exists() { @@ -241,20 +171,19 @@ func (m *moonshotProvider) convertStreamEvent(responseBuilder *strings.Builder, newData, err := sjson.Delete(event.Data, "choices.0.usage") if err != nil { log.Errorf("convert usage event error: %v", err) - return err + return nil, err } newData, err = sjson.SetRaw(newData, "usage", usageStr) if err != nil { log.Errorf("convert usage event error: %v", err) - return err + return nil, err } event.Data = newData } - m.appendStreamEvent(responseBuilder, event) - return nil + return []StreamEvent{event}, nil } -func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) { +func (m *moonshotProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) { responseBuilder.WriteString(streamDataItemKey) responseBuilder.WriteString(event.Data) responseBuilder.WriteString("\n\n") diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go index 0a170347f5..f875dbaa40 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/openai.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/openai.go @@ -2,7 +2,6 @@ package provider import ( "encoding/json" - "fmt" "net/http" "path" "strings" @@ -58,10 +57,10 @@ func (m *openaiProviderInitializer) CreateProvider(config ProviderConfig) (Provi } customUrl := strings.TrimPrefix(strings.TrimPrefix(config.openaiCustomUrl, "http://"), "https://") pairs := 
strings.SplitN(customUrl, "/", 2) - if len(pairs) != 2 { - return nil, fmt.Errorf("invalid openaiCustomUrl:%s", config.openaiCustomUrl) + customPath := "/" + if len(pairs) == 2 { + customPath += pairs[1] } - customPath := "/" + pairs[1] isDirectCustomPath := isDirectPath(customPath) capabilities := m.DefaultCapabilities() if !isDirectCustomPath { @@ -128,21 +127,14 @@ func (m *openaiProvider) OnRequestBody(ctx wrapper.HttpContext, apiName ApiName, } func (m *openaiProvider) TransformRequestBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) { - request := &chatCompletionRequest{} - if err := decodeChatCompletionRequest(body, request); err != nil { - return nil, err - } if m.config.responseJsonSchema != nil { + request := &chatCompletionRequest{} + if err := decodeChatCompletionRequest(body, request); err != nil { + return nil, err + } log.Debugf("[ai-proxy] set response format to %s", m.config.responseJsonSchema) request.ResponseFormat = m.config.responseJsonSchema + body, _ = json.Marshal(request) } - if request.Stream { - // For stream requests, we need to include usage in the response. 
- if request.StreamOptions == nil { - request.StreamOptions = &streamOptions{IncludeUsage: true} - } else if !request.StreamOptions.IncludeUsage { - request.StreamOptions.IncludeUsage = true - } - } - return json.Marshal(request) + return m.config.defaultTransformRequestBody(ctx, apiName, body, log) } diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go index 67cce2888b..c5ec8ce2d4 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/provider.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/provider.go @@ -85,6 +85,10 @@ const ( objectChatCompletion = "chat.completion" objectChatCompletionChunk = "chat.completion.chunk" + reasoningBehaviorPassThrough = "passthrough" + reasoningBehaviorIgnore = "ignore" + reasoningBehaviorConcat = "concat" + wildcard = "*" defaultTimeout = 2 * 60 * 1000 // ms @@ -145,6 +149,10 @@ type StreamingResponseBodyHandler interface { OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) } +type StreamingEventHandler interface { + OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) +} + type ApiNameHandler interface { GetApiName(path string) ApiName } @@ -190,6 +198,9 @@ type ProviderConfig struct { // @Title zh-CN 失败请求重试 // @Description zh-CN 对失败的请求立即进行重试 retryOnFailure *retryOnFailure `required:"false" yaml:"retryOnFailure" json:"retryOnFailure"` + // @Title zh-CN 推理内容处理方式 + // @Description zh-CN 如何处理大模型服务返回的推理内容。目前支持以下取值:passthrough(正常输出推理内容)、ignore(不输出推理内容)、concat(将推理内容拼接在常规输出内容之前)。默认为 passthrough。仅支持通义千问服务。 + reasoningContentMode string `required:"false" yaml:"reasoningContentMode" json:"reasoningContentMode"` // @Title zh-CN 基于OpenAI协议的自定义后端URL // @Description zh-CN 仅适用于支持 openai 协议的服务。 openaiCustomUrl string `required:"false" yaml:"openaiCustomUrl" json:"openaiCustomUrl"` @@ -281,6 +292,10 @@ func (c *ProviderConfig)
GetProtocol() string { return c.protocol } +func (c *ProviderConfig) IsOpenAIProtocol() bool { + return c.protocol == protocolOpenAI +} + func (c *ProviderConfig) FromJson(json gjson.Result) { c.id = json.Get("id").String() c.typ = json.Get("type").String() @@ -359,6 +374,20 @@ func (c *ProviderConfig) FromJson(json gjson.Result) { } } + c.reasoningContentMode = json.Get("reasoningContentMode").String() + if c.reasoningContentMode == "" { + c.reasoningContentMode = reasoningBehaviorPassThrough + } else { + c.reasoningContentMode = strings.ToLower(c.reasoningContentMode) + switch c.reasoningContentMode { + case reasoningBehaviorPassThrough, reasoningBehaviorIgnore, reasoningBehaviorConcat: + break + default: + c.reasoningContentMode = reasoningBehaviorPassThrough + break + } + } + failoverJson := json.Get("failover") c.failover = &failover{ enabled: false, @@ -554,6 +583,81 @@ func doGetMappedModel(model string, modelMapping map[string]string, log wrapper. return "" } +func ExtractStreamingEvents(ctx wrapper.HttpContext, chunk []byte, log wrapper.Log) []StreamEvent { + body := chunk + if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has { + body = append(bufferedStreamingBody, chunk...) + } + + eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1 + + defer func() { + if eventStartIndex >= 0 && eventStartIndex < len(body) { + // Just in case the received chunk is not a complete event. 
+ ctx.SetContext(ctxKeyStreamingBody, body[eventStartIndex:]) + } else { + ctx.SetContext(ctxKeyStreamingBody, nil) + } + }() + + // Sample Qwen event response: + // + // event:result + // :HTTP_STATUS/200 + // data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"} + // + // event:error + // :HTTP_STATUS/400 + // data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"} + // + + var events []StreamEvent + + currentKey := "" + currentEvent := &StreamEvent{} + i, length := 0, len(body) + for i = 0; i < length; i++ { + ch := body[i] + if ch != '\n' { + if lineStartIndex == -1 { + if eventStartIndex == -1 { + eventStartIndex = i + } + lineStartIndex = i + valueStartIndex = -1 + } + if valueStartIndex == -1 { + if ch == ':' { + valueStartIndex = i + 1 + currentKey = string(body[lineStartIndex:valueStartIndex]) + } + } else if valueStartIndex == i && ch == ' ' { + // Skip leading spaces in data. + valueStartIndex = i + 1 + } + continue + } + + if lineStartIndex != -1 { + value := string(body[valueStartIndex:i]) + currentEvent.SetValue(currentKey, value) + } else { + // Extra new line. The current event is complete. + events = append(events, *currentEvent) + // Reset event parsing state. + eventStartIndex = -1 + currentEvent = &StreamEvent{} + } + + // Reset line parsing state. 
+ lineStartIndex = -1 + valueStartIndex = -1 + currentKey = "" + } + + return events +} + func (c *ProviderConfig) isSupportedAPI(apiName ApiName) bool { _, exist := c.capabilities[string(apiName)] return exist diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go index 2f757c683a..4bb39c1210 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/qwen.go @@ -188,89 +188,32 @@ func (m *qwenProvider) onEmbeddingsRequestBody(ctx wrapper.HttpContext, body []b return json.Marshal(qwenRequest) } -func (m *qwenProvider) OnStreamingResponseBody(ctx wrapper.HttpContext, name ApiName, chunk []byte, isLastChunk bool, log wrapper.Log) ([]byte, error) { +func (m *qwenProvider) OnStreamingEvent(ctx wrapper.HttpContext, name ApiName, event StreamEvent, log wrapper.Log) ([]StreamEvent, error) { if m.config.qwenEnableCompatible || name != ApiNameChatCompletion { - return chunk, nil - } - - receivedBody := chunk - if bufferedStreamingBody, has := ctx.GetContext(ctxKeyStreamingBody).([]byte); has { - receivedBody = append(bufferedStreamingBody, chunk...) + return nil, nil } incrementalStreaming := ctx.GetBoolContext(ctxKeyIncrementalStreaming, false) - eventStartIndex, lineStartIndex, valueStartIndex := -1, -1, -1 - - defer func() { - if eventStartIndex >= 0 && eventStartIndex < len(receivedBody) { - // Just in case the received chunk is not a complete event. 
- ctx.SetContext(ctxKeyStreamingBody, receivedBody[eventStartIndex:]) - } else { - ctx.SetContext(ctxKeyStreamingBody, nil) - } - }() - - // Sample Qwen event response: - // - // event:result - // :HTTP_STATUS/200 - // data:{"output":{"choices":[{"message":{"content":"你好!","role":"assistant"},"finish_reason":"null"}]},"usage":{"total_tokens":116,"input_tokens":114,"output_tokens":2},"request_id":"71689cfc-1f42-9949-86e8-9563b7f832b1"} - // - // event:error - // :HTTP_STATUS/400 - // data:{"code":"InvalidParameter","message":"Preprocessor error","request_id":"0cbe6006-faec-9854-bf8b-c906d75c3bd8"} - // - - var responseBuilder strings.Builder - currentKey := "" - currentEvent := &streamEvent{} - i, length := 0, len(receivedBody) - for i = 0; i < length; i++ { - ch := receivedBody[i] - if ch != '\n' { - if lineStartIndex == -1 { - if eventStartIndex == -1 { - eventStartIndex = i - } - lineStartIndex = i - valueStartIndex = -1 - } - if valueStartIndex == -1 { - if ch == ':' { - valueStartIndex = i + 1 - currentKey = string(receivedBody[lineStartIndex:valueStartIndex]) - } - } else if valueStartIndex == i && ch == ' ' { - // Skip leading spaces in data. - valueStartIndex = i + 1 - } - continue - } + qwenResponse := &qwenTextGenResponse{} + if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil { + log.Errorf("unable to unmarshal Qwen response: %v", err) + return nil, fmt.Errorf("unable to unmarshal Qwen response: %v", err) + } - if lineStartIndex != -1 { - value := string(receivedBody[valueStartIndex:i]) - currentEvent.setValue(currentKey, value) - } else { - // Extra new line. The current event is complete. - log.Debugf("processing event: %v", currentEvent) - if err := m.convertStreamEvent(ctx, &responseBuilder, currentEvent, incrementalStreaming, log); err != nil { - return nil, err - } - // Reset event parsing state. 
- eventStartIndex = -1 - currentEvent = &streamEvent{} + var outputEvents []StreamEvent + responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log) + for _, response := range responses { + responseBody, err := json.Marshal(response) + if err != nil { + log.Errorf("unable to marshal response: %v", err) + return nil, fmt.Errorf("unable to marshal response: %v", err) } - - // Reset line parsing state. - lineStartIndex = -1 - valueStartIndex = -1 - currentKey = "" + modifiedEvent := event + modifiedEvent.Data = string(responseBody) + outputEvents = append(outputEvents, modifiedEvent) } - - modifiedResponseChunk := responseBuilder.String() - log.Debugf("=== modified response chunk: %s", modifiedResponseChunk) - return []byte(modifiedResponseChunk), nil + return outputEvents, nil } func (m *qwenProvider) TransformResponseBody(ctx wrapper.HttpContext, apiName ApiName, body []byte, log wrapper.Log) ([]byte, error) { @@ -357,7 +300,7 @@ func (m *qwenProvider) buildQwenTextGenerationRequest(ctx wrapper.HttpContext, o func (m *qwenProvider) buildChatCompletionResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse) *chatCompletionResponse { choices := make([]chatCompletionChoice, 0, len(qwenResponse.Output.Choices)) for _, qwenChoice := range qwenResponse.Output.Choices { - message := qwenMessageToChatMessage(qwenChoice.Message) + message := qwenMessageToChatMessage(qwenChoice.Message, m.config.reasoningContentMode) choices = append(choices, chatCompletionChoice{ Message: &message, FinishReason: qwenChoice.FinishReason, @@ -395,7 +338,8 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null" message := qwenChoice.Message - deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content} + deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: 
message.ReasoningContent} + deltaContentMessage.handleReasoningContent(m.config.reasoningContentMode) deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)} if !incrementalStreaming { for _, tc := range message.ToolCalls { @@ -430,6 +374,11 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont } } } + if message.ReasoningContent == "" { + message.ReasoningContent = pushedMessage.ReasoningContent + } else { + deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent) + } if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil { for i, tc := range deltaToolCallsMessage.ToolCalls { if i >= len(pushedMessage.ToolCalls) { @@ -475,39 +424,6 @@ func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpCont return responses } -func (m *qwenProvider) convertStreamEvent(ctx wrapper.HttpContext, responseBuilder *strings.Builder, event *streamEvent, incrementalStreaming bool, log wrapper.Log) error { - if event.Data == streamEndDataValue { - m.appendStreamEvent(responseBuilder, event) - return nil - } - - if event.Event != eventResult || event.HttpStatus != httpStatus200 { - // Something goes wrong. Just pass through the event. 
- m.appendStreamEvent(responseBuilder, event) - return nil - } - - qwenResponse := &qwenTextGenResponse{} - if err := json.Unmarshal([]byte(event.Data), qwenResponse); err != nil { - log.Errorf("unable to unmarshal Qwen response: %v", err) - return fmt.Errorf("unable to unmarshal Qwen response: %v", err) - } - - responses := m.buildChatCompletionStreamingResponse(ctx, qwenResponse, incrementalStreaming, log) - for _, response := range responses { - responseBody, err := json.Marshal(response) - if err != nil { - log.Errorf("unable to marshal response: %v", err) - return fmt.Errorf("unable to marshal response: %v", err) - } - modifiedEvent := &*event - modifiedEvent.Data = string(responseBody) - m.appendStreamEvent(responseBuilder, modifiedEvent) - } - - return nil -} - func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onlyOneSystemBeforeFile bool) ([]byte, error) { request := &qwenTextGenRequest{} if err := json.Unmarshal(body, request); err != nil { @@ -552,7 +468,7 @@ func (m *qwenProvider) insertHttpContextMessage(body []byte, content string, onl return json.Marshal(request) } -func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *streamEvent) { +func (m *qwenProvider) appendStreamEvent(responseBuilder *strings.Builder, event *StreamEvent) { responseBuilder.WriteString(streamDataItemKey) responseBuilder.WriteString(event.Data) responseBuilder.WriteString("\n\n") @@ -690,13 +606,16 @@ type qwenTextEmbeddings struct { Embedding []float64 `json:"embedding"` } -func qwenMessageToChatMessage(qwenMessage qwenMessage) chatMessage { - return chatMessage{ - Name: qwenMessage.Name, - Role: qwenMessage.Role, - Content: qwenMessage.Content, - ToolCalls: qwenMessage.ToolCalls, +func qwenMessageToChatMessage(qwenMessage qwenMessage, reasoningContentMode string) chatMessage { + msg := chatMessage{ + Name: qwenMessage.Name, + Role: qwenMessage.Role, + Content: qwenMessage.Content, + ReasoningContent: 
qwenMessage.ReasoningContent, + ToolCalls: qwenMessage.ToolCalls, } + msg.handleReasoningContent(reasoningContentMode) + return msg } func (m *qwenMessage) IsStringContent() bool { diff --git a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go index 033a8cd8c5..59691d855f 100644 --- a/plugins/wasm-go/extensions/ai-proxy/provider/retry.go +++ b/plugins/wasm-go/extensions/ai-proxy/provider/retry.go @@ -1,11 +1,13 @@ package provider import ( + "math/rand" + "net/http" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-proxy/util" "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" "github.com/higress-group/proxy-wasm-go-sdk/proxywasm" "github.com/tidwall/gjson" - "net/http" ) const ( @@ -38,12 +40,12 @@ func (c *ProviderConfig) isRetryOnFailureEnabled() bool { return c.retryOnFailure.enabled } -func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, log wrapper.Log) { +func (c *ProviderConfig) retryFailedRequest(activeProvider Provider, ctx wrapper.HttpContext, apiTokenInUse string, apiTokens []string, log wrapper.Log) { log.Debugf("Retry failed request: provider=%s", activeProvider.GetProviderType()) retryClient := createRetryClient(ctx) apiName, _ := ctx.GetContext(CtxKeyApiName).(ApiName) ctx.SetContext(ctxRetryCount, 1) - c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log) + c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log) } func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, headers http.Header, body []byte, log wrapper.Log) ([][2]string, []byte) { @@ -67,7 +69,8 @@ func (c *ProviderConfig) transformResponseHeadersAndBody(ctx wrapper.HttpContext func (c *ProviderConfig) retryCall( ctx wrapper.HttpContext, log wrapper.Log, activeProvider Provider, apiName ApiName, statusCode int, responseHeaders http.Header, responseBody 
[]byte, - retryClient *wrapper.ClusterClient[wrapper.RouteCluster]) { + retryClient *wrapper.ClusterClient[wrapper.RouteCluster], + apiTokenInUse string, apiTokens []string) { retryCount := ctx.GetContext(ctxRetryCount).(int) log.Debugf("Sent retry request: %d/%d", retryCount, c.retryOnFailure.maxRetries) @@ -76,6 +79,7 @@ func (c *ProviderConfig) retryCall( log.Debugf("Retry request succeeded") headers, body := c.transformResponseHeadersAndBody(ctx, activeProvider, apiName, responseHeaders, responseBody, log) proxywasm.SendHttpResponse(200, headers, body, -1) + return } else { log.Debugf("The retry request still failed, status: %d, responseHeaders: %v, responseBody: %s", statusCode, responseHeaders, string(responseBody)) } @@ -83,26 +87,41 @@ func (c *ProviderConfig) retryCall( retryCount++ if retryCount <= int(c.retryOnFailure.maxRetries) { ctx.SetContext(ctxRetryCount, retryCount) - c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, log) + c.sendRetryRequest(ctx, apiName, activeProvider, retryClient, apiTokenInUse, apiTokens, log) } else { log.Debugf("Reached the maximum retry count: %d", c.retryOnFailure.maxRetries) proxywasm.ResumeHttpResponse() + return } } func (c *ProviderConfig) sendRetryRequest( ctx wrapper.HttpContext, apiName ApiName, activeProvider Provider, - retryClient *wrapper.ClusterClient[wrapper.RouteCluster], log wrapper.Log) { + retryClient *wrapper.ClusterClient[wrapper.RouteCluster], + apiTokenInUse string, apiTokens []string, log wrapper.Log) { + + // Remove last failed token from retry apiTokens list + apiTokens = removeApiTokenFromRetryList(apiTokens, apiTokenInUse, log) + if len(apiTokens) == 0 { + log.Debugf("No more apiTokens to retry") + proxywasm.ResumeHttpResponse() + return + } + // Set apiTokenInUse for the retry request + apiTokenInUse = GetRandomToken(apiTokens) + log.Debugf("Retry request with apiToken: %s", apiTokenInUse) + ctx.SetContext(c.failover.ctxApiTokenInUse, apiTokenInUse) requestHeaders, requestBody := 
c.getRetryRequestHeadersAndBody(ctx, activeProvider, apiName, log) path := getRetryPath(ctx) err := retryClient.Post(path, util.HeaderToSlice(requestHeaders), requestBody, func(statusCode int, responseHeaders http.Header, responseBody []byte) { - c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient) + c.retryCall(ctx, log, activeProvider, apiName, statusCode, responseHeaders, responseBody, retryClient, apiTokenInUse, apiTokens) }, uint32(c.retryOnFailure.retryTimeout)) if err != nil { log.Errorf("Failed to send retry request: %v", err) proxywasm.ResumeHttpResponse() + return } } @@ -126,9 +145,7 @@ func getRetryPath(ctx wrapper.HttpContext) string { } func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, activeProvider Provider, apiName ApiName, log wrapper.Log) (http.Header, []byte) { - // The retry request may be sent with different apiToken, so the header needs to be regenerated - c.SetApiTokenInUse(ctx, log) - + // The retry request is sent with different apiToken, so the header needs to be regenerated requestHeaders := http.Header{ "Content-Type": []string{"application/json"}, } @@ -139,3 +156,27 @@ func (c *ProviderConfig) getRetryRequestHeadersAndBody(ctx wrapper.HttpContext, return requestHeaders, requestBody } + +func removeApiTokenFromRetryList(apiTokens []string, removedApiToken string, log wrapper.Log) []string { + var availableApiTokens []string + for _, s := range apiTokens { + if s != removedApiToken { + availableApiTokens = append(availableApiTokens, s) + } + } + log.Debugf("Remove apiToken %s from retry apiTokens list", removedApiToken) + log.Debugf("Available retry apiTokens: %v", availableApiTokens) + return availableApiTokens +} + +func GetRandomToken(apiTokens []string) string { + count := len(apiTokens) + switch count { + case 0: + return "" + case 1: + return apiTokens[0] + default: + return apiTokens[rand.Intn(count)] + } +} diff --git 
a/plugins/wasm-go/extensions/ai-quota/README.md b/plugins/wasm-go/extensions/ai-quota/README.md index 4305272902..4b0d362fed 100644 --- a/plugins/wasm-go/extensions/ai-quota/README.md +++ b/plugins/wasm-go/extensions/ai-quota/README.md @@ -26,14 +26,14 @@ description: AI 配额管理插件配置参考 `redis`中每一项的配置字段说明 -| 配置项 | 类型 | 必填 | 默认值 | 说明 | -| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- | -| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | -| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | -| username | string | 否 | - | redis用户名 | -| password | string | 否 | - | redis密码 | -| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 | - +| 配置项 | 类型 | 必填 | 默认值 | 说明 | +| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- | +| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | +| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | +| username | string | 否 | - | redis用户名 | +| password | string | 否 | - | redis密码 | +| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 | +| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` | ## 配置示例 diff --git a/plugins/wasm-go/extensions/ai-quota/README_EN.md b/plugins/wasm-go/extensions/ai-quota/README_EN.md index e136a75969..0eff19aeed 100644 --- a/plugins/wasm-go/extensions/ai-quota/README_EN.md +++ b/plugins/wasm-go/extensions/ai-quota/README_EN.md @@ -18,13 +18,14 @@ Plugin execution priority: `750` | `admin_path` | string | Optional | /quota | Prefix for the path to manage quota requests | | `redis` | object | Yes | | Redis related configuration | Explanation of each configuration field in `redis` -| Configuration Item | Type | Required | Default Value | Explanation | 
-|---------------------|------------------|----------|---------------------------------------------------------|-----------------------------------------------| -| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | -| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service | -| username | string | No | - | Redis username | -| password | string | No | - | Redis password | -| timeout | int | No | 1000 | Redis connection timeout in milliseconds | +| Configuration Item | Type | Required | Default Value | Explanation | +|--------------------|--------|----------|---------------------------------------------------------|---------------------------------------------------------------------------------------------------------| +| service_name | string | Required | - | Redis service name, full FQDN name with service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | +| service_port | int | No | Default value for static service is 80; others are 6379 | Service port for the redis service | +| username | string | No | - | Redis username | +| password | string | No | - | Redis password | +| timeout | int | No | 1000 | Redis connection timeout in milliseconds | +| database | int | No | 0 | The database ID used, for example, configured as 1, corresponds to `SELECT 1`. | ## Configuration Example ### Identify request parameter apikey and apply rate limiting accordingly diff --git a/plugins/wasm-go/extensions/ai-quota/go.mod b/plugins/wasm-go/extensions/ai-quota/go.mod index ec77e402e4..8b9e11fd10 100644 --- a/plugins/wasm-go/extensions/ai-quota/go.mod +++ b/plugins/wasm-go/extensions/ai-quota/go.mod @@ -2,11 +2,11 @@ module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-quota go 1.19 -//replace github.com/alibaba/higress/plugins/wasm-go => ../.. +replace github.com/alibaba/higress/plugins/wasm-go => ../.. 
require ( github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de - github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f + github.com/higress-group/proxy-wasm-go-sdk v1.0.0 github.com/tidwall/gjson v1.17.3 github.com/tidwall/resp v0.1.1 ) diff --git a/plugins/wasm-go/extensions/ai-quota/go.sum b/plugins/wasm-go/extensions/ai-quota/go.sum index 996d474d43..b4ab172fe2 100644 --- a/plugins/wasm-go/extensions/ai-quota/go.sum +++ b/plugins/wasm-go/extensions/ai-quota/go.sum @@ -1,12 +1,10 @@ -github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de h1:lDLqj7Hw41ox8VdsP7oCTPhjPa3+QJUCKApcLh2a45Y= -github.com/alibaba/higress/plugins/wasm-go v1.4.3-0.20240808022948-34f5722d93de/go.mod h1:359don/ahMxpfeLMzr29Cjwcu8IywTTDUzWlBPRNLHw= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA= github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew= -github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f h1:ZIiIBRvIw62gA5MJhuwp1+2wWbqL9IGElQ499rUsYYg= -github.com/higress-group/proxy-wasm-go-sdk v0.0.0-20240711023527-ba358c48772f/go.mod h1:hNFjhrLUIq+kJ9bOcs8QtiplSQ61GZXtd2xHKx4BYRo= +github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU= +github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0= github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo= github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/plugins/wasm-go/extensions/ai-quota/main.go b/plugins/wasm-go/extensions/ai-quota/main.go index 2facd912bc..2c6d75e8f4 100644 --- a/plugins/wasm-go/extensions/ai-quota/main.go +++ b/plugins/wasm-go/extensions/ai-quota/main.go @@ -69,6 +69,7 @@ type RedisInfo struct { Username string `required:"false" yaml:"username" json:"username"` Password string `required:"false" yaml:"password" json:"password"` Timeout int `required:"false" yaml:"timeout" json:"timeout"` + Database int `required:"false" yaml:"database" json:"database"` } func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error { @@ -110,17 +111,19 @@ func parseConfig(json gjson.Result, config *QuotaConfig, log wrapper.Log) error if timeout == 0 { timeout = 1000 } + database := int(redisConfig.Get("database").Int()) config.redisInfo.ServiceName = serviceName config.redisInfo.ServicePort = servicePort config.redisInfo.Username = username config.redisInfo.Password = password config.redisInfo.Timeout = timeout + config.redisInfo.Database = database config.redisClient = wrapper.NewRedisClusterClient(wrapper.FQDNCluster{ FQDN: serviceName, Port: int64(servicePort), }) - return config.redisClient.Init(username, password, int64(timeout)) + return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database)) } func onHttpRequestHeaders(context wrapper.HttpContext, config QuotaConfig, log wrapper.Log) types.Action { diff --git a/plugins/wasm-go/extensions/ai-search/README.md b/plugins/wasm-go/extensions/ai-search/README.md new file mode 100644 index 0000000000..5ae133148a --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/README.md @@ -0,0 +1,244 @@ +--- +title: AI 搜索增强 +keywords: [higress,ai search] +description: higress 支持通过集成搜索引擎(Google/Bing/Arxiv/Elasticsearch等)的实时结果,增强DeepSeek-R1等模型的回答准确性和时效性 +--- + +## 功能说明 +
+`ai-search`插件通过集成搜索引擎(Google/Bing/Arxiv/Elasticsearch等)的实时结果,增强AI模型的回答准确性和时效性。插件会自动将搜索结果注入到提示模板中,并根据配置决定是否在最终回答中添加引用来源。 + +## 运行属性 + +插件执行阶段:`默认阶段` +插件执行优先级:`440` + +## 配置字段 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| needReference | bool | 选填 | false | 是否在回答中添加引用来源 | +| referenceFormat | string | 选填 | `"**References:**\n%s"` | 引用内容格式,必须包含%s占位符 | +| defaultLang | string | 选填 | - | 默认搜索语言代码(如zh-CN/en-US) | +| promptTemplate | string | 选填 | 内置模板 | 提示模板,必须包含`{search_results}`和`{question}`占位符 | +| searchFrom | array of object | 必填 | - | 参考下面搜索引擎配置,至少配置一个引擎 | +| searchRewrite | object | 选填 | - | 搜索重写配置,用于使用LLM服务优化搜索查询 | + +## 搜索重写说明 + +搜索重写功能使用LLM服务对用户的原始查询进行分析和优化,可以: +1. 将用户的自然语言查询转换为更适合搜索引擎的关键词组合 +2. 对于Arxiv论文搜索,自动识别相关的论文类别并添加类别限定 +3. 对于私有知识库搜索,将长查询拆分成多个精准的关键词组合 + +强烈建议在使用Arxiv或Elasticsearch引擎时启用此功能。对于Arxiv搜索,它能准确识别论文所属领域并优化英文关键词;对于私有知识库搜索,它能提供更精准的关键词匹配,显著提升搜索效果。 + +## 搜索重写配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| llmServiceName | string | 必填 | - | LLM服务名称 | +| llmServicePort | number | 必填 | - | LLM服务端口 | +| llmApiKey | string | 必填 | - | LLM服务API密钥 | +| llmUrl | string | 必填 | - | LLM服务API地址 | +| llmModelName | string | 必填 | - | LLM模型名称 | +| timeoutMillisecond | number | 选填 | 30000 | API调用超时时间(毫秒) | + +## 搜索引擎通用配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| type | string | 必填 | - | 引擎类型(google/bing/arxiv/elasticsearch/quark) | +| serviceName | string | 必填 | - | 后端服务名称 | +| servicePort | number | 必填 | - | 后端服务端口 | +| apiKey | string | 必填 | - | 搜索引擎API密钥/Aliyun AccessKey | +| count | number | 选填 | 10 | 单次搜索返回结果数量 | +| start | number | 选填 | 0 | 搜索结果偏移量(从第start+1条结果开始返回) | +| timeoutMillisecond | number | 选填 | 5000 | API调用超时时间(毫秒) | +| optionArgs | map | 选填 | - | 搜索引擎特定参数(key-value格式) | + +## Google 特定配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| cx | string | 必填 | - | Google自定义搜索引擎ID,用于指定搜索范围 | + +## 
Arxiv 特定配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| arxivCategory | string | 选填 | - | 搜索的论文[类别](https://arxiv.org/category_taxonomy)(如cs.AI, cs.CL等) | + +## Elasticsearch 特定配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| index | string | 必填 | - | 要搜索的Elasticsearch索引名称 | +| contentField | string | 必填 | - | 要查询的内容字段名称 | +| linkField | string | 必填 | - | 结果链接字段名称 | +| titleField | string | 必填 | - | 结果标题字段名称 | + +## Quark 特定配置 + +| 名称 | 数据类型 | 填写要求 | 默认值 | 描述 | +|------|----------|----------|--------|------| +| secretKey | string | 必填 | - | Aliyun SecretKey | +| endpoint | string | 选填 | iqs.cn-zhangjiakou.aliyuncs.com | 请求搜索引擎服务时的接入点 | + +## 配置示例 + +### 基础配置(单搜索引擎) + +```yaml +needReference: true +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + count: 5 + optionArgs: + fileType: "pdf" +``` + +### Arxiv搜索配置 + +```yaml +searchFrom: +- type: arxiv + serviceName: "arxiv-svc.dns" + servicePort: 443 + arxivCategory: "cs.AI" + count: 10 +``` + + +### 夸克搜索配置 + +```yaml +searchFrom: +- type: quark + serviceName: "quark-svc.dns" + servicePort: 443 + apiKey: "aliyun accessKey" + count: 10 # 搜索网页数,最多10条 + secretKey: "aliyun secretKey" + endpoint: "iqs.cn-zhangjiakou.aliyuncs.com" +``` + +### 多搜索引擎配置 + +```yaml +defaultLang: "en-US" +promptTemplate: | + # Search Results: + {search_results} + + # Please answer this question: + {question} +searchFrom: +- type: google + apiKey: "google-key" + cx: "github-search-id" # 专门搜索GitHub内容的搜索引擎ID + serviceName: "google-svc.dns" + servicePort: 443 +- type: google + apiKey: "google-key" + cx: "news-search-id" # 专门搜索Google News内容的搜索引擎ID + serviceName: "google-svc.dns" + servicePort: 443 +- type: bing + apiKey: "bing-key" + serviceName: "bing-svc.dns" + servicePort: 443 + optionArgs: + answerCount: "5" +``` + +### 并发查询配置 + 
+由于搜索引擎对单次查询返回结果数量有限制(如Google限制单次最多返回100条结果),可以通过以下方式获取更多结果: +1. 设置较小的count值(如10) +2. 通过start参数指定结果偏移量 +3. 并发发起多个查询请求,每个请求的start值按count递增 + +例如,要获取30条结果,可以配置count=10并并发发起3个查询,每个查询的start值分别为0,10,20: + +```yaml +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 0 + count: 10 +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 10 + count: 10 +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 20 + count: 10 +``` + +注意,过高的并发可能会导致限流,需要根据实际情况调整。 + +### Elasticsearch 配置(用于对接私有知识库) + +```yaml +searchFrom: +- type: elasticsearch + serviceName: "es-svc.static" + # 固定地址服务的端口默认是80 + servicePort: 80 + index: "knowledge_base" + contentField: "content" + linkField: "url" + titleField: "title" +``` + +### 自定义引用格式 + +```yaml +needReference: true +referenceFormat: "### 数据来源\n%s" +searchFrom: +- type: bing + apiKey: "your-bing-key" + serviceName: "search-service.dns" + servicePort: 8080 +``` + +### 搜索重写配置 + +```yaml +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 +searchRewrite: + llmServiceName: "llm-svc.dns" + llmServicePort: 443 + llmApiKey: "your-llm-api-key" + llmUrl: "https://api.example.com/v1/chat/completions" + llmModelName: "gpt-3.5-turbo" + timeoutMillisecond: 15000 +``` + +## 注意事项 + +1. 提示词模板必须包含`{search_results}`和`{question}`占位符,可选使用`{cur_date}`插入当前日期(格式:2006年1月2日) +2. 默认模板包含搜索结果处理指引和回答规范,如无特殊需要可以直接用默认模板,否则请根据实际情况修改 +3. 多个搜索引擎是并行查询,总超时时间 = 所有搜索引擎配置中最大timeoutMillisecond值 + 处理时间 +4.
Arxiv搜索不需要API密钥,但可以指定论文类别(arxivCategory)来缩小搜索范围 diff --git a/plugins/wasm-go/extensions/ai-search/README_EN.md b/plugins/wasm-go/extensions/ai-search/README_EN.md new file mode 100644 index 0000000000..1afd955bd9 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/README_EN.md @@ -0,0 +1,243 @@ +--- +title: AI Search Enhancement +keywords: [higress, ai search] +description: Higress supports enhancing the accuracy and timeliness of responses from models like DeepSeek-R1 by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.) +--- + +## Feature Description + +The `ai-search` plugin enhances the accuracy and timeliness of AI model responses by integrating real-time results from search engines (Google/Bing/Arxiv/Elasticsearch etc.). The plugin automatically injects search results into the prompt template and determines whether to add reference sources in the final response based on configuration. + +## Runtime Properties + +Plugin execution stage: `Default stage` +Plugin execution priority: `440` + +## Configuration Fields + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| needReference | bool | Optional | false | Whether to add reference sources in the response | +| referenceFormat | string | Optional | `"**References:**\n%s"` | Reference content format, must include %s placeholder | +| defaultLang | string | Optional | - | Default search language code (e.g. 
zh-CN/en-US) | +| promptTemplate | string | Optional | Built-in template | Prompt template, must include `{search_results}` and `{question}` placeholders | +| searchFrom | array of object | Required | - | Refer to search engine configuration below, at least one engine must be configured | +| searchRewrite | object | Optional | - | Search rewrite configuration, used to optimize search queries using an LLM service | + +## Search Rewrite Description + +The search rewrite feature uses an LLM service to analyze and optimize the user's original query, which can: +1. Convert natural language queries into keyword combinations better suited for search engines +2. For Arxiv paper searches, automatically identify relevant paper categories and add category constraints +3. For private knowledge base searches, break down long queries into multiple precise keyword combinations + +It is strongly recommended to enable this feature when using Arxiv or Elasticsearch engines. For Arxiv searches, it can accurately identify paper domains and optimize English keywords; for private knowledge base searches, it can provide more precise keyword matching, significantly improving search effectiveness. 
+ +## Search Rewrite Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| llmServiceName | string | Required | - | LLM service name | +| llmServicePort | number | Required | - | LLM service port | +| llmApiKey | string | Required | - | LLM service API key | +| llmUrl | string | Required | - | LLM service API URL | +| llmModelName | string | Required | - | LLM model name | +| timeoutMillisecond | number | Optional | 30000 | API call timeout (milliseconds) | + +## Search Engine Common Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| type | string | Required | - | Engine type (google/bing/arxiv/elasticsearch/quark) | +| apiKey | string | Required | - | Search engine API key/Aliyun AccessKey | +| serviceName | string | Required | - | Backend service name | +| servicePort | number | Required | - | Backend service port | +| count | number | Optional | 10 | Number of results returned per search | +| start | number | Optional | 0 | Search result offset (start returning from the start+1 result) | +| timeoutMillisecond | number | Optional | 5000 | API call timeout (milliseconds) | +| optionArgs | map | Optional | - | Search engine specific parameters (key-value format) | + +## Google Specific Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| cx | string | Required | - | Google Custom Search Engine ID, used to specify search scope | + +## Arxiv Specific Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| arxivCategory | string | Optional | - | Search paper [category](https://arxiv.org/category_taxonomy) (e.g. cs.AI, cs.CL etc.) 
| + +## Elasticsearch Specific Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|-----------|-------------|---------------|-------------| +| index | string | Required | - | Elasticsearch index name to search | +| contentField | string | Required | - | Content field name to query | +| linkField | string | Required | - | Result link field name | +| titleField | string | Required | - | Result title field name | + +## Quark Specific Configuration + +| Name | Data Type | Requirement | Default Value | Description | +|------|----------|----------|--------|------| +| secretKey | string | Required | - | Aliyun SecretKey | +| endpoint | string | Optional | iqs.cn-zhangjiakou.aliyuncs.com | Endpoint used when calling the Quark search service | + +## Configuration Examples + +### Basic Configuration (Single Search Engine) + +```yaml +needReference: true +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + count: 5 + optionArgs: + fileType: "pdf" +``` + +### Arxiv Search Configuration + +```yaml +searchFrom: +- type: arxiv + serviceName: "arxiv-svc.dns" + servicePort: 443 + arxivCategory: "cs.AI" + count: 10 +``` + +### Quark Search Configuration + +```yaml +searchFrom: +- type: quark + serviceName: "quark-svc.dns" + servicePort: 443 + apiKey: "aliyun accessKey" + count: 10 + secretKey: "aliyun secretKey" + endpoint: "iqs.cn-zhangjiakou.aliyuncs.com" +``` + +### Multiple Search Engines Configuration + +```yaml +defaultLang: "en-US" +promptTemplate: | + # Search Results: + {search_results} + + # Please answer this question: + {question} +searchFrom: +- type: google + apiKey: "google-key" + cx: "github-search-id" # Search engine ID specifically for GitHub content + serviceName: "google-svc.dns" + servicePort: 443 +- type: google + apiKey: "google-key" + cx: "news-search-id" # Search engine ID specifically for Google News content + serviceName: "google-svc.dns" + servicePort: 443 +- type: bing + 
apiKey: "bing-key" + serviceName: "bing-svc.dns" + servicePort: 443 + optionArgs: + answerCount: "5" +``` + +### Concurrent Query Configuration + +Since search engines limit the number of results per query (e.g. Google returns at most 10 results per request and at most 100 results in total), you can get more results by: +1. Setting a smaller count value (e.g. 10) +2. Specifying the result offset with the start parameter +3. Concurrently initiating multiple query requests, with each request's start value incrementing by count + +For example, to get 30 results, configure count=10 and concurrently initiate 3 queries with start values 0,10,20 respectively: + +```yaml +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 0 + count: 10 +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 10 + count: 10 +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 + start: 20 + count: 10 +``` + +Note that excessive concurrency may lead to rate limiting; adjust it according to your actual situation. 
+ +### Elasticsearch Configuration (For Private Knowledge Base Integration) + +```yaml +searchFrom: +- type: elasticsearch + serviceName: "es-svc.static" + # static ip service use 80 as default port + servicePort: 80 + index: "knowledge_base" + contentField: "content" + linkField: "url" + titleField: "title" +``` + +### Custom Reference Format + +```yaml +needReference: true +referenceFormat: "### Data Sources\n%s" +searchFrom: +- type: bing + apiKey: "your-bing-key" + serviceName: "search-service.dns" + servicePort: 8080 +``` + +### Search Rewrite Configuration + +```yaml +searchFrom: +- type: google + apiKey: "your-google-api-key" + cx: "search-engine-id" + serviceName: "google-svc.dns" + servicePort: 443 +searchRewrite: + llmServiceName: "llm-svc.dns" + llmServicePort: 443 + llmApiKey: "your-llm-api-key" + llmUrl: "https://api.example.com/v1/chat/completions" + llmModelName: "gpt-3.5-turbo" + timeoutMillisecond: 15000 +``` + +## Notes + +1. The prompt template must include `{search_results}` and `{question}` placeholders, optionally use `{cur_date}` to insert current date (format: January 2, 2006) +2. The default template includes search results processing instructions and response specifications, you can use the default template unless there are special needs +3. Multiple search engines query in parallel, total timeout = maximum timeoutMillisecond value among all search engine configurations + processing time +4. 
Arxiv search doesn't require API key, but you can specify paper category (arxivCategory) to narrow search scope diff --git a/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go new file mode 100644 index 0000000000..56a998ca33 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/arxiv/arxiv.go @@ -0,0 +1,134 @@ +package arxiv + +import ( + "bytes" + "errors" + "fmt" + "net/http" + "net/url" + "strings" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/antchfx/xmlquery" + "github.com/tidwall/gjson" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type ArxivSearch struct { + optionArgs map[string]string + start int + count int + timeoutMillisecond uint32 + client wrapper.HttpClient + arxivCategory string +} + +func NewArxivSearch(config *gjson.Result) (*ArxivSearch, error) { + engine := &ArxivSearch{} + serviceName := config.Get("serviceName").String() + if serviceName == "" { + return nil, errors.New("serviceName not found") + } + servicePort := config.Get("servicePort").Int() + if servicePort == 0 { + return nil, errors.New("servicePort not found") + } + engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: serviceName, + Port: servicePort, + }) + engine.start = int(config.Get("start").Uint()) + engine.count = int(config.Get("count").Uint()) + if engine.count == 0 { + engine.count = 10 + } + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) + if engine.timeoutMillisecond == 0 { + engine.timeoutMillisecond = 5000 + } + engine.optionArgs = map[string]string{} + for key, value := range config.Get("optionArgs").Map() { + valStr := value.String() + if valStr != "" { + engine.optionArgs[key] = value.String() + } + } + engine.arxivCategory = config.Get("arxivCategory").String() + return engine, nil +} + +func (a ArxivSearch) NeedExectue(ctx engine.SearchContext) bool { + return ctx.EngineType == 
"arxiv" +} + +func (a ArxivSearch) Client() wrapper.HttpClient { + return a.client +} + +func (a ArxivSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs { + var searchQueryItems []string + for _, q := range ctx.Querys { + searchQueryItems = append(searchQueryItems, fmt.Sprintf("all:%s", url.QueryEscape(q))) + } + searchQuery := strings.Join(searchQueryItems, "+AND+") + category := ctx.ArxivCategory + if category == "" { + category = a.arxivCategory + } + if category != "" { + searchQuery = fmt.Sprintf("%s+AND+cat:%s", searchQuery, category) + } + queryUrl := fmt.Sprintf("https://export.arxiv.org/api/query?search_query=%s&max_results=%d&start=%d", + searchQuery, a.count, a.start) + var extraArgs []string + for key, value := range a.optionArgs { + extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value))) + } + if len(extraArgs) > 0 { + queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&")) + } + return engine.CallArgs{ + Method: http.MethodGet, + Url: queryUrl, + Headers: [][2]string{{"Accept", "application/atom+xml"}}, + TimeoutMillisecond: a.timeoutMillisecond, + } +} + +func (a ArxivSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult { + var results []engine.SearchResult + doc, err := xmlquery.Parse(bytes.NewReader(response)) + if err != nil { + return results + } + + entries := xmlquery.Find(doc, "//entry") + for _, entry := range entries { + title := entry.SelectElement("title").InnerText() + link := "" + for _, l := range entry.SelectElements("link") { + if l.SelectAttr("rel") == "alternate" && l.SelectAttr("type") == "text/html" { + link = l.SelectAttr("href") + break + } + } + summary := entry.SelectElement("summary").InnerText() + publishTime := entry.SelectElement("published").InnerText() + authors := entry.SelectElements("author") + var authorNames []string + for _, author := range authors { + authorNames = append(authorNames, author.SelectElement("name").InnerText()) + } + 
content := fmt.Sprintf("%s\nAuthors: %s\nPublication time: %s", summary, strings.Join(authorNames, ", "), publishTime) + result := engine.SearchResult{ + Title: title, + Link: link, + Content: content, + } + if result.Valid() { + results = append(results, result) + } + } + return results +} diff --git a/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go new file mode 100644 index 0000000000..b24fe33464 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/bing/bing.go @@ -0,0 +1,128 @@ +package bing + +import ( + "errors" + "fmt" + "net/http" + "net/url" + "strings" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type BingSearch struct { + optionArgs map[string]string + apiKey string + start int + count int + timeoutMillisecond uint32 + client wrapper.HttpClient +} + +func NewBingSearch(config *gjson.Result) (*BingSearch, error) { + engine := &BingSearch{} + engine.apiKey = config.Get("apiKey").String() + if engine.apiKey == "" { + return nil, errors.New("apiKey not found") + } + serviceName := config.Get("serviceName").String() + if serviceName == "" { + return nil, errors.New("serviceName not found") + } + servicePort := config.Get("servicePort").Int() + if servicePort == 0 { + return nil, errors.New("servicePort not found") + } + engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: serviceName, + Port: servicePort, + }) + engine.start = int(config.Get("start").Uint()) + engine.count = int(config.Get("count").Uint()) + if engine.count == 0 { + engine.count = 10 + } + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) + if engine.timeoutMillisecond == 0 { + engine.timeoutMillisecond = 5000 + } + engine.optionArgs = map[string]string{} + for key, value := range config.Get("optionArgs").Map() { + valStr := value.String() + if 
valStr != "" { + engine.optionArgs[key] = value.String() + } + } + return engine, nil +} + +func (b BingSearch) NeedExectue(ctx engine.SearchContext) bool { + return ctx.EngineType == "" || ctx.EngineType == "internet" +} + +func (b BingSearch) Client() wrapper.HttpClient { + return b.client +} + +func (b BingSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs { + queryUrl := fmt.Sprintf("https://api.bing.microsoft.com/v7.0/search?q=%s&count=%d&offset=%d", + url.QueryEscape(strings.Join(ctx.Querys, " ")), b.count, b.start) + var extraArgs []string + for key, value := range b.optionArgs { + extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value))) + } + if ctx.Language != "" { + extraArgs = append(extraArgs, fmt.Sprintf("mkt=%s", ctx.Language)) + } + if len(extraArgs) > 0 { + queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&")) + } + return engine.CallArgs{ + Method: http.MethodGet, + Url: queryUrl, + Headers: [][2]string{{"Ocp-Apim-Subscription-Key", b.apiKey}}, + TimeoutMillisecond: b.timeoutMillisecond, + } +} + +func (b BingSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult { + jsonObj := gjson.ParseBytes(response) + var results []engine.SearchResult + webPages := jsonObj.Get("webPages.value") + for _, page := range webPages.Array() { + result := engine.SearchResult{ + Title: page.Get("name").String(), + Link: page.Get("url").String(), + Content: page.Get("snippet").String(), + } + if result.Valid() { + results = append(results, result) + } + deepLinks := page.Get("deepLinks") + for _, inner := range deepLinks.Array() { + innerResult := engine.SearchResult{ + Title: inner.Get("name").String(), + Link: inner.Get("url").String(), + Content: inner.Get("snippet").String(), + } + if innerResult.Valid() { + results = append(results, innerResult) + } + } + } + news := jsonObj.Get("news.value") + for _, article := range news.Array() { + result := engine.SearchResult{ + Title: 
article.Get("name").String(), + Link: article.Get("url").String(), + Content: article.Get("description").String(), + } + if result.Valid() { + results = append(results, result) + } + } + return results +} diff --git a/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go new file mode 100644 index 0000000000..4290558c38 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/elasticsearch/elasticsearch.go @@ -0,0 +1,114 @@ +package elasticsearch + +import ( + "errors" + "fmt" + "net/http" + "strings" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type ElasticsearchSearch struct { + client wrapper.HttpClient + index string + contentField string + linkField string + titleField string + start int + count int + timeoutMillisecond uint32 +} + +func NewElasticsearchSearch(config *gjson.Result) (*ElasticsearchSearch, error) { + engine := &ElasticsearchSearch{} + serviceName := config.Get("serviceName").String() + if serviceName == "" { + return nil, errors.New("serviceName not found") + } + servicePort := config.Get("servicePort").Int() + if servicePort == 0 { + return nil, errors.New("servicePort not found") + } + engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: serviceName, + Port: servicePort, + }) + engine.index = config.Get("index").String() + if engine.index == "" { + return nil, errors.New("index not found") + } + engine.contentField = config.Get("contentField").String() + if engine.contentField == "" { + return nil, errors.New("contentField not found") + } + engine.linkField = config.Get("linkField").String() + if engine.linkField == "" { + return nil, errors.New("linkField not found") + } + engine.titleField = config.Get("titleField").String() + if engine.titleField == "" { + return nil, errors.New("titleField not 
found") + } + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) + if engine.timeoutMillisecond == 0 { + engine.timeoutMillisecond = 5000 + } + engine.start = int(config.Get("start").Uint()) + engine.count = int(config.Get("count").Uint()) + if engine.count == 0 { + engine.count = 10 + } + return engine, nil +} + +func (e ElasticsearchSearch) NeedExectue(ctx engine.SearchContext) bool { + return ctx.EngineType == "private" +} + +func (e ElasticsearchSearch) Client() wrapper.HttpClient { + return e.client +} + +func (e ElasticsearchSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs { + searchBody := fmt.Sprintf(`{ + "query": { + "match": { + "%s": { + "query": "%s", + "operator": "AND" + } + } + } + }`, e.contentField, strings.NewReplacer("\\", "\\\\", "\"", "\\\"", "\n", "\\n", "\r", "\\r", "\t", "\\t").Replace(strings.Join(ctx.Querys, " "))) // escape the query so quotes, backslashes and line breaks cannot break out of the JSON string literal + + return engine.CallArgs{ + Method: http.MethodPost, + Url: fmt.Sprintf("/%s/_search?from=%d&size=%d", e.index, e.start, e.count), + Headers: [][2]string{ + {"Content-Type", "application/json"}, + }, + Body: []byte(searchBody), + TimeoutMillisecond: e.timeoutMillisecond, + } +} + +func (e ElasticsearchSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult { + jsonObj := gjson.ParseBytes(response) + var results []engine.SearchResult + for _, hit := range jsonObj.Get("hits.hits").Array() { + source := hit.Get("_source") + result := engine.SearchResult{ + Title: source.Get(e.titleField).String(), + Link: source.Get(e.linkField).String(), + Content: source.Get(e.contentField).String(), + } + if result.Valid() { + results = append(results, result) + } + } + return results +} diff --git a/plugins/wasm-go/extensions/ai-search/engine/google/google.go b/plugins/wasm-go/extensions/ai-search/engine/google/google.go new file mode 100644 index 0000000000..e189646b99 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/google/google.go @@ -0,0 +1,120 @@ +package google + +import ( + "errors" + "fmt" + "net/http" + "net/url" + "strings" + + 
"github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type GoogleSearch struct { + optionArgs map[string]string + apiKey string + cx string + start int + count int + timeoutMillisecond uint32 + client wrapper.HttpClient +} + +func NewGoogleSearch(config *gjson.Result) (*GoogleSearch, error) { + engine := &GoogleSearch{} + engine.apiKey = config.Get("apiKey").String() + if engine.apiKey == "" { + return nil, errors.New("apiKey not found") + } + engine.cx = config.Get("cx").String() + if engine.cx == "" { + return nil, errors.New("cx not found") + } + serviceName := config.Get("serviceName").String() + if serviceName == "" { + return nil, errors.New("serviceName not found") + } + servicePort := config.Get("servicePort").Int() + if servicePort == 0 { + return nil, errors.New("servicePort not found") + } + engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: serviceName, + Port: servicePort, + }) + engine.start = int(config.Get("start").Uint()) + engine.count = int(config.Get("count").Uint()) + if engine.count == 0 { + engine.count = 10 + } + if engine.count > 10 || engine.start+engine.count > 100 { // reject page sizes above 10 and result windows reaching past the 100th result + return nil, errors.New("count must be less than or equal to 10, and start + count must be less than or equal to 100") + } + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) + if engine.timeoutMillisecond == 0 { + engine.timeoutMillisecond = 5000 + } + engine.optionArgs = map[string]string{} + for key, value := range config.Get("optionArgs").Map() { + valStr := value.String() + if valStr != "" { + engine.optionArgs[key] = valStr + } + } + return engine, nil +} + +func (g GoogleSearch) NeedExectue(ctx engine.SearchContext) bool { + return ctx.EngineType == "" || ctx.EngineType == "internet" +} + +func (g GoogleSearch) Client() wrapper.HttpClient { + return g.client +} + +func (g GoogleSearch) CallArgs(ctx 
engine.SearchContext) engine.CallArgs { + queryUrl := fmt.Sprintf("https://customsearch.googleapis.com/customsearch/v1?cx=%s&q=%s&num=%d&key=%s&start=%d", + g.cx, url.QueryEscape(strings.Join(ctx.Querys, " ")), g.count, g.apiKey, g.start+1) + var extraArgs []string + for key, value := range g.optionArgs { + extraArgs = append(extraArgs, fmt.Sprintf("%s=%s", key, url.QueryEscape(value))) + } + if ctx.Language != "" { + extraArgs = append(extraArgs, fmt.Sprintf("lr=lang_%s", ctx.Language)) + } + if len(extraArgs) > 0 { + queryUrl = fmt.Sprintf("%s&%s", queryUrl, strings.Join(extraArgs, "&")) + } + return engine.CallArgs{ + Method: http.MethodGet, + Url: queryUrl, + Headers: [][2]string{ + {"Accept", "application/json"}, + }, + TimeoutMillisecond: g.timeoutMillisecond, + } +} + +func (g GoogleSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult { + jsonObj := gjson.ParseBytes(response) + var results []engine.SearchResult + for _, item := range jsonObj.Get("items").Array() { + content := item.Get("snippet").String() + metaDescription := item.Get("pagemap.metatags.0.og:description").String() + if metaDescription != "" { + content = fmt.Sprintf("%s\n...\n%s", content, metaDescription) + } + result := engine.SearchResult{ + Title: item.Get("title").String(), + Link: item.Get("link").String(), + Content: content, + } + if result.Valid() { + results = append(results, result) + } + } + return results +} diff --git a/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go new file mode 100644 index 0000000000..84273bb776 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/quark/quark.go @@ -0,0 +1,194 @@ +package quark + +import ( + "crypto/hmac" + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "net/http" + "net/url" + "sort" + "strings" + "time" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + "github.com/tidwall/gjson" + + 
"github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" +) + +type QuarkSearch struct { + apiKey string + secretKey string + timeoutMillisecond uint32 + client wrapper.HttpClient + count uint32 + endpoint string +} + +const ( + Path = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/genericSearch" + ContentSha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // for empty body + Action = "GenericSearch" + Version = "2024-11-11" + SignatureAlgorithm = "ACS3-HMAC-SHA256" + SignedHeaders = "host;x-acs-action;x-acs-content-sha256;x-acs-date;x-acs-signature-nonce;x-acs-version" +) + +func urlEncoding(rawStr string) string { + encodedStr := url.PathEscape(rawStr) + encodedStr = strings.ReplaceAll(encodedStr, "+", "%2B") + encodedStr = strings.ReplaceAll(encodedStr, ":", "%3A") + encodedStr = strings.ReplaceAll(encodedStr, "=", "%3D") + encodedStr = strings.ReplaceAll(encodedStr, "&", "%26") + encodedStr = strings.ReplaceAll(encodedStr, "$", "%24") + encodedStr = strings.ReplaceAll(encodedStr, "@", "%40") + // encodedStr := url.QueryEscape(rawStr) + return encodedStr +} + +func getSignature(stringToSign, secret string) string { + h := hmac.New(sha256.New, []byte(secret)) + h.Write([]byte(stringToSign)) + hash := h.Sum(nil) + return hex.EncodeToString(hash) +} + +func getCanonicalHeaders(params map[string]string) string { + paramArray := []string{} + for k, v := range params { + paramArray = append(paramArray, k+":"+v) + } + sort.Slice(paramArray, func(i, j int) bool { + return paramArray[i] <= paramArray[j] + }) + return strings.Join(paramArray, "\n") + "\n" +} + +func getHasedString(input string) string { + hash := sha256.Sum256([]byte(input)) + hashHex := hex.EncodeToString(hash[:]) + return hashHex +} + +func generateHexID(length int) (string, error) { + bytes := make([]byte, length/2) + if _, err := rand.Read(bytes); err != nil { + return "", err + } + return hex.EncodeToString(bytes), nil +} + +func 
NewQuarkSearch(config *gjson.Result) (*QuarkSearch, error) { + engine := &QuarkSearch{} + engine.apiKey = config.Get("apiKey").String() + if engine.apiKey == "" { + return nil, errors.New("apiKey not found") + } + engine.secretKey = config.Get("secretKey").String() + if engine.secretKey == "" { + return nil, errors.New("secretKey not found") + } + serviceName := config.Get("serviceName").String() + if serviceName == "" { + return nil, errors.New("serviceName not found") + } + servicePort := config.Get("servicePort").Int() + if servicePort == 0 { + return nil, errors.New("servicePort not found") + } + engine.endpoint = config.Get("endpoint").String() + if engine.endpoint == "" { + engine.endpoint = "iqs.cn-zhangjiakou.aliyuncs.com" + } + engine.count = uint32(config.Get("count").Int()) + if engine.count == 0 { + engine.count = 10 + } + engine.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: serviceName, + Port: servicePort, + }) + engine.timeoutMillisecond = uint32(config.Get("timeoutMillisecond").Uint()) + if engine.timeoutMillisecond == 0 { + engine.timeoutMillisecond = 5000 + } + return engine, nil +} + +func (g QuarkSearch) NeedExectue(ctx engine.SearchContext) bool { + return ctx.EngineType == "" || ctx.EngineType == "internet" +} + +func (g QuarkSearch) Client() wrapper.HttpClient { + return g.client +} + +func (g QuarkSearch) CallArgs(ctx engine.SearchContext) engine.CallArgs { + query := strings.Join(ctx.Querys, " ") + canonicalURI := Path + queryParams := map[string]string{ + "query": query, + "timeRange": "NoLimit", + } + queryParamsStr := []string{} + for _, k := range []string{"query", "timeRange"} { // fixed ascending key order: map iteration order is unspecified, and the signed canonical query string must be deterministic + queryParamsStr = append(queryParamsStr, k+"="+urlEncoding(queryParams[k])) + } + canonicalQueryString := strings.Join(queryParamsStr, "&") + timeStamp := time.Now().UTC().Format("2006-01-02T15:04:05Z") + randomID, _ := generateHexID(32) + params := map[string]string{ + "host": g.endpoint, + "x-acs-action": Action, + "x-acs-content-sha256": ContentSha256, + "x-acs-date": 
timeStamp, + "x-acs-signature-nonce": randomID, + "x-acs-version": Version, + } + canonicalHeaders := getCanonicalHeaders(params) + canonicalRequest := http.MethodGet + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + SignedHeaders + "\n" + ContentSha256 + stringToSign := SignatureAlgorithm + "\n" + getHasedString(canonicalRequest) + + authHeaderFmt := "%s Credential=%s,SignedHeaders=%s,Signature=%s" + authHeader := fmt.Sprintf(authHeaderFmt, SignatureAlgorithm, g.apiKey, SignedHeaders, getSignature(stringToSign, g.secretKey)) + + reqParams := url.Values{} + for k, v := range queryParams { + reqParams.Add(k, v) + } + requestURL := fmt.Sprintf("https://%s%s?%s", g.endpoint, Path, reqParams.Encode()) + + return engine.CallArgs{ + Method: http.MethodGet, + Url: requestURL, + Headers: [][2]string{ + {"x-acs-date", timeStamp}, + {"x-acs-signature-nonce", randomID}, + {"x-acs-content-sha256", ContentSha256}, + {"x-acs-version", Version}, + {"x-acs-action", Action}, + {"Authorization", authHeader}, + }, + Body: nil, + TimeoutMillisecond: g.timeoutMillisecond, + } +} + +func (g QuarkSearch) ParseResult(ctx engine.SearchContext, response []byte) []engine.SearchResult { + jsonObj := gjson.ParseBytes(response) + var results []engine.SearchResult + for index, item := range jsonObj.Get("pageItems").Array() { + result := engine.SearchResult{ + Title: item.Get("title").String(), + Link: item.Get("link").String(), + Content: item.Get("mainText").String(), + } + if result.Valid() && index < int(g.count) { + results = append(results, result) + } + } + return results +} diff --git a/plugins/wasm-go/extensions/ai-search/engine/types.go b/plugins/wasm-go/extensions/ai-search/engine/types.go new file mode 100644 index 0000000000..a0d6780bae --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/engine/types.go @@ -0,0 +1,37 @@ +package engine + +import ( + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" +) + +type SearchResult struct { + 
Title string + Link string + Content string +} + +func (result SearchResult) Valid() bool { + return result.Title != "" && result.Link != "" && result.Content != "" +} + +type SearchContext struct { + EngineType string + Querys []string + Language string + ArxivCategory string +} + +type CallArgs struct { + Method string + Url string + Headers [][2]string + Body []byte + TimeoutMillisecond uint32 +} + +type SearchEngine interface { + NeedExectue(ctx SearchContext) bool + Client() wrapper.HttpClient + CallArgs(ctx SearchContext) CallArgs + ParseResult(ctx SearchContext, response []byte) []SearchResult +} diff --git a/plugins/wasm-go/extensions/ai-search/go.mod b/plugins/wasm-go/extensions/ai-search/go.mod new file mode 100644 index 0000000000..17bd972c49 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/go.mod @@ -0,0 +1,26 @@ +module github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search + +go 1.18 + +replace github.com/alibaba/higress/plugins/wasm-go => ../.. + +require ( + github.com/alibaba/higress/plugins/wasm-go v0.0.0 + github.com/antchfx/xmlquery v1.4.4 + github.com/higress-group/proxy-wasm-go-sdk v1.0.0 + github.com/tidwall/gjson v1.18.0 + github.com/tidwall/sjson v1.2.5 +) + +require ( + github.com/antchfx/xpath v1.3.3 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/google/uuid v1.3.0 // indirect + github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 // indirect + github.com/magefile/mage v1.14.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.0 // indirect + github.com/tidwall/resp v0.1.1 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/text v0.21.0 // indirect +) diff --git a/plugins/wasm-go/extensions/ai-search/go.sum b/plugins/wasm-go/extensions/ai-search/go.sum new file mode 100644 index 0000000000..81d555f4bd --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/go.sum @@ -0,0 +1,96 @@ 
+github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2dg= +github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc= +github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs= +github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520 h1:IHDghbGQ2DTIXHBHxWfqCYQW1fKjyJ/I7W1pMyUDeEA= +github.com/higress-group/nottinygc v0.0.0-20231101025119-e93c4c2f8520/go.mod h1:Nz8ORLaFiLWotg6GeKlJMhv8cci8mM43uEnLA5t8iew= +github.com/higress-group/proxy-wasm-go-sdk v1.0.0 h1:BZRNf4R7jr9hwRivg/E29nkVaKEak5MWjBDhWjuHijU= +github.com/higress-group/proxy-wasm-go-sdk v1.0.0/go.mod h1:iiSyFbo+rAtbtGt/bsefv8GU57h9CCLYGJA74/tF5/0= +github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo= +github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 
h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/resp v0.1.1 h1:Ly20wkhqKTmDUPlyM1S7pWo5kk0tDu8OoC/vFArXmwE= +github.com/tidwall/resp v0.1.1/go.mod h1:3/FrruOBAxPTPtundW0VXgmsQ4ZBA0Aw714lVYgwFa0= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys 
v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/plugins/wasm-go/extensions/ai-search/main.go b/plugins/wasm-go/extensions/ai-search/main.go new file mode 100644 index 0000000000..720e688ccc --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/main.go @@ -0,0 +1,568 @@ +// Copyright (c) 2022 Alibaba Group Holding Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + _ "embed" + "errors" + "fmt" + "net/http" + "strings" + "time" + + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm" + "github.com/higress-group/proxy-wasm-go-sdk/proxywasm/types" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" + + "github.com/alibaba/higress/plugins/wasm-go/pkg/wrapper" + + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/arxiv" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/bing" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/elasticsearch" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/google" + "github.com/alibaba/higress/plugins/wasm-go/extensions/ai-search/engine/quark" +) + +type SearchRewrite struct { + client wrapper.HttpClient + url string + apiKey string + modelName string + timeoutMillisecond uint32 + prompt string +} + +type Config struct { + engine []engine.SearchEngine + promptTemplate string + referenceFormat string + defaultLanguage string + needReference bool + searchRewrite *SearchRewrite +} + +const ( + DEFAULT_MAX_BODY_BYTES uint32 = 100 * 1024 * 1024 +) + +//go:embed prompts/full.md +var fullSearchPrompts string + +//go:embed prompts/arxiv.md +var arxivSearchPrompts string + +//go:embed prompts/internet.md +var internetSearchPrompts string + +//go:embed prompts/private.md +var privateSearchPrompts string + +func main() { + wrapper.SetCtx( + "ai-search", + wrapper.ParseConfigBy(parseConfig), + wrapper.ProcessRequestHeadersBy(onHttpRequestHeaders), + wrapper.ProcessRequestBodyBy(onHttpRequestBody), + wrapper.ProcessResponseHeadersBy(onHttpResponseHeaders), + wrapper.ProcessStreamingResponseBodyBy(onStreamingResponseBody), + wrapper.ProcessResponseBodyBy(onHttpResponseBody), + ) +} + +func parseConfig(json gjson.Result, config *Config, log wrapper.Log) error { + config.needReference = 
json.Get("needReference").Bool() + if config.needReference { + config.referenceFormat = json.Get("referenceFormat").String() + if config.referenceFormat == "" { + config.referenceFormat = "**References:**\n%s" + } else if !strings.Contains(config.referenceFormat, "%s") { + return fmt.Errorf("invalid referenceFormat:%s", config.referenceFormat) + } + } + config.defaultLanguage = json.Get("defaultLang").String() + config.promptTemplate = json.Get("promptTemplate").String() + if config.promptTemplate == "" { + if config.needReference { + config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果: +{search_results} +在我给你的搜索结果中,每个结果都是[webpage X begin]...[webpage X end]格式的,X代表每篇文章的数字索引。请在适当的情况下在句子末尾引用上下文。请按照引用编号[X]的格式在答案中对应部分引用上下文。如果一句话源自多个上下文,请列出所有相关的引用编号,例如[3][5],切记不要将引用集中在最后返回引用编号,而是在答案对应部分列出。 +在回答时,请注意以下几点: +- 今天是北京时间:{cur_date}。 +- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。 +- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内,并告诉用户可以查看搜索来源、获得完整信息。优先提供信息完整、最相关的列举项;如非必要,不要主动告诉用户搜索结果未提供的内容。 +- 对于创作类的问题(如写论文),请务必在正文的段落中引用对应的参考编号,例如[3][5],不能只在文章末尾引用。你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。 +- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。 +- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。 +- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。 +- 你的回答应该综合多个相关网页来回答,不能重复引用一个网页。 +- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。 + +# 用户消息为: +{question}` + } else { + config.promptTemplate = `# 以下内容是基于用户发送的消息的搜索结果: +{search_results} +在我给你的搜索结果中,每个结果都是[webpage begin]...[webpage end]格式的。 +在回答时,请注意以下几点: +- 今天是北京时间:{cur_date}。 +- 并非搜索结果的所有内容都与用户的问题密切相关,你需要结合问题,对搜索结果进行甄别、筛选。 +- 对于列举类的问题(如列举所有航班信息),尽量将答案控制在10个要点以内。如非必要,不要主动告诉用户搜索结果未提供的内容。 +- 对于创作类的问题(如写论文),你需要解读并概括用户的题目要求,选择合适的格式,充分利用搜索结果并抽取重要信息,生成符合用户要求、极具思想深度、富有创造力与专业性的答案。你的创作篇幅需要尽可能延长,对于每一个要点的论述要推测用户的意图,给出尽可能多角度的回答要点,且务必信息量大、论述详尽。 +- 如果回答很长,请尽量结构化、分段落总结。如果需要分点作答,尽量控制在5个点以内,并合并相关的内容。 +- 对于客观类的问答,如果问题的答案非常简短,可以适当补充一到两句相关信息,以丰富内容。 +- 你需要根据用户要求和回答内容选择合适、美观的回答格式,确保可读性强。 +- 
你的回答应该综合多个相关网页来回答,但回答中不要给出网页的引用来源。 +- 除非用户要求,否则你回答的语言需要和用户提问的语言保持一致。 + +# 用户消息为: +{question}` + } + } + if !strings.Contains(config.promptTemplate, "{search_results}") || + !strings.Contains(config.promptTemplate, "{question}") { + return fmt.Errorf("invalid promptTemplate, must contains {search_results} and {question}:%s", config.promptTemplate) + } + var internetExists, privateExists, arxivExists bool + for _, e := range json.Get("searchFrom").Array() { + switch e.Get("type").String() { + case "bing": + searchEngine, err := bing.NewBingSearch(&e) + if err != nil { + return fmt.Errorf("bing search engine init failed:%s", err) + } + config.engine = append(config.engine, searchEngine) + internetExists = true + case "google": + searchEngine, err := google.NewGoogleSearch(&e) + if err != nil { + return fmt.Errorf("google search engine init failed:%s", err) + } + config.engine = append(config.engine, searchEngine) + internetExists = true + case "arxiv": + searchEngine, err := arxiv.NewArxivSearch(&e) + if err != nil { + return fmt.Errorf("arxiv search engine init failed:%s", err) + } + config.engine = append(config.engine, searchEngine) + arxivExists = true + case "elasticsearch": + searchEngine, err := elasticsearch.NewElasticsearchSearch(&e) + if err != nil { + return fmt.Errorf("elasticsearch search engine init failed:%s", err) + } + config.engine = append(config.engine, searchEngine) + privateExists = true + case "quark": + searchEngine, err := quark.NewQuarkSearch(&e) + if err != nil { + return fmt.Errorf("elasticsearch search engine init failed:%s", err) + } + config.engine = append(config.engine, searchEngine) + internetExists = true + default: + return fmt.Errorf("unkown search engine:%s", e.Get("type").String()) + } + } + searchRewriteJson := json.Get("searchRewrite") + if searchRewriteJson.Exists() { + searchRewrite := &SearchRewrite{} + llmServiceName := searchRewriteJson.Get("llmServiceName").String() + if llmServiceName == "" { + return 
errors.New("llm_service_name not found") + } + llmServicePort := searchRewriteJson.Get("llmServicePort").Int() + if llmServicePort == 0 { + return errors.New("llmServicePort not found") + } + searchRewrite.client = wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: llmServiceName, + Port: llmServicePort, + }) + llmApiKey := searchRewriteJson.Get("llmApiKey").String() + if llmApiKey == "" { + return errors.New("llmApiKey not found") + } + searchRewrite.apiKey = llmApiKey + llmUrl := searchRewriteJson.Get("llmUrl").String() + if llmUrl == "" { + return errors.New("llmUrl not found") + } + searchRewrite.url = llmUrl + llmModelName := searchRewriteJson.Get("llmModelName").String() + if llmModelName == "" { + return errors.New("llmModelName not found") + } + searchRewrite.modelName = llmModelName + llmTimeout := searchRewriteJson.Get("timeoutMillisecond").Uint() + if llmTimeout == 0 { + llmTimeout = 30000 + } + searchRewrite.timeoutMillisecond = uint32(llmTimeout) + // The consideration here is that internet searches are generally available, but arxiv and private sources may not be. + if arxivExists { + if privateExists { + // private + internet + arxiv + searchRewrite.prompt = fullSearchPrompts + } else { + // internet + arxiv + searchRewrite.prompt = arxivSearchPrompts + } + } else if privateExists { + // private + internet + searchRewrite.prompt = privateSearchPrompts + } else if internetExists { + // only internet + searchRewrite.prompt = internetSearchPrompts + } + config.searchRewrite = searchRewrite + } + if len(config.engine) == 0 { + return fmt.Errorf("no avaliable search engine found") + } + log.Debugf("ai search enabled, config: %#v", config) + return nil +} + +func onHttpRequestHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action { + contentType, _ := proxywasm.GetHttpRequestHeader("content-type") + // The request does not have a body. 
+ if contentType == "" { + return types.ActionContinue + } + if !strings.Contains(contentType, "application/json") { + log.Warnf("content is not json, can't process: %s", contentType) + ctx.DontReadRequestBody() + return types.ActionContinue + } + ctx.SetRequestBodyBufferLimit(DEFAULT_MAX_BODY_BYTES) + _ = proxywasm.RemoveHttpRequestHeader("Accept-Encoding") + return types.ActionContinue +} + +func onHttpRequestBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action { + var queryIndex int + var query string + messages := gjson.GetBytes(body, "messages").Array() + for i := len(messages) - 1; i >= 0; i-- { + if messages[i].Get("role").String() == "user" { + queryIndex = i + query = messages[i].Get("content").String() + break + } + } + if query == "" { + log.Errorf("not found user query in body:%s", body) + return types.ActionContinue + } + searchRewrite := config.searchRewrite + if searchRewrite != nil { + startTime := time.Now() + rewritePrompt := strings.Replace(searchRewrite.prompt, "{question}", query, 1) + rewriteBody, _ := sjson.SetBytes([]byte(fmt.Sprintf( + `{"stream":false,"max_tokens":100,"model":"%s","messages":[{"role":"user","content":""}]}`, + searchRewrite.modelName)), "messages.0.content", rewritePrompt) + err := searchRewrite.client.Post(searchRewrite.url, + [][2]string{ + {"Content-Type", "application/json"}, + {"Authorization", fmt.Sprintf("Bearer %s", searchRewrite.apiKey)}, + }, rewriteBody, + func(statusCode int, responseHeaders http.Header, responseBody []byte) { + if statusCode != http.StatusOK { + log.Errorf("search rewrite failed, status: %d", statusCode) + // After a rewrite failure, no further search is performed, thus quickly identifying the failure. 
+ proxywasm.ResumeHttpRequest() + return + } + + content := gjson.GetBytes(responseBody, "choices.0.message.content").String() + log.Infof("LLM rewritten query response: %s (took %v), original search query:%s", + strings.ReplaceAll(content, "\n", `\n`), time.Since(startTime), query) + if strings.Contains(content, "none") { + log.Debugf("no search required") + proxywasm.ResumeHttpRequest() + return + } + + // Parse search queries from LLM response + var searchContexts []engine.SearchContext + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + continue + } + + engineType := strings.TrimSpace(parts[0]) + queryStr := strings.TrimSpace(parts[1]) + + var ctx engine.SearchContext + ctx.Language = config.defaultLanguage + + switch { + case engineType == "internet": + ctx.EngineType = engineType + ctx.Querys = []string{queryStr} + case engineType == "private": + ctx.EngineType = engineType + ctx.Querys = strings.Split(queryStr, ",") + for i := range ctx.Querys { + ctx.Querys[i] = strings.TrimSpace(ctx.Querys[i]) + } + default: + // Arxiv category + ctx.EngineType = "arxiv" + ctx.ArxivCategory = engineType + ctx.Querys = strings.Split(queryStr, ",") + for i := range ctx.Querys { + ctx.Querys[i] = strings.TrimSpace(ctx.Querys[i]) + } + } + + if len(ctx.Querys) > 0 { + searchContexts = append(searchContexts, ctx) + if ctx.ArxivCategory != "" { + // Conduct i/nquiries in all areas to increase recall. 
+ backupCtx := ctx + backupCtx.ArxivCategory = "" + searchContexts = append(searchContexts, backupCtx) + } + } + } + + if len(searchContexts) == 0 { + log.Errorf("no valid search contexts found") + proxywasm.ResumeHttpRequest() + return + } + if types.ActionContinue == executeSearch(ctx, config, queryIndex, body, searchContexts, log) { + proxywasm.ResumeHttpRequest() + } + }, searchRewrite.timeoutMillisecond) + if err != nil { + log.Errorf("search rewrite call llm service failed:%s", err) + // After a rewrite failure, no further search is performed, thus quickly identifying the failure. + return types.ActionContinue + } + return types.ActionPause + } + + // Execute search without rewrite + return executeSearch(ctx, config, queryIndex, body, []engine.SearchContext{{ + Querys: []string{query}, + Language: config.defaultLanguage, + }}, log) +} + +func executeSearch(ctx wrapper.HttpContext, config Config, queryIndex int, body []byte, searchContexts []engine.SearchContext, log wrapper.Log) types.Action { + searchResultGroups := make([][]engine.SearchResult, len(config.engine)) + var finished int + var searching int + for i := 0; i < len(config.engine); i++ { + configEngine := config.engine[i] + + // Check if engine needs to execute for any of the search contexts + var needsExecute bool + for _, searchCtx := range searchContexts { + if configEngine.NeedExectue(searchCtx) { + needsExecute = true + break + } + } + if !needsExecute { + continue + } + + // Process all search contexts for this engine + for _, searchCtx := range searchContexts { + if !configEngine.NeedExectue(searchCtx) { + continue + } + args := configEngine.CallArgs(searchCtx) + index := i + err := configEngine.Client().Call(args.Method, args.Url, args.Headers, args.Body, + func(statusCode int, responseHeaders http.Header, responseBody []byte) { + defer func() { + finished++ + if finished == searching { + // Merge search results from all engines with deduplication + var mergedResults []engine.SearchResult + 
seenLinks := make(map[string]bool) + for _, results := range searchResultGroups { + for _, result := range results { + if !seenLinks[result.Link] { + seenLinks[result.Link] = true + mergedResults = append(mergedResults, result) + } + } + } + // Format search results for prompt template + var formattedResults []string + var formattedReferences []string + for j, result := range mergedResults { + if config.needReference { + formattedResults = append(formattedResults, + fmt.Sprintf("[webpage %d begin]\n%s\n[webpage %d end]", j+1, result.Content, j+1)) + formattedReferences = append(formattedReferences, + fmt.Sprintf("[%d] [%s](%s)", j+1, result.Title, result.Link)) + } else { + formattedResults = append(formattedResults, + fmt.Sprintf("[webpage begin]\n%s\n[webpage end]", result.Content)) + } + } + // Prepare template variables + curDate := time.Now().In(time.FixedZone("CST", 8*3600)).Format("2006年1月2日") + searchResults := strings.Join(formattedResults, "\n") + log.Debugf("searchResults: %s", searchResults) + // Fill prompt template + prompt := strings.Replace(config.promptTemplate, "{search_results}", searchResults, 1) + prompt = strings.Replace(prompt, "{question}", searchContexts[0].Querys[0], 1) + prompt = strings.Replace(prompt, "{cur_date}", curDate, 1) + // Update request body with processed prompt + modifiedBody, err := sjson.SetBytes(body, fmt.Sprintf("messages.%d.content", queryIndex), prompt) + if err != nil { + log.Errorf("modify request message content failed, err:%v, body:%s", err, body) + } else { + log.Debugf("modifeid body:%s", modifiedBody) + proxywasm.ReplaceHttpRequestBody(modifiedBody) + if config.needReference { + ctx.SetContext("References", strings.Join(formattedReferences, "\n")) + } + } + proxywasm.ResumeHttpRequest() + } + }() + if statusCode != http.StatusOK { + log.Errorf("search call failed, status: %d, engine: %#v", statusCode, configEngine) + return + } + // Append results to existing slice for this engine + searchResultGroups[index] = 
append(searchResultGroups[index], configEngine.ParseResult(searchCtx, responseBody)...) + }, args.TimeoutMillisecond) + if err != nil { + log.Errorf("search call failed, engine: %#v", configEngine) + continue + } + searching++ + } + } + if searching > 0 { + return types.ActionPause + } + return types.ActionContinue +} + +func onHttpResponseHeaders(ctx wrapper.HttpContext, config Config, log wrapper.Log) types.Action { + if !config.needReference { + ctx.DontReadResponseBody() + return types.ActionContinue + } + proxywasm.RemoveHttpResponseHeader("content-length") + contentType, err := proxywasm.GetHttpResponseHeader("Content-Type") + if err != nil || !strings.HasPrefix(contentType, "text/event-stream") { + if err != nil { + log.Errorf("unable to load content-type header from response: %v", err) + } + ctx.BufferResponseBody() + ctx.SetResponseBodyBufferLimit(DEFAULT_MAX_BODY_BYTES) + } + return types.ActionContinue +} + +func onHttpResponseBody(ctx wrapper.HttpContext, config Config, body []byte, log wrapper.Log) types.Action { + references := ctx.GetStringContext("References", "") + if references == "" { + return types.ActionContinue + } + content := gjson.GetBytes(body, "choices.0.message.content") + modifiedContent := fmt.Sprintf("%s\n\n%s", fmt.Sprintf(config.referenceFormat, references), content) + body, err := sjson.SetBytes(body, "choices.0.message.content", modifiedContent) + if err != nil { + log.Errorf("modify response message content failed, err:%v, body:%s", err, body) + return types.ActionContinue + } + proxywasm.ReplaceHttpResponseBody(body) + return types.ActionContinue +} + +func onStreamingResponseBody(ctx wrapper.HttpContext, config Config, chunk []byte, isLastChunk bool, log wrapper.Log) []byte { + if ctx.GetBoolContext("ReferenceAppended", false) { + return chunk + } + references := ctx.GetStringContext("References", "") + if references == "" { + return chunk + } + modifiedChunk, responseReady := setReferencesToFirstMessage(ctx, chunk, 
fmt.Sprintf(config.referenceFormat, references), log) + if responseReady { + ctx.SetContext("ReferenceAppended", true) + return modifiedChunk + } else { + return []byte("") + } +} + +const PARTIAL_MESSAGE_CONTEXT_KEY = "partialMessage" + +func setReferencesToFirstMessage(ctx wrapper.HttpContext, chunk []byte, references string, log wrapper.Log) ([]byte, bool) { + if len(chunk) == 0 { + log.Debugf("chunk is empty") + return nil, false + } + + var partialMessage []byte + partialMessageI := ctx.GetContext(PARTIAL_MESSAGE_CONTEXT_KEY) + if partialMessageI != nil { + if pMsg, ok := partialMessageI.([]byte); ok { + partialMessage = append(pMsg, chunk...) + } else { + log.Warnf("invalid partial message type: %T", partialMessageI) + partialMessage = chunk + } + } else { + partialMessage = chunk + } + + if len(partialMessage) == 0 { + log.Debugf("partial message is empty") + return nil, false + } + messages := strings.Split(string(partialMessage), "\n\n") + if len(messages) > 1 { + firstMessage := messages[0] + log.Debugf("first message: %s", firstMessage) + firstMessage = strings.TrimPrefix(firstMessage, "data:") + firstMessage = strings.TrimPrefix(firstMessage, " ") + firstMessage = strings.TrimSuffix(firstMessage, "\n") + deltaContent := gjson.Get(firstMessage, "choices.0.delta.content") + modifiedMessage, err := sjson.Set(firstMessage, "choices.0.delta.content", fmt.Sprintf("%s\n\n%s", references, deltaContent)) + if err != nil { + log.Errorf("modify response delta content failed, err:%v", err) + return partialMessage, true + } + modifiedMessage = fmt.Sprintf("data: %s", modifiedMessage) + log.Debugf("modified message: %s", firstMessage) + messages[0] = string(modifiedMessage) + return []byte(strings.Join(messages, "\n\n")), true + } + ctx.SetContext(PARTIAL_MESSAGE_CONTEXT_KEY, partialMessage) + return nil, false +} diff --git a/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md new file mode 100644 index 
0000000000..34aeefa413 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/prompts/arxiv.md @@ -0,0 +1,214 @@ +# 目标 +你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv),并按照如下情况回复相应内容: + +## 情况一:不需要查询搜索引擎/论文资料/私有知识库 +### 情况举例: +1. **用户发送的消息**不是在提问或寻求帮助 +2. **用户发送的消息**是要求翻译文字 + +### 思考过程 +根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程 + +### 回复内容示例: +none + +## 情况二:需要查询搜索引擎/论文资料 +### 情况举例: +1. 答复**用户发送的消息**,需依赖互联网上最新的资料 +2. 答复**用户发送的消息**,需依赖论文等专业资料 +3. 通过查询资料,可以更好地答复**用户发送的消息** + +### 思考过程 +根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程: +1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料 +2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向Arxiv论文资料库进行查询,或者需要同时查询多个地方 +3. How: 分析对于要查询的知识和资料,应该提出什么样的问题 +4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整 + 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化 + 4.2. 向Arxiv论文资料库提问: + 4.2.1. 明确问题所属领域,然后确定Arxiv的Category值,Category可选的枚举如下: + - cs.AI: Artificial Intelligence + - cs.AR: Hardware Architecture + - cs.CC: Computational Complexity + - cs.CE: Computational Engineering, Finance, and Science + - cs.CG: Computational Geometry + - cs.CL: Computation and Language + - cs.CR: Cryptography and Security + - cs.CV: Computer Vision and Pattern Recognition + - cs.CY: Computers and Society + - cs.DB: Databases + - cs.DC: Distributed, Parallel, and Cluster Computing + - cs.DL: Digital Libraries + - cs.DM: Discrete Mathematics + - cs.DS: Data Structures and Algorithms + - cs.ET: Emerging Technologies + - cs.FL: Formal Languages and Automata Theory + - cs.GL: General Literature + - cs.GR: Graphics + - cs.GT: Computer Science and Game Theory + - cs.HC: Human-Computer Interaction + - cs.IR: Information Retrieval + - cs.IT: Information Theory + - cs.LG: Machine Learning + - cs.LO: Logic in Computer Science + - cs.MA: Multiagent Systems + - cs.MM: Multimedia + - cs.MS: Mathematical Software + - cs.NA: Numerical Analysis + - cs.NE: Neural and Evolutionary Computing + - cs.NI: Networking and Internet Architecture + - cs.OH: Other Computer Science + - 
cs.OS: Operating Systems + - cs.PF: Performance + - cs.PL: Programming Languages + - cs.RO: Robotics + - cs.SC: Symbolic Computation + - cs.SD: Sound + - cs.SE: Software Engineering + - cs.SI: Social and Information Networks + - cs.SY: Systems and Control + - econ.EM: Econometrics + - econ.GN: General Economics + - econ.TH: Theoretical Economics + - eess.AS: Audio and Speech Processing + - eess.IV: Image and Video Processing + - eess.SP: Signal Processing + - eess.SY: Systems and Control + - math.AC: Commutative Algebra + - math.AG: Algebraic Geometry + - math.AP: Analysis of PDEs + - math.AT: Algebraic Topology + - math.CA: Classical Analysis and ODEs + - math.CO: Combinatorics + - math.CT: Category Theory + - math.CV: Complex Variables + - math.DG: Differential Geometry + - math.DS: Dynamical Systems + - math.FA: Functional Analysis + - math.GM: General Mathematics + - math.GN: General Topology + - math.GR: Group Theory + - math.GT: Geometric Topology + - math.HO: History and Overview + - math.IT: Information Theory + - math.KT: K-Theory and Homology + - math.LO: Logic + - math.MG: Metric Geometry + - math.MP: Mathematical Physics + - math.NA: Numerical Analysis + - math.NT: Number Theory + - math.OA: Operator Algebras + - math.OC: Optimization and Control + - math.PR: Probability + - math.QA: Quantum Algebra + - math.RA: Rings and Algebras + - math.RT: Representation Theory + - math.SG: Symplectic Geometry + - math.SP: Spectral Theory + - math.ST: Statistics Theory + - astro-ph.CO: Cosmology and Nongalactic Astrophysics + - astro-ph.EP: Earth and Planetary Astrophysics + - astro-ph.GA: Astrophysics of Galaxies + - astro-ph.HE: High Energy Astrophysical Phenomena + - astro-ph.IM: Instrumentation and Methods for Astrophysics + - astro-ph.SR: Solar and Stellar Astrophysics + - cond-mat.dis-nn: Disordered Systems and Neural Networks + - cond-mat.mes-hall: Mesoscale and Nanoscale Physics + - cond-mat.mtrl-sci: Materials Science + - cond-mat.other: Other Condensed 
Matter + - cond-mat.quant-gas: Quantum Gases + - cond-mat.soft: Soft Condensed Matter + - cond-mat.stat-mech: Statistical Mechanics + - cond-mat.str-el: Strongly Correlated Electrons + - cond-mat.supr-con: Superconductivity + - gr-qc: General Relativity and Quantum Cosmology + - hep-ex: High Energy Physics - Experiment + - hep-lat: High Energy Physics - Lattice + - hep-ph: High Energy Physics - Phenomenology + - hep-th: High Energy Physics - Theory + - math-ph: Mathematical Physics + - nlin.AO: Adaptation and Self-Organizing Systems + - nlin.CD: Chaotic Dynamics + - nlin.CG: Cellular Automata and Lattice Gases + - nlin.PS: Pattern Formation and Solitons + - nlin.SI: Exactly Solvable and Integrable Systems + - nucl-ex: Nuclear Experiment + - nucl-th: Nuclear Theory + - physics.acc-ph: Accelerator Physics + - physics.ao-ph: Atmospheric and Oceanic Physics + - physics.app-ph: Applied Physics + - physics.atm-clus: Atomic and Molecular Clusters + - physics.atom-ph: Atomic Physics + - physics.bio-ph: Biological Physics + - physics.chem-ph: Chemical Physics + - physics.class-ph: Classical Physics + - physics.comp-ph: Computational Physics + - physics.data-an: Data Analysis, Statistics and Probability + - physics.ed-ph: Physics Education + - physics.flu-dyn: Fluid Dynamics + - physics.gen-ph: General Physics + - physics.geo-ph: Geophysics + - physics.hist-ph: History and Philosophy of Physics + - physics.ins-det: Instrumentation and Detectors + - physics.med-ph: Medical Physics + - physics.optics: Optics + - physics.plasm-ph: Plasma Physics + - physics.pop-ph: Popular Physics + - physics.soc-ph: Physics and Society + - physics.space-ph: Space Physics + - quant-ph: Quantum Physics + - q-bio.BM: Biomolecules + - q-bio.CB: Cell Behavior + - q-bio.GN: Genomics + - q-bio.MN: Molecular Networks + - q-bio.NC: Neurons and Cognition + - q-bio.OT: Other Quantitative Biology + - q-bio.PE: Populations and Evolution + - q-bio.QM: Quantitative Methods + - q-bio.SC: Subcellular Processes 
+ - q-bio.TO: Tissues and Organs + - q-fin.CP: Computational Finance + - q-fin.EC: Economics + - q-fin.GN: General Finance + - q-fin.MF: Mathematical Finance + - q-fin.PM: Portfolio Management + - q-fin.PR: Pricing of Securities + - q-fin.RM: Risk Management + - q-fin.ST: Statistical Finance + - q-fin.TR: Trading and Market Microstructure + - stat.AP: Applications + - stat.CO: Computation + - stat.ME: Methodology + - stat.ML: Machine Learning + - stat.OT: Other Statistics + - stat.TH: Statistics Theory + 4.2.2. 根据问题所属领域,将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个 +5. Final: 按照下面**回复内容示例**进行回复,注意: + - 不要输出思考过程 + - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内 + - 查询搜索引擎时,需要以"internet:"开头 + - 查询Arxiv论文时,需要以Arxiv的Category值开头,例如"cs.AI:" + - 查询Arxiv论文时,优先用英文表述关键词进行搜索 + - 当用多个关键词查询时,关键词之间用","分隔 + - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词 + - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词 + - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题 + +### 回复内容示例: + +#### 用不同语言查询多次搜索引擎 +internet: 黄金价格走势 +internet: The trend of gold prices + +#### 向Arxiv的多个类目查询多次 +cs.AI: attention mechanism +cs.AI: neuron +q-bio.NC: brain,attention mechanism + +#### 向多个查询目标查询多次 +internet: 中国未来房价趋势 +internet: 最新中国经济政策 +econ.TH: policy, real estate + +# 用户发送的消息为: +{question} diff --git a/plugins/wasm-go/extensions/ai-search/prompts/full.md b/plugins/wasm-go/extensions/ai-search/prompts/full.md new file mode 100644 index 0000000000..aec605d1b8 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/prompts/full.md @@ -0,0 +1,221 @@ +# 目标 +你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/论文资料库(Arxiv)/私有知识库,并按照如下情况回复相应内容: + +## 情况一:不需要查询搜索引擎/论文资料/私有知识库 +### 情况举例: +1. **用户发送的消息**不是在提问或寻求帮助 +2. **用户发送的消息**是要求翻译文字 + +### 思考过程 +根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程 + +### 回复内容示例: +none + +## 情况二:需要查询搜索引擎/论文资料/私有知识库 +### 情况举例: +1. 答复**用户发送的消息**,需依赖互联网上最新的资料 +2. 答复**用户发送的消息**,需依赖论文等专业资料 +3. 通过查询资料,可以更好地答复**用户发送的消息** + +### 思考过程 +根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程: +1. 
What: 分析要答复**用户发送的消息**,需要了解什么知识和资料 +2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向Arxiv论文资料库进行查询,还是向私有知识库进行查询,或者需要同时查询多个地方 +3. How: 分析对于要查询的知识和资料,应该提出什么样的问题 +4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整 + 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化 + 4.2. 向私有知识库提问:将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个 + 4.3. 向Arxiv论文资料库提问: + 4.3.1. 明确问题所属领域,然后确定Arxiv的Category值,Category可选的枚举如下: + - cs.AI: Artificial Intelligence + - cs.AR: Hardware Architecture + - cs.CC: Computational Complexity + - cs.CE: Computational Engineering, Finance, and Science + - cs.CG: Computational Geometry + - cs.CL: Computation and Language + - cs.CR: Cryptography and Security + - cs.CV: Computer Vision and Pattern Recognition + - cs.CY: Computers and Society + - cs.DB: Databases + - cs.DC: Distributed, Parallel, and Cluster Computing + - cs.DL: Digital Libraries + - cs.DM: Discrete Mathematics + - cs.DS: Data Structures and Algorithms + - cs.ET: Emerging Technologies + - cs.FL: Formal Languages and Automata Theory + - cs.GL: General Literature + - cs.GR: Graphics + - cs.GT: Computer Science and Game Theory + - cs.HC: Human-Computer Interaction + - cs.IR: Information Retrieval + - cs.IT: Information Theory + - cs.LG: Machine Learning + - cs.LO: Logic in Computer Science + - cs.MA: Multiagent Systems + - cs.MM: Multimedia + - cs.MS: Mathematical Software + - cs.NA: Numerical Analysis + - cs.NE: Neural and Evolutionary Computing + - cs.NI: Networking and Internet Architecture + - cs.OH: Other Computer Science + - cs.OS: Operating Systems + - cs.PF: Performance + - cs.PL: Programming Languages + - cs.RO: Robotics + - cs.SC: Symbolic Computation + - cs.SD: Sound + - cs.SE: Software Engineering + - cs.SI: Social and Information Networks + - cs.SY: Systems and Control + - econ.EM: Econometrics + - econ.GN: General Economics + - econ.TH: Theoretical Economics + - eess.AS: Audio and Speech Processing + - eess.IV: Image and Video Processing + - eess.SP: Signal Processing + - eess.SY: Systems and Control + - math.AC: Commutative 
Algebra + - math.AG: Algebraic Geometry + - math.AP: Analysis of PDEs + - math.AT: Algebraic Topology + - math.CA: Classical Analysis and ODEs + - math.CO: Combinatorics + - math.CT: Category Theory + - math.CV: Complex Variables + - math.DG: Differential Geometry + - math.DS: Dynamical Systems + - math.FA: Functional Analysis + - math.GM: General Mathematics + - math.GN: General Topology + - math.GR: Group Theory + - math.GT: Geometric Topology + - math.HO: History and Overview + - math.IT: Information Theory + - math.KT: K-Theory and Homology + - math.LO: Logic + - math.MG: Metric Geometry + - math.MP: Mathematical Physics + - math.NA: Numerical Analysis + - math.NT: Number Theory + - math.OA: Operator Algebras + - math.OC: Optimization and Control + - math.PR: Probability + - math.QA: Quantum Algebra + - math.RA: Rings and Algebras + - math.RT: Representation Theory + - math.SG: Symplectic Geometry + - math.SP: Spectral Theory + - math.ST: Statistics Theory + - astro-ph.CO: Cosmology and Nongalactic Astrophysics + - astro-ph.EP: Earth and Planetary Astrophysics + - astro-ph.GA: Astrophysics of Galaxies + - astro-ph.HE: High Energy Astrophysical Phenomena + - astro-ph.IM: Instrumentation and Methods for Astrophysics + - astro-ph.SR: Solar and Stellar Astrophysics + - cond-mat.dis-nn: Disordered Systems and Neural Networks + - cond-mat.mes-hall: Mesoscale and Nanoscale Physics + - cond-mat.mtrl-sci: Materials Science + - cond-mat.other: Other Condensed Matter + - cond-mat.quant-gas: Quantum Gases + - cond-mat.soft: Soft Condensed Matter + - cond-mat.stat-mech: Statistical Mechanics + - cond-mat.str-el: Strongly Correlated Electrons + - cond-mat.supr-con: Superconductivity + - gr-qc: General Relativity and Quantum Cosmology + - hep-ex: High Energy Physics - Experiment + - hep-lat: High Energy Physics - Lattice + - hep-ph: High Energy Physics - Phenomenology + - hep-th: High Energy Physics - Theory + - math-ph: Mathematical Physics + - nlin.AO: Adaptation and 
Self-Organizing Systems + - nlin.CD: Chaotic Dynamics + - nlin.CG: Cellular Automata and Lattice Gases + - nlin.PS: Pattern Formation and Solitons + - nlin.SI: Exactly Solvable and Integrable Systems + - nucl-ex: Nuclear Experiment + - nucl-th: Nuclear Theory + - physics.acc-ph: Accelerator Physics + - physics.ao-ph: Atmospheric and Oceanic Physics + - physics.app-ph: Applied Physics + - physics.atm-clus: Atomic and Molecular Clusters + - physics.atom-ph: Atomic Physics + - physics.bio-ph: Biological Physics + - physics.chem-ph: Chemical Physics + - physics.class-ph: Classical Physics + - physics.comp-ph: Computational Physics + - physics.data-an: Data Analysis, Statistics and Probability + - physics.ed-ph: Physics Education + - physics.flu-dyn: Fluid Dynamics + - physics.gen-ph: General Physics + - physics.geo-ph: Geophysics + - physics.hist-ph: History and Philosophy of Physics + - physics.ins-det: Instrumentation and Detectors + - physics.med-ph: Medical Physics + - physics.optics: Optics + - physics.plasm-ph: Plasma Physics + - physics.pop-ph: Popular Physics + - physics.soc-ph: Physics and Society + - physics.space-ph: Space Physics + - quant-ph: Quantum Physics + - q-bio.BM: Biomolecules + - q-bio.CB: Cell Behavior + - q-bio.GN: Genomics + - q-bio.MN: Molecular Networks + - q-bio.NC: Neurons and Cognition + - q-bio.OT: Other Quantitative Biology + - q-bio.PE: Populations and Evolution + - q-bio.QM: Quantitative Methods + - q-bio.SC: Subcellular Processes + - q-bio.TO: Tissues and Organs + - q-fin.CP: Computational Finance + - q-fin.EC: Economics + - q-fin.GN: General Finance + - q-fin.MF: Mathematical Finance + - q-fin.PM: Portfolio Management + - q-fin.PR: Pricing of Securities + - q-fin.RM: Risk Management + - q-fin.ST: Statistical Finance + - q-fin.TR: Trading and Market Microstructure + - stat.AP: Applications + - stat.CO: Computation + - stat.ME: Methodology + - stat.ML: Machine Learning + - stat.OT: Other Statistics + - stat.TH: Statistics Theory + 
4.3.2. 根据问题所属领域,将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个 +5. Final: 按照下面**回复内容示例**进行回复,注意: + - 不要输出思考过程 + - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内 + - 查询搜索引擎时,需要以"internet:"开头 + - 查询私有知识库时,需要以"private:"开头 + - 查询Arxiv论文时,需要以Arxiv的Category值开头,例如"cs.AI:" + - 查询Arxiv论文时,优先用英文表述关键词进行搜索 + - 当用多个关键词查询时,关键词之间用","分隔 + - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词 + - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词 + - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题 + +### 回复内容示例: + +#### 用不同语言查询多次搜索引擎 +internet: 黄金价格走势 +internet: The trend of gold prices + +#### 向Arxiv的多个类目查询多次 +cs.AI: attention mechanism +cs.AI: neuron +q-bio.NC: brain,attention mechanism + +#### 向私有知识库查询多次 +private: 电子钱包,密码 +private: 张三,身份证号 + +#### 向多个查询目标查询多次 +internet: 中国未来房价趋势 +internet: 最新中国经济政策 +econ.TH: policy, real estate +private: 财务状况 + +# 用户发送的消息为: +{question} diff --git a/plugins/wasm-go/extensions/ai-search/prompts/internet.md b/plugins/wasm-go/extensions/ai-search/prompts/internet.md new file mode 100644 index 0000000000..f12836fc62 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/prompts/internet.md @@ -0,0 +1,41 @@ +# 目标 +你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing),并按照如下情况回复相应内容: + +## 情况一:不需要查询搜索引擎 +### 情况举例: +1. **用户发送的消息**不是在提问或寻求帮助 +2. **用户发送的消息**是要求翻译文字 + +### 思考过程 +根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程 + +### 回复内容示例: +none + +## 情况二:需要查询搜索引擎 +### 情况举例: +1. 答复**用户发送的消息**,需依赖互联网上最新的资料 +2. 答复**用户发送的消息**,需依赖论文等专业资料 +3. 通过查询资料,可以更好地答复**用户发送的消息** + +### 思考过程 +根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程: +1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料 +2. How: 分析对于要查询的知识和资料,应该提出什么样的问题 +3. Adjust: 明确查询什么问题后,用一句话概括问题,并且针对搜索引擎做问题优化 +4. 
Final: 按照下面**回复内容示例**进行回复,注意: + - 不要输出思考过程 + - 可以查询多次,多个查询用换行分隔,总查询次数控制在5次以内 + - 需要以"internet:"开头 + - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词 + - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词 + - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题 + +### 回复内容示例: + +#### 用不同语言查询多次搜索引擎 +internet: 黄金价格走势 +internet: The trend of gold prices + +# 用户发送的消息为: +{question} diff --git a/plugins/wasm-go/extensions/ai-search/prompts/private.md b/plugins/wasm-go/extensions/ai-search/prompts/private.md new file mode 100644 index 0000000000..4ba0fc62ce --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/prompts/private.md @@ -0,0 +1,55 @@ +# 目标 +你需要分析**用户发送的消息**,是否需要查询搜索引擎(Google/Bing)/私有知识库,并按照如下情况回复相应内容: + +## 情况一:不需要查询搜索引擎/私有知识库 +### 情况举例: +1. **用户发送的消息**不是在提问或寻求帮助 +2. **用户发送的消息**是要求翻译文字 + +### 思考过程 +根据上面的**情况举例**,如果符合,则按照下面**回复内容示例**进行回复,注意不要输出思考过程 + +### 回复内容示例: +none + +## 情况二:需要查询搜索引擎/私有知识库 +### 情况举例: +1. 答复**用户发送的消息**,需依赖互联网上最新的资料 +2. 答复**用户发送的消息**,需依赖论文等专业资料 +3. 通过查询资料,可以更好地答复**用户发送的消息** + +### 思考过程 +根据上面的**情况举例**,以及其他需要查询资料的情况,如果符合,按照以下步骤思考,并按照下面**回复内容示例**进行回复,注意不要输出思考过程: +1. What: 分析要答复**用户发送的消息**,需要了解什么知识和资料 +2. Where: 判断了解这个知识和资料要向Google等搜索引擎提问,还是向私有知识库进行查询,或者需要同时查询多个地方 +3. How: 分析对于要查询的知识和资料,应该提出什么样的问题 +4. Adjust: 明确要向什么地方查询什么问题后,按下面方式对问题进行调整 + 4.1. 向搜索引擎提问:用一句话概括问题,并且针对搜索引擎做问题优化 + 4.2. 向私有知识库提问:将问题拆分成多组关键词的组合,同时组合中的关键词个数尽量不要超过3个 +5. 
Final: 按照下面**回复内容示例**进行回复,注意: + - 不要输出思考过程 + - 可以向多个查询目标分别查询多次,多个查询用换行分隔,总查询次数控制在5次以内 + - 查询搜索引擎时,需要以"internet:"开头 + - 查询私有知识库时,需要以"private:"开头 + - 当用多个关键词查询时,关键词之间用","分隔 + - 尽量满足**用户发送的消息**中的搜索要求,例如用户要求用英文搜索,则需用英文表述问题和关键词 + - 用户如果没有要求搜索语言,则用和**用户发送的消息**一致的语言表述问题和关键词 + - 如果**用户发送的消息**使用中文,至少要有一条向搜索引擎查询的中文问题 + +### 回复内容示例: + +#### 用不同语言查询多次搜索引擎 +internet: 黄金价格走势 +internet: The trend of gold prices + +#### 向私有知识库查询多次 +private: 电子钱包,密码 +private: 张三,身份证号 + +#### 向多个查询目标查询多次 +internet: 中国未来房价趋势 +internet: 最新中国经济政策 +private: 财务状况 + +# 用户发送的消息为: +{question} diff --git a/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py new file mode 100644 index 0000000000..64fbce9545 --- /dev/null +++ b/plugins/wasm-go/extensions/ai-search/prompts/test_ai_search.py @@ -0,0 +1,56 @@ +import argparse +import requests +import time +import json + +def main(): + # 解析命令行参数 + parser = argparse.ArgumentParser(description='AI Search Test Script') + parser.add_argument('--question', required=True, help='The question to analyze') + parser.add_argument('--prompt', required=True, help='The prompt file to analyze') + args = parser.parse_args() + + # 读取并解析prompts.md模板 + # 这里假设prompts.md已经复制到当前目录 + with open(args.prompt, 'r', encoding='utf-8') as f: + prompt_template = f.read() + + # 替换模板中的{question}变量 + prompt = prompt_template.replace('{question}', args.question) + + # 准备请求数据 + headers = { + 'Content-Type': 'application/json', + } + data = { + "model": "deepseek-v3", + "max_tokens": 100, + "messages": [ + { + "role": "user", + "content": prompt + } + ] + } + + # 发送请求并计时 + start_time = time.time() + try: + response = requests.post( + 'http://localhost:8080/v1/chat/completions', + headers=headers, + data=json.dumps(data) + ) + response.raise_for_status() + end_time = time.time() + + # 处理响应 + result = response.json() + print("Response:") + print(result['choices'][0]['message']['content']) + print(f"\nRequest took {end_time - 
start_time:.2f} seconds") + except requests.exceptions.RequestException as e: + print(f"Request failed: {e}") + +if __name__ == '__main__': + main() diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md index a1d6a2fe36..c3c0c23340 100644 --- a/plugins/wasm-go/extensions/ai-token-ratelimit/README.md +++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README.md @@ -51,14 +51,14 @@ description: AI Token限流插件配置参考 `redis`中每一项的配置字段说明 -| 配置项 | 类型 | 必填 | 默认值 | 说明 | -| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- | -| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | -| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | -| username | string | 否 | - | redis用户名 | -| password | string | 否 | - | redis密码 | -| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 | - +| 配置项 | 类型 | 必填 | 默认值 | 说明 | +| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------- | +| service_name | string | 必填 | - | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | +| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | +| username | string | 否 | - | redis用户名 | +| password | string | 否 | - | redis密码 | +| timeout | int | 否 | 1000 | redis连接超时时间,单位毫秒 | +| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` | ## 配置示例 @@ -258,24 +258,12 @@ spec: '*': "qwen-turbo" ingress: - qwen - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 phase: UNSPECIFIED_PHASE priority: 100 --- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin -metadata: - name: ai-statistics - namespace: higress-system -spec: - defaultConfig: - enable: true - url: 
oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0 - phase: UNSPECIFIED_PHASE - priority: 200 ---- -apiVersion: extensions.higress.io/v1alpha1 -kind: WasmPlugin metadata: name: ai-token-ratelimit namespace: higress-system @@ -294,7 +282,7 @@ spec: # service_name: redis.default.svc.cluster.local service_name: redis.dns service_port: 6379 - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0 phase: UNSPECIFIED_PHASE priority: 600 ``` @@ -370,10 +358,19 @@ spec: pathType: Prefix ``` +转发 higress-gateway 的流量到本地,方便进行测试。 + +```bash +kubectl port-forward svc/higress-gateway -n higress-system 18000:80 +``` + 触发限流效果如下: ```bash -curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ +curl "http://localhost:18000/v1/chat/completions?apikey=123456" \ +-H "Host: qwen-test.com" \ +-H "Content-Type: application/json" \ +-d '{ "model": "gpt-3", "messages": [ { diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md index c07e7aa2f6..cf502198e2 100644 --- a/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md +++ b/plugins/wasm-go/extensions/ai-token-ratelimit/README_EN.md @@ -43,13 +43,14 @@ Field descriptions for each item in `limit_keys` | token_per_day | int | No, optionally select one in `token_per_second`, `token_per_minute`, `token_per_hour`, `token_per_day` | - | Allowed number of token requests per day | Field descriptions for each item in `redis` -| Configuration Item | Type | Required | Default Value | Description | -| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- | -| service_name | string | Required | - | Full FQDN name of the redis service, including 
service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | -| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service | -| username | string | No | - | Redis username | -| password | string | No | - | Redis password | -| timeout | int | No | 1000 | Redis connection timeout in milliseconds | +| Configuration Item | Type | Required | Default Value | Description | +| ----------------------- | ----------------- | -------- | --------------------------------------------------------------- | ----------------------------------------------- | +| service_name | string | Required | - | Full FQDN name of the redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local | +| service_port | int | No | Default value for static addresses (static service) is 80; otherwise, it is 6379 | Input the service port of the redis service | +| username | string | No | - | Redis username | +| password | string | No | - | Redis password | +| timeout | int | No | 1000 | Redis connection timeout in milliseconds | +| database | int | No | 0 | ID of the Redis database to use; for example, a value of 1 corresponds to `SELECT 1`. 
| ## Configuration Examples ### Identify request parameter apikey for differentiated rate limiting @@ -233,24 +234,12 @@ spec: '*': "qwen-turbo" ingress: - qwen - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:v1.0.0 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-proxy:1.0.0 phase: UNSPECIFIED_PHASE priority: 100 --- apiVersion: extensions.higress.io/v1alpha1 kind: WasmPlugin -metadata: - name: ai-statistics - namespace: higress-system -spec: - defaultConfig: - enable: true - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-statistics:v1.0.0 - phase: UNSPECIFIED_PHASE - priority: 200 ---- -apiVersion: extensions.higress.io/v1alpha1 -kind: WasmPlugin metadata: name: ai-token-ratelimit namespace: higress-system @@ -269,7 +258,7 @@ spec: # service_name: redis.default.svc.cluster.local service_name: redis.dns service_port: 6379 - url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:v1.0.0 + url: oci://higress-registry.cn-hangzhou.cr.aliyuncs.com/plugins/ai-token-ratelimit:1.0.0 phase: UNSPECIFIED_PHASE priority: 600 ``` @@ -346,10 +335,19 @@ spec: pathType: Prefix ``` +Forward higress-gateway traffic to a local port to make testing easier. 
+ +```bash +kubectl port-forward svc/higress-gateway -n higress-system 18000:80 +``` + The rate limiting effect is triggered as follows: ```bash -curl "http://qwen-test.com:18000/v1/chat/completions?apikey=123456" -H "Content-Type: application/json" -d '{ +curl "http://localhost:18000/v1/chat/completions?apikey=123456" \ +-H "Host: qwen-test.com" \ +-H "Content-Type: application/json" \ +-d '{ "model": "gpt-3", "messages": [ { diff --git a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go index 9668f18617..743f2925f5 100644 --- a/plugins/wasm-go/extensions/ai-token-ratelimit/config.go +++ b/plugins/wasm-go/extensions/ai-token-ratelimit/config.go @@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig FQDN: serviceName, Port: int64(servicePort), }) - return config.redisClient.Init(username, password, int64(timeout)) + database := int(redisConfig.Get("database").Int()) + return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database)) } func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error { diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md index 883e2535c2..f75ea01bcb 100644 --- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md +++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README.md @@ -52,13 +52,14 @@ description: 基于 Key 集群限流插件配置参考 `redis` 中每一项的配置字段说明。 -| 配置项 | 类型 | 必填 | 默认值 | 说明 | -| ------------ | ------ | ---- | ---------------------------------------------------------- |---------------------------------------------------------------------------| +| 配置项 | 类型 | 必填 | 默认值 | 说明 | +| ------------ | ------ | ---- | ---------------------------------------------------------- | --------------------------------------------------------------------------- | | service_name | string | 必填 | 
- | redis 服务名称,带服务类型的完整 FQDN 名称,例如 my-redis.dns、redis.my-ns.svc.cluster.local | -| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | -| username | string | 否 | - | redis 用户名 | -| password | string | 否 | - | redis 密码 | -| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 | +| service_port | int | 否 | 服务类型为固定地址(static service)默认值为80,其他为6379 | 输入redis服务的服务端口 | +| username | string | 否 | - | redis 用户名 | +| password | string | 否 | - | redis 密码 | +| timeout | int | 否 | 1000 | redis 连接超时时间,单位毫秒 | +| database | int | 否 | 0 | 使用的数据库id,例如配置为1,对应`SELECT 1` | ## 配置示例 diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md index 4a4dcf8633..83e0935d91 100644 --- a/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md +++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/README_EN.md @@ -46,13 +46,15 @@ Description of configuration fields for each item in `limit_keys`. | query_per_day | int | No, one of `query_per_second`, `query_per_minute`, `query_per_hour`, `query_per_day` is optional. | - | Allowed number of requests per day. | Description of configuration fields for each item in `redis`. -| Configuration Item | Type | Required | Default Value | Description | -|---------------------------|---------------|----------|------------------------------------------------------------|---------------------------------------------------------------------------| -| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. | -| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. | -| username | string | No | - | Redis username. | -| password | string | No | - | Redis password. | -| timeout | int | No | 1000 | Redis connection timeout in milliseconds. 
| +| Configuration Item | Type | Required | Default Value | Description | +|--------------------|--------|----------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------| +| service_name | string | Required | - | Full FQDN name of the Redis service, including service type, e.g., my-redis.dns, redis.my-ns.svc.cluster.local. | +| service_port | int | No | 80 for static services; otherwise 6379 | Service port for the Redis service. | +| username | string | No | - | Redis username. | +| password | string | No | - | Redis password. | +| timeout | int | No | 1000 | Redis connection timeout in milliseconds. | +| database | int | No | 0 | ID of the Redis database to use; for example, a value of 1 corresponds to `SELECT 1`. | + ## Configuration Examples diff --git a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go index 3689c36561..00d84b21fc 100644 --- a/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go +++ b/plugins/wasm-go/extensions/cluster-key-rate-limit/config.go @@ -110,7 +110,8 @@ func initRedisClusterClient(json gjson.Result, config *ClusterKeyRateLimitConfig FQDN: serviceName, Port: int64(servicePort), }) - return config.redisClient.Init(username, password, int64(timeout)) + database := int(redisConfig.Get("database").Int()) + return config.redisClient.Init(username, password, int64(timeout), wrapper.WithDataBase(database)) } func parseClusterKeyRateLimitConfig(json gjson.Result, config *ClusterKeyRateLimitConfig) error { diff --git a/plugins/wasm-go/extensions/ext-auth/README.md b/plugins/wasm-go/extensions/ext-auth/README.md index cca0f655c8..de7e2feb83 100644 --- a/plugins/wasm-go/extensions/ext-auth/README.md +++ b/plugins/wasm-go/extensions/ext-auth/README.md @@ -77,6 +77,7 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule` | 名称 | 数据类型 | 必填 | 默认值 | 描述 | | ------------------- | 
-------- | ---- | ------ | ------------------------------------------------------------ | | `match_rule_domain` | string | 否 | - | 匹配规则域名,支持通配符模式,例如 `*.bar.com` | +| `match_rule_method` | []string | 否 | - | 匹配请求方法 | | `match_rule_path` | string | 否 | - | 匹配请求路径的规则 | | `match_rule_type` | string | 否 | - | 匹配请求路径的规则类型,可选 `exact` , `prefix` , `suffix`, `contains`, `regex` | @@ -100,27 +101,41 @@ MatchRule 类型每一项的配置字段说明,在使用 `array of MatchRule` **白名单模式** ```yaml +# 白名单模式配置,符合白名单规则的请求无需验证 match_type: 'whitelist' match_list: - - match_rule_domain: '*.bar.com' - match_rule_path: '/foo' - match_rule_type: 'prefix' + # 所有以 api.example.com 为域名,且路径前缀为 /public 的请求无需验证 + - match_rule_domain: 'api.example.com' + match_rule_path: '/public' + match_rule_type: 'prefix' + # 针对图片资源服务器 images.example.com,所有 GET 请求无需验证 + - match_rule_domain: 'images.example.com' + match_rule_method: ["GET"] + # 所有域名下,路径精确匹配 /health-check 的 HEAD 请求无需验证 + - match_rule_method: ["HEAD"] + match_rule_path: '/health-check' + match_rule_type: 'exact' ``` -泛域名 `*.bar.com` 下前缀匹配 `/foo` 的请求无需验证 - **黑名单模式** ```yaml +# 黑名单模式配置,符合黑名单规则的请求需要验证 match_type: 'blacklist' match_list: - - match_rule_domain: '*.bar.com' - match_rule_path: '/headers' - match_rule_type: 'prefix' + # 所有以 admin.example.com 为域名,且路径前缀为 /sensitive 的请求需要验证 + - match_rule_domain: 'admin.example.com' + match_rule_path: '/sensitive' + match_rule_type: 'prefix' + # 所有域名下,路径精确匹配 /user 的 DELETE 请求需要验证 + - match_rule_method: ["DELETE"] + match_rule_path: '/user' + match_rule_type: 'exact' + # 所有以 legacy.example.com 为域名的 POST 请求需要验证 + - match_rule_domain: 'legacy.example.com' + match_rule_method: ["POST"] ``` -只有泛域名 `*.bar.com` 下前缀匹配 `/header` 的请求需要验证 - ## 配置示例 下面假设 `ext-auth` 服务在 Kubernetes 中 serviceName 为 `ext-auth`,端口 `8090`,路径为 `/auth`,命名空间为 `backend` @@ -185,13 +200,13 @@ content-length: 0 http_service: authorization_request: allowed_headers: - - exact: x-auth-version + - exact: x-auth-version headers_to_add: x-envoy-header: true 
authorization_response: allowed_upstream_headers: - - exact: x-user-id - - exact: x-auth-version + - exact: x-user-id + - exact: x-auth-version endpoint_mode: envoy endpoint: service_name: ext-auth.backend.svc.cluster.local @@ -287,13 +302,13 @@ content-length: 0 http_service: authorization_request: allowed_headers: - - exact: x-auth-version + - exact: x-auth-version headers_to_add: x-envoy-header: true authorization_response: allowed_upstream_headers: - - exact: x-user-id - - exact: x-auth-version + - exact: x-user-id + - exact: x-auth-version endpoint_mode: forward_auth endpoint: service_name: ext-auth.backend.svc.cluster.local diff --git a/plugins/wasm-go/extensions/ext-auth/README_EN.md b/plugins/wasm-go/extensions/ext-auth/README_EN.md index a095690cf6..8a01216009 100644 --- a/plugins/wasm-go/extensions/ext-auth/README_EN.md +++ b/plugins/wasm-go/extensions/ext-auth/README_EN.md @@ -77,6 +77,7 @@ Configuration fields for each item of `MatchRule` type. When using `array of Mat | Name | Data Type | Required | Default Value | Description | | --- | --- | --- | --- | --- | | `match_rule_domain` | string | No | - | The domain of the matching rule, supports wildcard patterns, e.g., `*.bar.com` | +| `match_rule_method` | []string | No | - | Matching rule for the request method | | `match_rule_path` | string | No | - | The rule for matching the request path | | `match_rule_type` | string | No | - | The type of the rule for matching the request path, can be `exact`, `prefix`, `suffix`, `contains`, `regex` | @@ -100,27 +101,41 @@ Supports blacklist and whitelist mode configuration. The default is the whitelis **Whitelist Mode** ```yaml +# Configuration for the whitelist mode. Requests that match the whitelist rules do not need verification. match_type: 'whitelist' match_list: - - match_rule_domain: '*.bar.com' - match_rule_path: '/foo' - match_rule_type: 'prefix' + # Requests with the domain name api.example.com and a path prefixed with /public do not need verification. 
+ - match_rule_domain: 'api.example.com' + match_rule_path: '/public' + match_rule_type: 'prefix' + # For the image resource server images.example.com, all GET requests do not need verification. + - match_rule_domain: 'images.example.com' + match_rule_method: ["GET"] + # For all domains, HEAD requests with an exact path match of /health-check do not need verification. + - match_rule_method: ["HEAD"] + match_rule_path: '/health-check' + match_rule_type: 'exact' ``` -Requests with a prefix match of `/foo` under the wildcard domain `*.bar.com` do not need to be verified. - **Blacklist Mode** ```yaml +# Configuration for the blacklist mode. Requests that match the blacklist rules need verification. match_type: 'blacklist' match_list: - - match_rule_domain: '*.bar.com' - match_rule_path: '/headers' - match_rule_type: 'prefix' + # Requests with the domain name admin.example.com and a path prefixed with /sensitive need verification. + - match_rule_domain: 'admin.example.com' + match_rule_path: '/sensitive' + match_rule_type: 'prefix' + # For all domains, DELETE requests with an exact path match of /user need verification. + - match_rule_method: ["DELETE"] + match_rule_path: '/user' + match_rule_type: 'exact' + # For the domain legacy.example.com, all POST requests need verification. + - match_rule_domain: 'legacy.example.com' + match_rule_method: ["POST"] ``` -Only requests with a prefix match of `/header` under the wildcard domain `*.bar.com` need to be verified. 
- ## Configuration Examples @@ -186,13 +201,13 @@ Configuration of the `ext-auth` plugin: http_service: authorization_request: allowed_headers: - - exact: x-auth-version + - exact: x-auth-version headers_to_add: x-envoy-header: true authorization_response: allowed_upstream_headers: - - exact: x-user-id - - exact: x-auth-version + - exact: x-user-id + - exact: x-auth-version endpoint_mode: envoy endpoint: service_name: ext-auth.backend.svc.cluster.local @@ -286,13 +301,13 @@ Configuration of the `ext-auth` plugin: http_service: authorization_request: allowed_headers: - - exact: x-auth-version + - exact: x-auth-version headers_to_add: x-envoy-header: true authorization_response: allowed_upstream_headers: - - exact: x-user-id - - exact: x-auth-version + - exact: x-user-id + - exact: x-auth-version endpoint_mode: forward_auth endpoint: service_name: ext-auth.backend.svc.cluster.local diff --git a/plugins/wasm-go/extensions/ext-auth/config/config.go b/plugins/wasm-go/extensions/ext-auth/config/config.go index 5709bbf9b0..def0955cea 100644 --- a/plugins/wasm-go/extensions/ext-auth/config/config.go +++ b/plugins/wasm-go/extensions/ext-auth/config/config.go @@ -260,19 +260,28 @@ func parseMatchRules(json gjson.Result, config *ExtAuthConfig) error { var err error matchListConfig.ForEach(func(key, value gjson.Result) bool { - pathMatcher, buildErr := expr.BuildStringMatcher( - value.Get("match_rule_type").Str, - value.Get("match_rule_path").Str, false) - if buildErr != nil { - err = fmt.Errorf("failed to build string matcher for rule with domain %q, path %q, type %q: %w", - value.Get("match_rule_domain").Str, - value.Get("match_rule_path").Str, - value.Get("match_rule_type").Str, - buildErr) - return false // stop iterating + domain := value.Get("match_rule_domain").Str + methodArray := value.Get("match_rule_method").Array() + matchRuleType := value.Get("match_rule_type").Str + matchRulePath := value.Get("match_rule_path").Str + + var pathMatcher expr.Matcher + var buildErr 
error + + if matchRuleType == "" && matchRulePath == "" { + pathMatcher = nil + } else { + pathMatcher, buildErr = expr.BuildStringMatcher(matchRuleType, matchRulePath, false) + if buildErr != nil { + err = fmt.Errorf("failed to build string matcher for rule with domain %q, method %v, path %q, type %q: %w", + domain, methodArray, matchRulePath, matchRuleType, buildErr) + return false // stop iterating + } } + ruleList = append(ruleList, expr.Rule{ - Domain: value.Get("match_rule_domain").Str, + Domain: domain, + Method: convertToStringList(methodArray), Path: pathMatcher, }) return true // keep iterating @@ -297,3 +306,11 @@ func convertToStringMap(result gjson.Result) map[string]string { }) return m } + +func convertToStringList(results []gjson.Result) []string { + interfaces := make([]string, len(results)) + for i, result := range results { + interfaces[i] = result.String() + } + return interfaces +} diff --git a/plugins/wasm-go/extensions/ext-auth/config/config_test.go b/plugins/wasm-go/extensions/ext-auth/config/config_test.go index 02750356e7..299035f458 100644 --- a/plugins/wasm-go/extensions/ext-auth/config/config_test.go +++ b/plugins/wasm-go/extensions/ext-auth/config/config_test.go @@ -218,6 +218,7 @@ func TestParseConfig(t *testing.T) { RuleList: []expr.Rule{ { Domain: "*.bar.com", + Method: []string{}, Path: func() expr.Matcher { pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternPrefix, "/headers", false) if err != nil { @@ -248,6 +249,7 @@ func TestParseConfig(t *testing.T) { "match_list": [ { "match_rule_domain": "*.foo.com", + "match_rule_method": ["GET"], "match_rule_path": "/api", "match_rule_type": "exact" } @@ -269,6 +271,7 @@ func TestParseConfig(t *testing.T) { RuleList: []expr.Rule{ { Domain: "*.foo.com", + Method: []string{"GET"}, Path: func() expr.Matcher { pathMatcher, err := expr.BuildStringMatcher(expr.MatchPatternExact, "/api", false) if err != nil { @@ -284,6 +287,50 @@ func TestParseConfig(t *testing.T) { StatusOnError: 403, 
}, }, + { + name: "Valid Match Rules with Whitelist - Only Method", + json: `{ + "http_service": { + "endpoint_mode": "envoy", + "endpoint": { + "service_name": "example.com", + "service_port": 80, + "path_prefix": "/auth" + } + }, + "match_type": "whitelist", + "match_list": [ + { + "match_rule_method": ["GET"] + } + ] + }`, + expected: ExtAuthConfig{ + HttpService: HttpService{ + EndpointMode: "envoy", + Client: wrapper.NewClusterClient(wrapper.FQDNCluster{ + FQDN: "example.com", + Port: 80, + Host: "", + }), + PathPrefix: "/auth", + Timeout: 1000, + }, + MatchRules: expr.MatchRules{ + Mode: "whitelist", + RuleList: []expr.Rule{ + { + Domain: "", + Method: []string{"GET"}, + Path: nil, + }, + }, + }, + FailureModeAllow: false, + FailureModeAllowHeaderAdd: false, + StatusOnError: 403, + }, + }, { name: "Missing Match Type", json: `{ @@ -342,12 +389,13 @@ func TestParseConfig(t *testing.T) { "match_list": [ { "match_rule_domain": "*.bar.com", + "match_rule_method": ["POST","PUT","DELETE"], "match_rule_path": "/headers", "match_rule_type": "invalid_type" } ] }`, - expectedErr: `failed to build string matcher for rule with domain "*.bar.com", path "/headers", type "invalid_type": unknown string matcher type`, + expectedErr: `failed to build string matcher for rule with domain "*.bar.com", method [POST PUT DELETE], path "/headers", type "invalid_type": unknown string matcher type`, }, } diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go index c4c89fe385..bc74cd9bff 100644 --- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go +++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules.go @@ -3,6 +3,7 @@ package expr import ( "strings" + "ext-auth/util" regexp "github.com/wasilibs/go-re2" ) @@ -18,6 +19,7 @@ type MatchRules struct { type Rule struct { Domain string + Method []string Path Matcher } @@ -28,19 +30,19 @@ func MatchRulesDefaults() MatchRules { } } -// IsAllowedByMode checks if 
the given domain and path are allowed based on the configuration mode. -func (config *MatchRules) IsAllowedByMode(domain, path string) bool { +// IsAllowedByMode checks if the given domain, method and path are allowed based on the configuration mode. +func (config *MatchRules) IsAllowedByMode(domain, method, path string) bool { switch config.Mode { case ModeWhitelist: for _, rule := range config.RuleList { - if rule.matchDomainAndPath(domain, path) { + if rule.matchesAllConditions(domain, method, path) { return true } } return false case ModeBlacklist: for _, rule := range config.RuleList { - if rule.matchDomainAndPath(domain, path) { + if rule.matchesAllConditions(domain, method, path) { return false } } @@ -50,17 +52,21 @@ func (config *MatchRules) IsAllowedByMode(domain, path string) bool { } } -// matchDomainAndPath checks if the given domain and path match the rule. -// If rule.Domain is empty, it only checks rule.Path. -// If rule.Path is empty, it only checks rule.Domain. -// If both are empty, it returns false. -func (rule *Rule) matchDomainAndPath(domain, path string) bool { - if rule.Domain == "" && rule.Path == nil { +// matchesAllConditions checks if the given domain, method and path match all conditions of the rule. +func (rule *Rule) matchesAllConditions(domain, method, path string) bool { + // If all conditions are empty, return false + if rule.Domain == "" && rule.Path == nil && len(rule.Method) == 0 { return false } + + // Check domain and path matching domainMatch := rule.Domain == "" || matchDomain(domain, rule.Domain) pathMatch := rule.Path == nil || rule.Path.Match(path) - return domainMatch && pathMatch + + // Check HTTP method matching: if no methods are specified, any method is allowed + methodMatch := len(rule.Method) == 0 || util.ContainsString(rule.Method, method) + + return domainMatch && pathMatch && methodMatch } // matchDomain checks if the given domain matches the pattern. 
diff --git a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go index 5d041262ac..f6ab9a542f 100644 --- a/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go +++ b/plugins/wasm-go/extensions/ext-auth/expr/match_rules_test.go @@ -6,11 +6,20 @@ import ( "github.com/stretchr/testify/assert" ) +func createMatcher(pattern string, caseSensitive bool) Matcher { + pathMatcher, err := newStringExactMatcher(pattern, caseSensitive) + if err != nil { + panic(err) + } + return pathMatcher +} + func TestIsAllowedByMode(t *testing.T) { tests := []struct { name string config MatchRules domain string + method string path string expected bool }{ @@ -21,17 +30,13 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: true, }, @@ -42,18 +47,14 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", - path: "/bar", + method: "POST", + path: "/foo", expected: false, }, { @@ -63,17 +64,13 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", 
path: "/foo", expected: false, }, @@ -84,18 +81,14 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", - path: "/bar", + method: "POST", + path: "/foo", expected: true, }, { @@ -107,6 +100,7 @@ func TestIsAllowedByMode(t *testing.T) { }, }, domain: "example.com", + method: "GET", path: "/foo", expected: true, }, @@ -117,29 +111,25 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: true, }, { - name: "Both Domain and Path are empty", + name: "All fields (Domain, Method, Path) are empty", config: MatchRules{ Mode: ModeWhitelist, RuleList: []Rule{ - {Domain: "", Path: nil}, + {Domain: "", Method: []string{}, Path: nil}, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: false, }, @@ -150,17 +140,13 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: false, }, @@ -171,17 +157,13 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "*.example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to 
create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "sub.example.com", + method: "GET", path: "/foo", expected: true, }, @@ -192,20 +174,48 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "*.example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: false, }, + { + name: "Whitelist mode, only method matches", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Method: []string{"GET"}, + Path: nil, + }, + }, + }, + domain: "example.com", + method: "GET", + path: "/foo", + expected: true, + }, + { + name: "Whitelist mode, only domain matches", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Domain: "example.com", + Path: nil, + }, + }, + }, + domain: "example.com", + method: "GET", + path: "/foo", + expected: true, + }, { name: "Blacklist mode, generic domain matches", config: MatchRules{ @@ -213,17 +223,13 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "*.example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: createMatcher("/foo", true), }, }, }, domain: "sub.example.com", + method: "GET", path: "/foo", expected: false, }, @@ -234,25 +240,89 @@ func TestIsAllowedByMode(t *testing.T) { RuleList: []Rule{ { Domain: "*.example.com", - Path: func() Matcher { - pathMatcher, err := newStringExactMatcher("/foo", true) - if err != nil { - t.Fatalf("Failed to create Matcher: %v", err) - } - return pathMatcher - }(), + Method: []string{"GET"}, + Path: 
createMatcher("/foo", true), }, }, }, domain: "example.com", + method: "GET", path: "/foo", expected: true, }, + { + name: "Domain with special characters", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Domain: "example-*.com", + Method: []string{"GET"}, + Path: createMatcher("/foo", true), + }, + }, + }, + domain: "example-test.com", + method: "GET", + path: "/foo", + expected: true, + }, + { + name: "Path with special characters", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Domain: "example.com", + Method: []string{"GET"}, + Path: createMatcher("/foo-bar", true), + }, + }, + }, + domain: "example.com", + method: "GET", + path: "/foo-bar", + expected: true, + }, + { + name: "Multiple methods, one matches", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Domain: "example.com", + Method: []string{"GET", "POST"}, + Path: createMatcher("/foo", true), + }, + }, + }, + domain: "example.com", + method: "POST", + path: "/foo", + expected: true, + }, + { + name: "Multiple methods, none match", + config: MatchRules{ + Mode: ModeWhitelist, + RuleList: []Rule{ + { + Domain: "example.com", + Method: []string{"GET", "POST"}, + Path: createMatcher("/foo", true), + }, + }, + }, + domain: "example.com", + method: "PUT", + path: "/foo", + expected: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := tt.config.IsAllowedByMode(tt.domain, tt.path) + result := tt.config.IsAllowedByMode(tt.domain, tt.method, tt.path) assert.Equal(t, tt.expected, result) }) } diff --git a/plugins/wasm-go/extensions/ext-auth/main.go b/plugins/wasm-go/extensions/ext-auth/main.go index 8cc8c05952..7d3ce54b4a 100644 --- a/plugins/wasm-go/extensions/ext-auth/main.go +++ b/plugins/wasm-go/extensions/ext-auth/main.go @@ -51,9 +51,8 @@ const ( ) func onHttpRequestHeaders(ctx wrapper.HttpContext, config config.ExtAuthConfig, log wrapper.Log) types.Action { - path := wrapper.GetRequestPathWithoutQuery() 
// If the request's domain and path match the MatchRules, skip authentication - if config.MatchRules.IsAllowedByMode(ctx.Host(), path) { + if config.MatchRules.IsAllowedByMode(ctx.Host(), ctx.Method(), wrapper.GetRequestPathWithoutQuery()) { ctx.DontReadRequestBody() return types.ActionContinue } diff --git a/plugins/wasm-go/extensions/ext-auth/util/utils.go b/plugins/wasm-go/extensions/ext-auth/util/utils.go index eef1852878..2f6d8586a9 100644 --- a/plugins/wasm-go/extensions/ext-auth/util/utils.go +++ b/plugins/wasm-go/extensions/ext-auth/util/utils.go @@ -37,3 +37,12 @@ func ExtractFromHeader(headers [][2]string, headerKey string) string { } return "" } + +func ContainsString(slice []string, s string) bool { + for _, item := range slice { + if strings.EqualFold(item, s) { + return true + } + } + return false +} diff --git a/plugins/wasm-rust/Dockerfile b/plugins/wasm-rust/Dockerfile index b2c6725b14..ffdea8c537 100644 --- a/plugins/wasm-rust/Dockerfile +++ b/plugins/wasm-rust/Dockerfile @@ -3,10 +3,10 @@ WORKDIR /workspace RUN rustup target add wasm32-wasip1 ARG PLUGIN_NAME="say-hello" ARG BUILD_OPTS="--release" -ARG BUILDRC=".buildrc" +ARG PREBUILD=".prebuild" COPY . . 
WORKDIR /workspace/extensions/$PLUGIN_NAME -RUN if [ -f $BUILDRC ]; then sh $BUILDRC; fi +RUN if [ -f $PREBUILD ]; then sh $PREBUILD; fi RUN cargo build --target wasm32-wasip1 $BUILD_OPTS \ && cp target/wasm32-wasip1/release/*.wasm /main.wasm diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc b/plugins/wasm-rust/extensions/ai-data-masking/.buildrc deleted file mode 100644 index bd317b8605..0000000000 --- a/plugins/wasm-rust/extensions/ai-data-masking/.buildrc +++ /dev/null @@ -1 +0,0 @@ -apt update && apt-get install gcc gcc-multilib llvm clang -y && apt clean \ No newline at end of file diff --git a/plugins/wasm-rust/extensions/ai-data-masking/.prebuild b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild new file mode 100644 index 0000000000..ba1b9b5d6d --- /dev/null +++ b/plugins/wasm-rust/extensions/ai-data-masking/.prebuild @@ -0,0 +1,3 @@ +apt-get update +apt-get install gcc gcc-multilib llvm clang -y +apt-get clean diff --git a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs index ca2db3da42..dc10bc3715 100644 --- a/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs +++ b/plugins/wasm-rust/extensions/ai-data-masking/src/lib.rs @@ -13,8 +13,10 @@ // limitations under the License. 
mod deny_word; +mod msg_window; use crate::deny_word::DenyWord; +use crate::msg_window::MsgWindow; use fancy_regex::Regex; use grok::patterns; use higress_wasm_rust::log::Log; @@ -27,8 +29,8 @@ use proxy_wasm::traits::{Context, HttpContext, RootContext}; use proxy_wasm::types::{Bytes, ContextType, DataAction, HeaderAction, LogLevel}; use rust_embed::Embed; use serde::de::Error; -use serde::Deserialize; use serde::Deserializer; +use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap, VecDeque}; @@ -66,9 +68,12 @@ struct AiDataMasking { config: Option>, mask_map: HashMap>, is_openai: bool, + is_openai_stream: Option, stream: bool, - res_body: Bytes, log: Log, + msg_window: MsgWindow, + char_window_size: usize, + byte_window_size: usize, } fn deserialize_regexp<'de, D>(deserializer: D) -> Result where @@ -213,10 +218,33 @@ struct ResMessage { #[serde(default)] delta: Option, } + +#[derive(Default, Debug, Deserialize, Serialize, Clone)] +struct Usage { + completion_tokens: i32, + prompt_tokens: i32, + total_tokens: i32, +} + +impl Usage { + pub fn add(&mut self, usage: &Usage) { + self.completion_tokens += usage.completion_tokens; + self.prompt_tokens += usage.prompt_tokens; + self.total_tokens += usage.total_tokens; + } + pub fn reset(&mut self) { + self.completion_tokens = 0; + self.prompt_tokens = 0; + self.total_tokens = 0; + } +} + #[derive(Default, Debug, Deserialize)] struct Res { #[serde(default)] choices: Vec, + #[serde(default)] + usage: Usage, } static SYSTEM_PATTERNS: &[(&str, &str)] = &[ @@ -334,9 +362,12 @@ impl RootContextWrapper for AiDataMaskingRoot { mask_map: HashMap::new(), config: None, is_openai: false, + is_openai_stream: None, stream: false, - res_body: Bytes::new(), + msg_window: MsgWindow::new(), log: Log::new(PLUGIN_NAME.to_string()), + char_window_size: 0, + byte_window_size: 0, })) } } @@ -416,32 +447,6 @@ impl AiDataMasking { DataAction::StopIterationAndBuffer } 
- fn process_sse_message(&mut self, sse_message: &str) -> Vec { - let mut messages = Vec::new(); - for msg in sse_message.split('\n') { - if !msg.starts_with("data:") { - continue; - } - let res: Res = if let Some(m) = msg.strip_prefix("data:") { - match serde_json::from_str(m) { - Ok(r) => r, - Err(_) => continue, - } - } else { - continue; - }; - - if res.choices.is_empty() { - continue; - } - for choice in &res.choices { - if let Some(delta) = &choice.delta { - messages.push(delta.content.clone()); - } - } - } - messages - } fn replace_request_msg(&mut self, message: &str) -> String { let config = self.config.as_ref().unwrap(); let mut msg = message.to_string(); @@ -464,6 +469,13 @@ impl AiDataMasking { } Type::Replace => rule.regex.replace(from_word, &rule.value).to_string(), }; + if to_word.len() > self.byte_window_size { + self.byte_window_size = to_word.len(); + } + if to_word.chars().count() > self.char_window_size { + self.char_window_size = to_word.chars().count(); + } + replace_pair.push((from_word.to_string(), to_word.clone())); if rule.restore && !to_word.is_empty() { @@ -499,6 +511,7 @@ impl HttpContext for AiDataMasking { _end_of_stream: bool, ) -> HeaderAction { if has_request_body() { + self.set_http_request_header("Content-Length", None); HeaderAction::StopIteration } else { HeaderAction::Continue @@ -512,58 +525,41 @@ impl HttpContext for AiDataMasking { self.set_http_response_header("Content-Length", None); HeaderAction::Continue } - fn on_http_response_body(&mut self, body_size: usize, _end_of_stream: bool) -> DataAction { + fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> DataAction { if !self.stream { return DataAction::Continue; } - if let Some(body) = self.get_http_response_body(0, body_size) { - self.res_body.extend(&body); - - if let Ok(body_str) = String::from_utf8(self.res_body.clone()) { - if self.is_openai { - let messages = self.process_sse_message(&body_str); - - if self.check_message(&messages.join("")) 
{ + if body_size > 0 { + if let Some(body) = self.get_http_response_body(0, body_size) { + if self.is_openai && self.is_openai_stream.is_none() { + self.is_openai_stream = Some(body.starts_with(b"data:")); + } + self.msg_window.push(&body, self.is_openai_stream.unwrap()); + if let Ok(mut msg) = String::from_utf8(self.msg_window.message.clone()) { + if self.check_message(&msg) { return self.deny(true); } - } else if self.check_message(&body_str) { - return self.deny(true); - } - } - if self.mask_map.is_empty() { - return DataAction::Continue; - } - if let Ok(body_str) = std::str::from_utf8(&body) { - let mut new_str = body_str.to_string(); - if self.is_openai { - let messages = self.process_sse_message(body_str); - - for message in messages { - let mut new_message = message.clone(); + if !self.mask_map.is_empty() { for (from_word, to_word) in self.mask_map.iter() { if let Some(to) = to_word { - new_message = new_message.replace(from_word, to); + msg = msg.replace(from_word, to); } } - if new_message != message { - new_str = new_str.replace( - &json!(message).to_string(), - &json!(new_message).to_string(), - ); - } - } - } else { - for (from_word, to_word) in self.mask_map.iter() { - if let Some(to) = to_word { - new_str = new_str.replace(from_word, to); - } } - } - if new_str != body_str { - self.replace_http_response_body(new_str.as_bytes()); + self.msg_window.message = msg.as_bytes().to_vec(); } } } + let new_body = if end_of_stream { + self.msg_window.finish(self.is_openai_stream.unwrap()) + } else { + self.msg_window.pop( + self.char_window_size * 2, + self.byte_window_size * 2, + self.is_openai_stream.unwrap(), + ) + }; + self.replace_http_response_body(&new_body); DataAction::Continue } } @@ -586,7 +582,6 @@ impl HttpContextWrapper for AiDataMasking { return DataAction::Continue; } let config = self.config.as_ref().unwrap(); - let mut req_body = match String::from_utf8(req_body.clone()) { Ok(r) => r, Err(_) => return DataAction::Continue, diff --git 
a/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs new file mode 100644 index 0000000000..b8b33aacb0 --- /dev/null +++ b/plugins/wasm-rust/extensions/ai-data-masking/src/msg_window.rs @@ -0,0 +1,338 @@ +use higress_wasm_rust::event_stream::EventStream; +use serde_json::json; + +use crate::{Res, Usage}; + +#[derive(Default)] +pub(crate) struct MsgWindow { + stream_parser: EventStream, + pub(crate) message: Vec, + usage: Usage, +} + +impl MsgWindow { + pub fn new() -> Self { + MsgWindow::default() + } + + fn update_event(&mut self, event: Vec) -> Option> { + if event.is_empty() || !event.starts_with(b"data:") { + Some(event) + } else if let Ok(res) = serde_json::from_slice::(&event[b"data:".len()..]) { + for choice in &res.choices { + if let Some(delta) = &choice.delta { + self.message.extend(delta.content.as_bytes()); + } + } + self.usage.add(&res.usage); + None + } else if event.starts_with(b"data: [DONE]") { + None + } else { + Some(event) + } + } + pub fn push(&mut self, data: &[u8], is_openai: bool) { + if is_openai { + self.stream_parser.update(data.to_vec()); + while let Some(event) = self.stream_parser.next() { + if let Some(msg) = self.update_event(event) { + self.message.extend(msg); + } + } + } else { + self.message.extend(data); + } + } + + pub fn pop( + &mut self, + char_window_size: usize, + byte_window_size: usize, + is_openai: bool, + ) -> Vec { + if let Ok(message) = String::from_utf8(self.message.clone()) { + let chars = message.chars().collect::>(); + if chars.len() <= char_window_size { + return Vec::new(); + } + let ret = chars[..chars.len() - char_window_size] + .iter() + .collect::(); + self.message = chars[chars.len() - char_window_size..] 
+ .iter() + .collect::() + .as_bytes() + .to_vec(); + + if is_openai { + let usage = self.usage.clone(); + self.usage.reset(); + format!( + "data: {}\n\n", + json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": ret}}], "usage": usage}) + ).as_bytes().to_vec() + } else { + ret.as_bytes().to_vec() + } + } else { + let ret = self.message[..self.message.len() - byte_window_size].to_vec(); + self.message = self.message[self.message.len() - byte_window_size..].to_vec(); + ret + } + } + + pub fn finish(&mut self, is_openai: bool) -> Vec { + if let Some(event) = self.stream_parser.flush() { + self.update_event(event); + } + if self.message.is_empty() { + Vec::new() + } else if is_openai { + format!( + "data: {}\n\ndata: [DONE]\n\n", + json!({"choices": [{"index": 0, "delta": {"role": "assistant", "content": String::from_utf8_lossy(&self.message)}}], "usage": self.usage}) + ).as_bytes().to_vec() + } else { + self.message.clone() + } + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_msg() { + let mut msg_win = MsgWindow::default(); + let data = r#"data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 是"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"一个"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"基于"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高性能"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"服务"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"网格"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"数据"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"平面"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"项目"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"旨在"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872009,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吞"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"吐"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"量"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"低"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"延迟"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"和"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"可"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"扩展"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"的服务"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"通信"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"管理"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"它"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"为企业"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"级"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"应用"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"提供了"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"丰富的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流量"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"治理"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"功能"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"如"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"负载"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"均衡"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"熔"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"断"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"、"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"限"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"流"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872010,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"等"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":",并"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"支持"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"多"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"协议"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"代理"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"("},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"包括"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"."},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"1"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" HTTP"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"/"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"2"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" g"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"RPC"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":")。"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"H"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ig"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"ress"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"设计"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"目标"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"是"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"优化"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" Ist"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"io"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":" 在"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"大规模"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"集群"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872011,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"中的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"性能"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"表现"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":","},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"满足"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"高"},"finish_reason":null}]} + +data: 
{"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"并发"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"场景"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"下的"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"需求"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":"。"},"finish_reason":null}]} + +data: {"id":"chatcmpl-936","object":"chat.completion.chunk","created":1739872012,"model":"qwen2.5-coder:32b","system_fingerprint":"fp_ollama","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":"stop"}]} + +data: [DONE] + +"#; + let mut buffer = Vec::new(); + for line in data.split("\n\n") { + msg_win.push(line.as_bytes(), true); + msg_win.push(b"\n\n", true); + if let Ok(mut msg) = String::from_utf8(msg_win.message.clone()) { + msg = msg.replace("Higress", "***higress***"); + msg_win.message = msg.as_bytes().to_vec(); + } + buffer.extend(msg_win.pop(7, 7, true)); + } + buffer.extend(msg_win.finish(true)); + let mut message = String::new(); + for line in buffer.split(|&x| x == b'\n') { + if line.is_empty() { + continue; + } + assert!(line.starts_with(b"data:")); + if line.starts_with(b"data: 
[DONE]") { + continue; + } + let des = serde_json::from_slice(&line[b"data:".len()..]); + assert!(des.is_ok()); + let res: Res = des.unwrap(); + for choice in &res.choices { + if let Some(delta) = &choice.delta { + message.push_str(&delta.content); + } + } + } + assert_eq!(message, "***higress*** 是一个基于 Istio 的高性能服务网格数据平面项目,旨在提供高吞吐量、低延迟和可扩展的服务通信管理。它为企业级应用提供了丰富的流量治理功能,如负载均衡、熔断、限流等,并支持多协议代理(包括 HTTP/1.1, HTTP/2, gRPC)。***higress*** 的设计目标是优化 Istio 在大规模集群中的性能表现,满足高并发场景下的需求。"); + } +} diff --git a/plugins/wasm-rust/src/event_stream.rs b/plugins/wasm-rust/src/event_stream.rs index 97715dcac1..cb12a35f00 100644 --- a/plugins/wasm-rust/src/event_stream.rs +++ b/plugins/wasm-rust/src/event_stream.rs @@ -108,10 +108,7 @@ impl EventStream { } fn is_2eol(&self, i: usize) -> Option { - let size1 = match self.is_eol(i) { - None => return None, - Some(size1) => size1, - }; + let size1 = self.is_eol(i)?; if i + size1 < self.buffer.len() { match self.is_eol(i + size1) { None => { diff --git a/tools/hack/build-envoy.sh b/tools/hack/build-envoy.sh index c07f24cb1f..931d0f5b1f 100755 --- a/tools/hack/build-envoy.sh +++ b/tools/hack/build-envoy.sh @@ -30,9 +30,11 @@ fi CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,destination=/home/package " CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/envoy,destination=/home/envoy " +BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"} + BUILD_WITH_CONTAINER=1 \ CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \ BUILD_ENVOY_BINARY_ONLY=1 \ DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \ - IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools-proxy:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \ + IMG=${BUILD_TOOLS_IMG} \ make test_release diff --git a/tools/hack/build-istio-image.sh b/tools/hack/build-istio-image.sh index 2cb46578be..5c46753827 100755 --- 
a/tools/hack/build-istio-image.sh +++ b/tools/hack/build-istio-image.sh @@ -25,14 +25,34 @@ CONDITIONAL_HOST_MOUNTS+="--mount type=bind,source=${ROOT}/external/package,dest DOCKER_RUN_OPTIONS+="-e HTTP_PROXY -e HTTPS_PROXY" +BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"} + +ORIGINAL_HUB=${HUB} + +echo "IMG_URL=$IMG_URL" + +if [ -n "$IMG_URL" ]; then + TAG=${IMG_URL#*:} + HUB=${IMG_URL%:*} + HUB=${HUB%/*} + if [ "$TAG" == "${IMG_URL}" ]; then + TAG=latest + fi +fi + +echo "HUB=$HUB" +echo "TAG=$TAG" + GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \ ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \ BUILD_WITH_CONTAINER=1 \ USE_REAL_USER=${USE_REAL_USER:-0} \ CONDITIONAL_HOST_MOUNTS=${CONDITIONAL_HOST_MOUNTS} \ DOCKER_BUILD_VARIANTS=default DOCKER_TARGETS="${DOCKER_TARGETS}" \ - ISTIO_BASE_REGISTRY="${HUB}" \ + ISTIO_BASE_REGISTRY="${ORIGINAL_HUB}" \ BASE_VERSION="${HIGRESS_BASE_VERSION}" \ DOCKER_RUN_OPTIONS=${DOCKER_RUN_OPTIONS} \ - IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \ + HUB="${HUB}" \ + TAG="${TAG}" \ + IMG=${BUILD_TOOLS_IMG} \ make "$@" diff --git a/tools/hack/build-istio-pilot.sh b/tools/hack/build-istio-pilot.sh index 351ac8962a..7acf9d1231 100755 --- a/tools/hack/build-istio-pilot.sh +++ b/tools/hack/build-istio-pilot.sh @@ -19,7 +19,9 @@ set -euo pipefail source "$(dirname -- "$0")/setup-istio-env.sh" cd ${ROOT}/external/istio -rm -rf out/linux_${TARGET_ARCH}; +rm -rf out/linux_${TARGET_ARCH}; + +BUILD_TOOLS_IMG=${BUILD_TOOLS_IMG:-"higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613"} GOOS_LOCAL=linux TARGET_OS=linux TARGET_ARCH=${TARGET_ARCH} \ ISTIO_ENVOY_LINUX_RELEASE_URL=${ISTIO_ENVOY_LINUX_RELEASE_URL} \ @@ -28,5 +30,5 @@ GOOS_LOCAL=linux TARGET_OS=linux 
TARGET_ARCH=${TARGET_ARCH} \ ISTIO_BASE_REGISTRY="${HUB}" \ BASE_VERSION="${HIGRESS_BASE_VERSION}" \ DOCKER_RUN_OPTIONS="--user root -e HTTP_PROXY -e HTTPS_PROXY" \ - IMG=higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/build-tools:release-1.19-ef344298e65eeb2d9e2d07b87eb4e715c2def613 \ + IMG=${BUILD_TOOLS_IMG} \ make build-linux