Skip to content

Commit

Permalink
Merge pull request #1080 from CrowleyRajapakse/master
Browse files Browse the repository at this point in the history
Adding health check and readiness/liveness probes for APK Agent
  • Loading branch information
CrowleyRajapakse authored Feb 19, 2024
2 parents 6b38e51 + 941f28f commit 5d3459e
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 10 deletions.
11 changes: 8 additions & 3 deletions apim-apk-agent/internal/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
"time"

"github.com/fsnotify/fsnotify"
"github.com/wso2/apk/adapter/pkg/health"
healthservice "github.com/wso2/apk/adapter/pkg/health/api/wso2/health/service"
cpv1alpha2 "github.com/wso2/apk/common-go-libs/apis/cp/v1alpha2"
dpv1alpha1 "github.com/wso2/apk/common-go-libs/apis/dp/v1alpha1"
dpv1alpha2 "github.com/wso2/apk/common-go-libs/apis/dp/v1alpha2"
Expand All @@ -43,6 +43,7 @@ import (
logging "github.com/wso2/product-apim-tooling/apim-apk-agent/internal/logging"
"github.com/wso2/product-apim-tooling/apim-apk-agent/internal/messaging"
"github.com/wso2/product-apim-tooling/apim-apk-agent/internal/synchronizer"
"github.com/wso2/product-apim-tooling/apim-apk-agent/pkg/health"
"github.com/wso2/product-apim-tooling/apim-apk-agent/pkg/managementserver"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
Expand Down Expand Up @@ -151,6 +152,7 @@ func Run(conf *config.Config) {

// Load initial data from control plane
eventhub.LoadInitialData(conf, mgr.GetClient())
health.RestService.SetStatus(true)

if eventHubEnabled {
var connectionURLList = conf.ControlPlane.BrokerConnectionParameters.EventListeningEndpoints
Expand All @@ -161,6 +163,7 @@ func Run(conf *config.Config) {

// Load initial KM data from control plane
synchronizer.FetchKeyManagersOnStartUp(mgr.GetClient())
health.NotificationListenerService.SetStatus(true)

var grpcOptions []grpc.ServerOption
grpcOptions = append(grpcOptions, grpc.KeepaliveParams(
Expand Down Expand Up @@ -201,13 +204,15 @@ func Run(conf *config.Config) {
loggers.LoggerAPKOperator.ErrorC(logging.PrintError(logging.Error1100, logging.BLOCKER, "Failed to listen on port: %v, error: %v", port, err.Error()))
}
apkmgt.RegisterEventStreamServiceServer(grpcServer, &managementserver.EventServer{})
// register health service
healthservice.RegisterHealthServer(grpcServer, &health.Server{})
loggers.LoggerAPKOperator.Info("port: ", port, " APK agent Listening for gRPC connections")
go managementserver.StartInternalServer(restPort)
go func() {
loggers.LoggerAPKOperator.Info("Starting GRPC server.")
health.CommonEnforcerGrpcService.SetStatus(true)
health.CommonControllerGrpcService.SetStatus(true)
if err = grpcServer.Serve(lis); err != nil {
health.CommonEnforcerGrpcService.SetStatus(false)
health.CommonControllerGrpcService.SetStatus(false)
loggers.LoggerAPKOperator.ErrorC(logging.PrintError(logging.Error1101, logging.BLOCKER, "Failed to start XDS GRPS server, error: %v", err.Error()))
}
}()
Expand Down
92 changes: 92 additions & 0 deletions apim-apk-agent/pkg/health/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2024, WSO2 LLC. (http://www.wso2.org) All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package health

import (
"context"
"sync"

healthservice "github.com/wso2/apk/adapter/pkg/health/api/wso2/health/service"
logger "github.com/wso2/product-apim-tooling/apim-apk-agent/pkg/loggers"
)

// Package-level health bookkeeping.
var (
	// mutexForHealthUpdate is held by SetStatus while it writes serviceHealthStatus.
	mutexForHealthUpdate sync.Mutex

	// serviceHealthStatus maps a service name to its most recently set health state.
	serviceHealthStatus = map[string]bool{}

	// healthStatuses gives the label used in log messages for each health state.
	healthStatuses = map[bool]string{
		false: "UNHEALTHY",
		true:  "HEALTHY",
	}
)

// service is the name under which a component's health state is stored.
type service string

// Names of the components whose health state can be set through SetStatus.
const (
	// RestService is the health key of the REST service component.
	RestService service = "apk.apim.agent.internal.RestService"

	// NotificationListenerService is the health key of the notification listener component.
	NotificationListenerService service = "apk.apim.agent.internal.NotificationListenerService"

	// CommonControllerGrpcService is the health key of the common controller gRPC component.
	CommonControllerGrpcService service = "apk.apim.agent.internal.CommonControllerGrpcService"
)

// SetStatus stores isHealthy as the current health state of service s and logs
// the change. The log call and the map write both happen while
// mutexForHealthUpdate is held.
func (s service) SetStatus(isHealthy bool) {
	mutexForHealthUpdate.Lock()
	defer mutexForHealthUpdate.Unlock()

	label := healthStatuses[isHealthy]
	logger.LoggerHealth.Infof("Update health status of service \"%s\" as %s", s, label)

	key := string(s)
	serviceHealthStatus[key] = isHealthy
}

// Server represents the Health GRPC server. It embeds
// healthservice.UnimplementedHealthServer and provides its own Check method.
type Server struct {
healthservice.UnimplementedHealthServer
}

// Check responds to a health check client with the health status of the APIM APK Agent.
//
// An empty request.Service asks for the overall state: the response is SERVING
// when every entry in serviceHealthStatus is healthy (including when no entry
// has been recorded yet) and NOT_SERVING otherwise. A non-empty request.Service
// is looked up by name and yields SERVING or NOT_SERVING according to its
// recorded state, or UNKNOWN when nothing is recorded under that name.
// The returned error is always nil.
func (s Server) Check(ctx context.Context, request *healthservice.HealthCheckRequest) (*healthservice.HealthCheckResponse, error) {
	logger.LoggerHealth.Debugf("Querying health state for APIM APK Agent service \"%s\"", request.Service)

	// SetStatus writes serviceHealthStatus under mutexForHealthUpdate, possibly
	// from another goroutine. Reading the map without the same lock is a data
	// race and can abort the process with "concurrent map read and map write",
	// so every read below (formatting, ranging, indexing) is done under the lock.
	mutexForHealthUpdate.Lock()
	defer mutexForHealthUpdate.Unlock()

	logger.LoggerHealth.Debugf("Internal health state map: %v", serviceHealthStatus)

	if request.Service == "" {
		// overall health of the server
		isHealthy := true
		for _, ok := range serviceHealthStatus {
			if !ok {
				isHealthy = false
				break
			}
		}

		if isHealthy {
			logger.LoggerHealth.Debug("Responding health state of APIM APK Agent as HEALTHY")
			return &healthservice.HealthCheckResponse{Status: healthservice.HealthCheckResponse_SERVING}, nil
		}
		logger.LoggerHealth.Debug("Responding health state of APIM APK Agent as NOT_HEALTHY")
		return &healthservice.HealthCheckResponse{Status: healthservice.HealthCheckResponse_NOT_SERVING}, nil
	}

	// health of the component of a server
	if isHealthy, ok := serviceHealthStatus[request.Service]; ok {
		if isHealthy {
			logger.LoggerHealth.Debugf("Responding health state of APIM APK Agent service \"%s\" as HEALTHY", request.Service)
			return &healthservice.HealthCheckResponse{Status: healthservice.HealthCheckResponse_SERVING}, nil
		}
		logger.LoggerHealth.Debugf("Responding health state of APIM APK Agent service \"%s\" as NOT_HEALTHY", request.Service)
		return &healthservice.HealthCheckResponse{Status: healthservice.HealthCheckResponse_NOT_SERVING}, nil
	}

	// no component found
	logger.LoggerHealth.Debugf("Responding health state of APIM APK Agent service \"%s\" as UNKNOWN", request.Service)
	return &healthservice.HealthCheckResponse{Status: healthservice.HealthCheckResponse_UNKNOWN}, nil
}
19 changes: 12 additions & 7 deletions apim-apk-agent/resources/check_health.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@
# limitations under the License.
# -----------------------------------------------------------------------

ADAPTER_XDS_PORT="${ADAPTER_XDS_PORT:-18000}"
ADAPTER_SERVER_NAME="${ADAPTER_SERVER_NAME:-adapter}"
grpc_health_probe -addr "127.0.0.1:${ADAPTER_XDS_PORT}" \
# Debugging: Print out the values of variables
echo "APIM_APK_AGENT_GRPC_PORT: ${APIM_APK_AGENT_GRPC_PORT}"
echo "APIM_APK_AGENT_SERVER_NAME: ${APIM_APK_AGENT_SERVER_NAME}"
echo "APIM_APK_AGENT_PUBLIC_CERT_PATH: ${APIM_APK_AGENT_PUBLIC_CERT_PATH}"
echo "APIM_APK_AGENT_PRIVATE_KEY_PATH: ${APIM_APK_AGENT_PRIVATE_KEY_PATH}"

# Run grpc_health_probe with debugging information
grpc_health_probe -addr "127.0.0.1:${APIM_APK_AGENT_GRPC_PORT}" \
-tls \
-tls-ca-cert "${ADAPTER_PUBLIC_CERT_PATH}" \
-tls-client-cert "${ADAPTER_PUBLIC_CERT_PATH}" \
-tls-client-key "${ADAPTER_PRIVATE_KEY_PATH}" \
-tls-server-name ${ADAPTER_SERVER_NAME} \
-tls-ca-cert "${APIM_APK_AGENT_PUBLIC_CERT_PATH}" \
-tls-client-cert "${APIM_APK_AGENT_PUBLIC_CERT_PATH}" \
-tls-client-key "${APIM_APK_AGENT_PRIVATE_KEY_PATH}" \
-tls-server-name ${APIM_APK_AGENT_SERVER_NAME} \
-connect-timeout=3s
21 changes: 21 additions & 0 deletions helm-charts/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ spec:
containerPort: 18000
- name: rest-port
containerPort: 18001
env:
- name: APIM_APK_AGENT_PRIVATE_KEY_PATH
value: /home/wso2/security/keystore/apk-agent.key
- name: APIM_APK_AGENT_PUBLIC_CERT_PATH
value: /home/wso2/security/keystore/apk-agent.crt
- name: APIM_APK_AGENT_SERVER_NAME
value: apim-apk-agent-service.{{ .Release.Namespace }}.svc
- name: APIM_APK_AGENT_GRPC_PORT
value: "18000"
volumeMounts:
- name: log-conf-volume
mountPath: /home/wso2/conf/
Expand All @@ -57,6 +66,18 @@ spec:
- name: apk-agent-certificates
mountPath: /home/wso2/security/truststore/apk-agent-ca.crt
subPath: ca.crt
readinessProbe:
exec:
command: [ "sh", "check_health.sh" ]
initialDelaySeconds: 20
periodSeconds: 20
failureThreshold: 5
livenessProbe:
exec:
command: [ "sh", "check_health.sh" ]
initialDelaySeconds: 20
periodSeconds: 20
failureThreshold: 5
volumes:
- name: log-conf-volume
configMap:
Expand Down

0 comments on commit 5d3459e

Please sign in to comment.