Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Enhancement: Session Error handling and caching #26

Merged
merged 23 commits into from
Mar 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,10 @@
/*.sqlite

## environment vars
.env
.env

## docker compose file
docker-compose.yml

## MAC
.DS_Store
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ To onboard the gateway server without having to dig deep, you can follow the [Qu
Every release candidate is published to https://github.com/pokt-network/gateway-server/pkgs/container/pocket-gateway-server

## Docker Compose
There is an all-inclusive docker-compose file available for development [docker-compose.yml](docker-compose.yml)
There is an all-inclusive docker-compose file available for development [docker-compose.yml](docker-compose.yml.sample)

## Minimum Hardware Requirements to run
- 1GB of RAM
Expand Down
30 changes: 16 additions & 14 deletions cmd/gateway_server/internal/config/dot_env_config_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,45 +87,47 @@ func (c DotEnvGlobalConfigProvider) GetAltruistRequestTimeout() time.Duration {
func NewDotEnvConfigProvider() *DotEnvGlobalConfigProvider {
_ = godotenv.Load()

poktRPCTimeout, err := time.ParseDuration(getEnvVar(poktRPCTimeoutEnv))
poktRPCTimeout, err := time.ParseDuration(getEnvVar(poktRPCTimeoutEnv, ""))
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", poktRPCTimeoutEnv, err))
}

httpServerPort, err := strconv.ParseUint(getEnvVar(httpServerPortEnv), 10, 64)
httpServerPort, err := strconv.ParseUint(getEnvVar(httpServerPortEnv, ""), 10, 64)
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", httpServerPortEnv, err))
}

sessionCacheTTLDuration, err := time.ParseDuration(getEnvVar(sessionCacheTTLEnv))
sessionCacheTTLDuration, err := time.ParseDuration(getEnvVar(sessionCacheTTLEnv, ""))
if err != nil {
panic(fmt.Sprintf("Error parsing %s: %s", sessionCacheTTLDuration, err))
}

altruistRequestTimeoutDuration, err := time.ParseDuration(getEnvVar(altruistRequestTimeoutEnv))
altruistRequestTimeoutDuration, err := time.ParseDuration(getEnvVar(altruistRequestTimeoutEnv, defaultAltruistRequestTimeout.String()))
if err != nil {
// Provide a default to prevent any breaking changes with new env variable.
altruistRequestTimeoutDuration = defaultAltruistRequestTimeout
}

return &DotEnvGlobalConfigProvider{
poktRPCFullHost: getEnvVar(poktRPCFullHostEnv),
poktRPCFullHost: getEnvVar(poktRPCFullHostEnv, ""),
httpServerPort: uint(httpServerPort),
poktRPCRequestTimeout: poktRPCTimeout,
sessionCacheTTL: sessionCacheTTLDuration,
databaseConnectionUrl: getEnvVar(dbConnectionUrlEnv),
environmentStage: global_config.EnvironmentStage(getEnvVar(environmentStageEnv)),
poktApplicationsEncryptionKey: getEnvVar(poktApplicationsEncryptionKeyEnv),
apiKey: getEnvVar(apiKey),
databaseConnectionUrl: getEnvVar(dbConnectionUrlEnv, ""),
environmentStage: global_config.EnvironmentStage(getEnvVar(environmentStageEnv, "")),
poktApplicationsEncryptionKey: getEnvVar(poktApplicationsEncryptionKeyEnv, ""),
apiKey: getEnvVar(apiKey, ""),
altruistRequestTimeout: altruistRequestTimeoutDuration,
}
}

// getEnvVar retrieves the value of the environment variable with error handling.
func getEnvVar(name string) string {
value, exists := os.LookupEnv(name)
if !exists {
panic(fmt.Errorf("%s not set", name))
func getEnvVar(name string, defaultValue string) string {
if value, exists := os.LookupEnv(name); exists {
return value
}
return value
if defaultValue != "" {
return defaultValue
}
panic(fmt.Errorf("%s not set", name))
}
1 change: 1 addition & 0 deletions cmd/gateway_server/internal/models/qos_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ type PublicQosNode struct {
IsHeathy bool `json:"is_heathy"`
IsSynced bool `json:"is_synced"`
LastKnownHeight uint64 `json:"last_known_height"`
P90Latency float64 `json:"p90_latency"`
}
10 changes: 8 additions & 2 deletions cmd/gateway_server/internal/transform/qos_node.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
package transform

import (
"math"
"pokt_gateway_server/cmd/gateway_server/internal/models"
internal_model "pokt_gateway_server/internal/node_selector_service/models"
)

func ToPublicQosNode(node *internal_model.QosNode) *models.PublicQosNode {
latency := node.LatencyTracker.GetP90Latency()
if math.IsNaN(latency) {
latency = 0.0
}
return &models.PublicQosNode{
ServiceUrl: node.MorseNode.ServiceUrl,
Chain: node.GetChain(),
SessionHeight: node.PocketSession.SessionHeader.SessionHeight,
AppPublicKey: node.AppSigner.PublicKey,
SessionHeight: node.MorseSession.SessionHeader.SessionHeight,
AppPublicKey: node.MorseSigner.PublicKey,
TimeoutReason: string(node.GetTimeoutReason()),
LastKnownErr: node.GetLastKnownErrorStr(),
IsHeathy: node.IsHealthy(),
IsSynced: node.IsSynced(),
LastKnownHeight: node.GetLastKnownHeight(),
TimeoutUntil: node.GetTimeoutUntil(),
P90Latency: latency,
}
}
4 changes: 2 additions & 2 deletions cmd/gateway_server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ func main() {
ttlcache.WithTTL[string, *session_registry.Session](gatewayConfigProvider.GetSessionCacheTTL()),
)

nodeCache := ttlcache.New[string, []*qos_models.QosNode](
ttlcache.WithTTL[string, []*qos_models.QosNode](gatewayConfigProvider.GetSessionCacheTTL()),
nodeCache := ttlcache.New[qos_models.SessionChainKey, []*qos_models.QosNode](
ttlcache.WithTTL[qos_models.SessionChainKey, []*qos_models.QosNode](gatewayConfigProvider.GetSessionCacheTTL()),
)

poktApplicationRegistry := apps_registry.NewCachedAppsRegistry(client, querier, gatewayConfigProvider, logger.Named("pokt_application_registry"))
Expand Down
33 changes: 0 additions & 33 deletions docker-compose.yml

This file was deleted.

42 changes: 42 additions & 0 deletions docker-compose.yml.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
version: '3'

services:
gateway-server:
build: .
volumes:
- .env:/app/.env
ports:
- "${HTTP_SERVER_PORT}:${HTTP_SERVER_PORT}"
env_file:
- ./.env
networks:
- bridged_network
# depends_on:
# - postgres
restart: on-failure
logging:
driver: json-file
options:
max-size: 10m

# this postgres database is only to be used for testing. It should not be used in production systems
# Leverage a production ready postgres database with HA/replicas in prod.
# postgres:
# image: postgres:latest
# environment:
# POSTGRES_DB: postgres
# POSTGRES_USER: myuser
# POSTGRES_PASSWORD: mypassword
# ports:
# - "5433:5432"
# volumes:
# - postgres_data:/var/lib/postgresql/data
# networks:
# - bridged_network

volumes:
postgres_data:

networks:
bridged_network:
driver: bridge
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
github.com/golang/protobuf v1.5.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/influxdata/tdigest v0.0.1 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgio v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgj
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
Expand All @@ -55,6 +56,8 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY=
github.com/influxdata/tdigest v0.0.1/go.mod h1:Z0kXnxzbTC2qrx4NaIzYkE1k66+6oEDQTvL95hQFh5Y=
github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo=
github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk=
github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8=
Expand Down Expand Up @@ -227,6 +230,7 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
Expand Down Expand Up @@ -279,6 +283,7 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
Expand All @@ -297,8 +302,10 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.0.0-20181121035319-3f7ecaa7e8ca/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
gonum.org/v1/netlib v0.0.0-20181029234149-ec6d1f5cefe6/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func SendRelaysAsync(relayer pokt_v0.PocketRelayer, nodes []*models.QosNode, pay
Payload: &relayer_models.Payload{Data: payload, Method: method},
Chain: node.GetChain(),
SelectedNodePubKey: node.GetPublicKey(),
Session: node.PocketSession,
Session: node.MorseSession,
})
relayResponses <- &nodeRelayResponse{
Node: node,
Expand Down
38 changes: 21 additions & 17 deletions internal/node_selector_service/checks/error_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,23 @@ const timeoutErrorPenalty = time.Second * 15
// 24 hours is analogous to indefinite
const kickOutSessionPenalty = time.Hour * 24

const (
errPocketInvalidServicerMsg = "failed to find correct servicer PK"
errPocketInvalidBlockHeightMsg = "the block height passed is invalid"
errPocketRequestTimeoutMsg = "request timeout"
errPocketOverServiceMsg = "the max number of relays serviced for this node is exceeded"
errPocketMaximumEvidenceSealedMsg = "the evidence is sealed, either max relays reached or claim already submitted"
var (
errsKickSession = []string{"failed to find correct servicer PK", "the max number of relays serviced for this node is exceeded", "the evidence is sealed, either max relays reached or claim already submitted"}
errsTimeout = []string{"connection refused", "the request block height is out of sync with the current block height", "no route to host", "unexpected EOF", "i/o timeout", "tls: failed to verify certificate", "no such host", "the block height passed is invalid", "request timeout"}
)

const (
errHttpSSLExpired = "tls: failed to verify certificate"
errHttpNoSuchHostMsg = "no such host"
)
func doesErrorContains(errsSubString []string, err error) bool {
if err == nil {
return false
}
errStr := err.Error()
for _, errSubString := range errsSubString {
if strings.Contains(errStr, errSubString) {
return true
}
}
return false
}

// isKickableSessionErr - determines if a node should be kicked from a session to send relays
func isKickableSessionErr(err error) bool {
Expand All @@ -36,23 +41,22 @@ func isKickableSessionErr(err error) bool {
}
// Fallback in the event the error is not parsed correctly due to node operator configurations / custom clients, resort to a simple string check
// node runner cannot serve with expired ssl
if err != nil && (strings.Contains(err.Error(), errHttpSSLExpired) || strings.Contains(err.Error(), errPocketOverServiceMsg) || strings.Contains(err.Error(), errPocketMaximumEvidenceSealedMsg) || strings.Contains(err.Error(), errPocketInvalidServicerMsg)) {
return true
}
return false
return doesErrorContains(errsKickSession, err)
}

func isTimeoutError(err error) bool {
// If Invalid block height, pocket is not caught up to latest session
if err == relayer_models.ErrPocketCoreInvalidBlockHeight {
return true
}

// Check if pocket error returns 500
pocketError, ok := err.(relayer_models.PocketRPCError)
if ok {
return pocketError.HttpCode >= 500
if ok && pocketError.HttpCode >= 500 {
return true
}
// Fallback in the event the error is not parsed correctly due to node operator configurations / custom clients, resort to a simple string check
return err == fasthttp.ErrTimeout || err == fasthttp.ErrDialTimeout || err == fasthttp.ErrTLSHandshakeTimeout || err != nil && (strings.Contains(err.Error(), errHttpNoSuchHostMsg) || strings.Contains(err.Error(), errPocketRequestTimeoutMsg) || strings.Contains(err.Error(), errPocketInvalidBlockHeightMsg))
return err == fasthttp.ErrConnectionClosed || err == fasthttp.ErrTimeout || err == fasthttp.ErrDialTimeout || err == fasthttp.ErrTLSHandshakeTimeout || doesErrorContains(errsTimeout, err)
}

// DefaultPunishNode generic punisher for whenever a node returns an error independent of a specific check
Expand Down
Loading
Loading