From 26a4881cecf7fd40a60112ad0074ec52c7c5f46f Mon Sep 17 00:00:00 2001 From: satyakamala03 <128077872+satyakamala03@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:19:14 +0530 Subject: [PATCH] Occ config logging (#394) * occ configurations logging * cal event success * adding cal data * added TODOs * Remove log_occ_confogs.go * Remove testing files * source of configs - files * whitelist format change * code clean up * code review changes-1 * CR fixes * CR fixes * Delete tests/unittest/config_logging/main_test.go * clean up * Merge branch 'occ-config-logging' of /Users/simmidisetty/Documents/GitHub/OpenSourceHera/src/github.com/paypal/hera with conflicts. * test for config logging * removing test changes * tests for all cases * test * making minor changes for logging feature specific data * changes for incorporate review comments --------- Co-authored-by: simmidisetty Co-authored-by: Rajesh S --- lib/config.go | 157 ++++++++++++++++++++- lib/main.go | 13 ++ tests/unittest/config_logging/main_test.go | 141 ++++++++++++++++++ 3 files changed, 308 insertions(+), 3 deletions(-) create mode 100644 tests/unittest/config_logging/main_test.go diff --git a/lib/config.go b/lib/config.go index 4b8bdc66..3e8afe59 100644 --- a/lib/config.go +++ b/lib/config.go @@ -20,13 +20,13 @@ package lib import ( "errors" "fmt" + "github.com/paypal/hera/cal" + "github.com/paypal/hera/config" + "github.com/paypal/hera/utility/logger" "os" "path/filepath" "strings" "sync/atomic" - - "github.com/paypal/hera/config" - "github.com/paypal/hera/utility/logger" ) //The Config contains all the static configuration @@ -73,6 +73,9 @@ type Config struct { // config_reload_time_ms(30 * 1000) // ConfigReloadTimeMs int + // + // + ConfigLoggingReloadTimeHours int // custom_auth_timeout(1000) CustomAuthTimeoutMs int // time_skew_threshold_warn(2) @@ -268,6 +271,7 @@ func InitConfig() error { } gAppConfig.ConfigReloadTimeMs = cdb.GetOrDefaultInt("config_reload_time_ms", 30*1000) + gAppConfig.ConfigLoggingReloadTimeHours = cdb.GetOrDefaultInt("config_logging_reload_time_hours", 24) gAppConfig.CustomAuthTimeoutMs = cdb.GetOrDefaultInt("custom_auth_timeout", 1000) gAppConfig.TimeSkewThresholdWarnSec = cdb.GetOrDefaultInt("time_skew_threshold_warn", 2) gAppConfig.TimeSkewThresholdErrorSec = cdb.GetOrDefaultInt("time_skew_threshold_error", 15) @@ -464,6 +468,153 @@ func InitConfig() error { return nil } +func LogOccConfigs() { + whiteListConfigs := map[string]map[string]interface{}{ + "BACKLOG": { + "backlog_pct": gAppConfig.BacklogPct, + "request_backlog_timeout": gAppConfig.BacklogTimeoutMsec, + "short_backlog_timeout": gAppConfig.ShortBacklogTimeoutMsec, + }, + "BOUNCER": { + "bouncer_enabled": gAppConfig.BouncerEnabled, + "bouncer_startup_delay": gAppConfig.BouncerStartupDelay, + "bouncer_poll_interval_ms": gAppConfig.BouncerPollInterval, + }, + "PROFILE": { + "enable_profile": gAppConfig.EnableProfile, + "profile_http_port": gAppConfig.ProfileHTTPPort, + "profile_telnet_port": gAppConfig.ProfileTelnetPort, + }, + "SHARDING": { + "enable_sharding": gAppConfig.EnableSharding, + "use_shardmap": gAppConfig.UseShardMap, + "num_shards": gAppConfig.NumOfShards, + "shard_key_name": gAppConfig.ShardKeyName, + "max_scuttle": gAppConfig.MaxScuttleBuckets, + "scuttle_col_name": gAppConfig.ScuttleColName, + "shard_key_value_type_is_string": gAppConfig.ShardKeyValueTypeIsString, + "enable_whitelist_test": gAppConfig.EnableWhitelistTest, + "whitelist_children": gAppConfig.NumWhitelistChildren, + "sharding_postfix": gAppConfig.ShardingPostfix, + "sharding_cfg_reload_interval": gAppConfig.ShardingCfgReloadInterval, + "hostname_prefix": gAppConfig.HostnamePrefix, + "sharding_cross_keys_err": gAppConfig.ShardingCrossKeysErr, + //"enable_sql_rewrite", // not found anywhere? + "sharding_algo": gAppConfig.ShardingAlgoHash, + "cfg_from_tns_override_num_shards": gAppConfig.CfgFromTnsOverrideNumShards, + }, + "TAF": { + "enable_taf": gAppConfig.EnableTAF, + "cfg_from_tns_override_taf": gAppConfig.CfgFromTnsOverrideTaf, + "testing_enable_dml_taf": gAppConfig.TestingEnableDMLTaf, + "taf_timeout_ms": gAppConfig.TAFTimeoutMs, + "taf_bin_duration": gAppConfig.TAFBinDuration, + "taf_allow_slow_every_x": gAppConfig.TAFAllowSlowEveryX, + "taf_normally_slow_count": gAppConfig.TAFNormallySlowCount, + }, + "BIND-EVICTION": { + "child.executable": gAppConfig.ChildExecutable, + //"enable_bind_hash_logging" FOUND FOR SOME OCCs ONLY IN occ.def + "bind_eviction_threshold_pct": gAppConfig.BindEvictionThresholdPct, + "bind_eviction_decr_per_sec": gAppConfig.BindEvictionDecrPerSec, + "bind_eviction_target_conn_pct": gAppConfig.BindEvictionTargetConnPct, + "bind_eviction_max_throttle": gAppConfig.BindEvictionMaxThrottle, + "bind_eviction_names": gAppConfig.BindEvictionNames, + "skip_eviction_host_prefix": gAppConfig.SkipEvictRegex, + "eviction_host_prefix": gAppConfig.EvictRegex, + "query_bind_blocker_min_sql_prefix": gAppConfig.QueryBindBlockerMinSqlPrefix, + "enable_connlimit_check": gAppConfig.EnableConnLimitCheck, + }, + "MANUAL-RATE-LIMITER": { + "enable_query_bind_blocker": gAppConfig.EnableQueryBindBlocker, + }, + "SATURATION-RECOVERY": { + "saturation_recover_threshold": GetSatRecoverThresholdMs(), + "saturation_recover_throttle_rate": GetSatRecoverThrottleRate(), + }, + "SOFT-EVICTION": { + "soft_eviction_effective_time": gAppConfig.SoftEvictionEffectiveTimeMs, + "soft_eviction_probability": gAppConfig.SoftEvictionProbability, + }, + "WORKER-CONFIGURATIONS": { + "lifespan_check_interval": gAppConfig.lifeSpanCheckInterval, + "lifo_scheduler_enabled": gAppConfig.LifoScheduler, + //"num_workers_per_proxy", // only present in occ.def for some occs + //"max_clients_per_worker", // only present in occ.def for some occs + "max_stranded_time_interval": gAppConfig.StrandedWorkerTimeoutMs, + "high_load_max_stranded_time_interval": gAppConfig.HighLoadStrandedWorkerTimeoutMs, + "high_load_skip_initiate_recover_pct": gAppConfig.HighLoadSkipInitiateRecoverPct, + "enable_danglingworker_recovery": gAppConfig.EnableDanglingWorkerRecovery, + "max_db_connects_per_sec": gAppConfig.MaxDbConnectsPerSec, + "max_lifespan_per_child": GetMaxLifespanPerChild(), + "max_requests_per_child": GetMaxRequestsPerChild(), + "max_desire_healthy_worker_pct": gAppConfig.MaxDesiredHealthyWorkerPct, + }, + "R-W-SPLIT": { + "readonly_children_pct": gAppConfig.ReadonlyPct, + "cfg_from_tns_override_rw_split": gAppConfig.CfgFromTnsOverrideRWSplit, + }, + "RAC": { + "management_table_prefix": gAppConfig.ManagementTablePrefix, + "rac_sql_interval": gAppConfig.RacMaintReloadInterval, + "rac_restart_window": gAppConfig.RacRestartWindow, + }, + "NO-CATEGORY": { + "database_type": gAppConfig.DatabaseType, // Oracle = 0; MySQL=1; POSTGRES=2 + "cfg_from_tns": gAppConfig.CfgFromTns, + "log_level": gOpsConfig.logLevel, + "high_load_pct": gAppConfig.HighLoadPct, + "init_limit_pct": gAppConfig.InitLimitPct, + "num_standby_dbs": gAppConfig.NumStdbyDbs, + }, + } + + for feature, configs := range whiteListConfigs { + switch feature { + case "BACKLOG": + if gAppConfig.BacklogPct == 0 { + continue + } + case "BOUNCER": + if !gAppConfig.BouncerEnabled { + continue + } + case "PROFILE": + if !gAppConfig.EnableProfile { + continue + } + case "SHARDING": + if !gAppConfig.EnableSharding { + continue + } + case "TAF": + if !gAppConfig.EnableTAF { + continue + } + case "R-W-SPLIT": + if gAppConfig.ReadonlyPct == 0 { + continue + } + case "SOFT-EVICTION", "BIND-EVICTION": + if GetSatRecoverThrottleRate() <= 0 { + continue + } + case "MANUAL-RATE-LIMITER": + if !gAppConfig.EnableQueryBindBlocker { + continue + } + } + + evt := cal.NewCalEvent("OCC_CONFIG", fmt.Sprintf(feature), cal.TransOK, "") + for cfg, val := range configs { + s := fmt.Sprintf("%v", val) + evt.AddDataStr(cfg, s) + } + evt.Completed() + } + +} + // CheckOpsConfigChange checks if the ops config file needs to be reloaded and reloads it if necessary. // it is called every several seconds from a dedicated go-routine. func CheckOpsConfigChange() { diff --git a/lib/main.go b/lib/main.go index 7a6227fd..30b70f94 100644 --- a/lib/main.go +++ b/lib/main.go @@ -116,6 +116,19 @@ func Run() { } }() + //This logs the configured parameter with the feature name in the CAL log periodically based on ConfigLoggingReloadTimeHours. + LogOccConfigs() + configLoggingTicker := time.NewTicker(time.Duration(GetConfig().ConfigLoggingReloadTimeHours) * time.Hour) + defer configLoggingTicker.Stop() + go func() { + for { + select { + case <-configLoggingTicker.C: + LogOccConfigs() + } + } + }() + CheckEnableProfiling() GoStats() diff --git a/tests/unittest/config_logging/main_test.go b/tests/unittest/config_logging/main_test.go new file mode 100644 index 00000000..f9125b47 --- /dev/null +++ b/tests/unittest/config_logging/main_test.go @@ -0,0 +1,141 @@ +package main + +import ( + "context" + "database/sql" + + "fmt" + "os" + "strings" + "testing" + "time" + + //"github.com/paypal/hera/client/gosqldriver" + _ "github.com/paypal/hera/client/gosqldriver/tcp" + "github.com/paypal/hera/tests/unittest/testutil" + "github.com/paypal/hera/utility/logger" +) + +var mx testutil.Mux +var tableName string + +func cfg() (map[string]string, map[string]string, testutil.WorkerType) { + + appcfg := make(map[string]string) + // best to chose an "unique" port in case golang runs tests in paralel + appcfg["bind_port"] = "31003" + appcfg["log_level"] = "5" + appcfg["log_file"] = "hera.log" + appcfg["enable_sharding"] = "true" + appcfg["num_shards"] = "3" + appcfg["bouncer_enabled"] = "true" + appcfg["sharding_algo"] = "mod" + appcfg["shard_key_name"] = "id" + appcfg["config_logging_reload_time_hours"] = "0.0002" + pfx := os.Getenv("MGMT_TABLE_PREFIX") + if pfx != "" { + appcfg["management_table_prefix"] = pfx + } + appcfg["sharding_cfg_reload_interval"] = "3600" + appcfg["rac_sql_interval"] = "0" + //appcfg["readonly_children_pct"] = "40" + + appcfg["soft_eviction_effective_time"] = "10000" + appcfg["bind_eviction_threshold_pct"] = "60" + + opscfg := make(map[string]string) + opscfg["opscfg.default.server.max_connections"] = "3" + opscfg["opscfg.default.server.log_level"] = "5" + opscfg["opscfg.default.server.saturation_recover_throttle_rate"] = "30" + + return appcfg, opscfg, testutil.MySQLWorker +} + +func setupShardMap() { + twoTask := os.Getenv("TWO_TASK") + if !strings.HasPrefix(twoTask, "tcp") { + // not mysql + return + } + shard := 0 + db, err := sql.Open("heraloop", fmt.Sprintf("%d:0:0", shard)) + if err != nil { + testutil.Fatal("Error starting Mux:", err) + return + } + db.SetMaxIdleConns(0) + defer db.Close() + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + conn, err := db.Conn(ctx) + if err != nil { + testutil.Fatalf("Error getting connection %s\n", err.Error()) + } + defer conn.Close() + + testutil.RunDML("create table hera_shard_map ( scuttle_id smallint not null, shard_id tinyint not null, status char(1) , read_status char(1), write_status char(1), remarks varchar(500))") + + for i := 0; i < 1024; i++ { + shard := 0 + if i <= 8 { + shard = i % 3 + } + testutil.RunDML(fmt.Sprintf("insert into hera_shard_map ( scuttle_id, shard_id, status, read_status, write_status ) values ( %d, %d, 'Y', 'Y', 'Y' )", i, shard)) + } +} + +func before() error { + tableName = os.Getenv("TABLE_NAME") + if tableName == "" { + tableName = "jdbc_hera_test" + } + if strings.HasPrefix(os.Getenv("TWO_TASK"), "tcp") { + // mysql + testutil.RunDML("create table jdbc_hera_test ( ID BIGINT, INT_VAL BIGINT, STR_VAL VARCHAR(500))") + } + return nil +} + +func TestMain(m *testing.M) { + os.Exit(testutil.UtilMain(m, cfg, before)) +} + +func TestConfigLogging(t *testing.T) { + logger.GetLogger().Log(logger.Debug, "TestConfigLogging setup") + setupShardMap() + logger.GetLogger().Log(logger.Debug, "TestConfigLogging begin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n") + + hostname, _ := os.Hostname() + db, err := sql.Open("hera", hostname+":31003") + if err != nil { + t.Fatal("Error starting Mux:", err) + return + } + db.SetMaxIdleConns(0) + defer db.Close() + time.Sleep(10 * time.Second) + if testutil.RegexCountFile("OCC_CONFIG\tSHARDING", "cal.log") < 1 { + t.Fatalf("SHARDING configuration details are missing.") + } + + if testutil.RegexCountFile("OCC_CONFIG\tBACKLOG", "cal.log") < 1 { + t.Fatalf("BACKLOG config details are missing.") + } + + if testutil.RegexCountFile("OCC_CONFIG\tTAF", "cal.log") > 0 { + t.Fatalf("TAF is not enabled so we should not see TAF config logging.") + } + + if testutil.RegexCountFile("OCC_CONFIG\tR-W-SPLIT", "cal.log") > 0 { + t.Fatalf("R-W-SPLIT is not configured, it should not log R-W-SPLIT config details.") + } + + if testutil.RegexCountFile("OCC_CONFIG\tSOFT-EVICTION", "cal.log") < 1 { + t.Fatalf("Saturation recovery enabled, so it should log SOFT-EVICTION configurations") + } + + if testutil.RegexCountFile("OCC_CONFIG\tBIND-EVICTION", "cal.log") < 1 { + t.Fatalf("Saturation recovery enabled, so it should log BIND-EVICTION configurations") + } + logger.GetLogger().Log(logger.Debug, "TestShardingMod done -------------------------------------------------------------") +}