From e8ba229270639759948645b0aa1811744e7f4908 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Mon, 13 May 2024 16:32:23 +0200 Subject: [PATCH] Add sql text counts stats to `vtcombo`,`vtgate`+`vttablet` Signed-off-by: Tim Vaillancourt --- go/vt/vtgate/vtgate.go | 13 ++++-- go/vt/vttablet/tabletserver/query_engine.go | 18 +++++--- .../tabletserver/query_engine_test.go | 45 ++++++++++++++----- go/vt/vttablet/tabletserver/query_executor.go | 4 +- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/go/vt/vtgate/vtgate.go b/go/vt/vtgate/vtgate.go index 8f8aa8b0061..f8106820ee5 100644 --- a/go/vt/vtgate/vtgate.go +++ b/go/vt/vtgate/vtgate.go @@ -209,6 +209,11 @@ var ( "VtgateApiRowsAffected", "Rows affected by a write (DML) operation through the VTgate API", []string{"Operation", "Keyspace", "DbType"}) + + sqlTextCounts = stats.NewCountersWithMultiLabels( + "VtgateSQLTextCounts", + "Vtgate API query SQL text counts", + []string{"Operation", "Keyspace", "DbType"}) ) // VTGate is the rpc interface to vtgate. Only one instance @@ -225,9 +230,10 @@ type VTGate struct { // stats objects. // TODO(sougou): This needs to be cleaned up. There // are global vars that depend on this member var. - timings *stats.MultiTimings - rowsReturned *stats.CountersWithMultiLabels - rowsAffected *stats.CountersWithMultiLabels + timings *stats.MultiTimings + rowsReturned *stats.CountersWithMultiLabels + rowsAffected *stats.CountersWithMultiLabels + sqlTextCounts *stats.CountersWithMultiLabels // the throttled loggers for all errors, one per API entry logExecute *logutil.ThrottledLogger @@ -459,6 +465,7 @@ func (vtg *VTGate) Execute(ctx context.Context, mysqlCtx vtgateservice.MySQLConn if err == nil { vtg.rowsReturned.Add(statsKey, int64(len(qr.Rows))) vtg.rowsAffected.Add(statsKey, int64(qr.RowsAffected)) + vtg.sqlTextCounts.Add(statsKey, int64(len(sql))) return session, qr, nil } diff --git a/go/vt/vttablet/tabletserver/query_engine.go b/go/vt/vttablet/tabletserver/query_engine.go index dc4128a7c69..64184fe4cdc 100644 --- a/go/vt/vttablet/tabletserver/query_engine.go +++ b/go/vt/vttablet/tabletserver/query_engine.go @@ -187,8 +187,8 @@ type QueryEngine struct { // stats // Note: queryErrorCountsWithCode is similar to queryErrorCounts except it contains error code as an additional dimension - queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels - queryCacheHits, queryCacheMisses *stats.CounterFunc + queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned, querySQLTextCounts *stats.CountersWithMultiLabels + queryCacheHits, queryCacheMisses *stats.CounterFunc // stats flags enablePerWorkloadTableMetrics bool @@ -298,6 +298,7 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine { qe.queryTimes = env.Exporter().NewCountersWithMultiLabels("QueryTimesNs", "query times in ns", labels) qe.queryRowsAffected = env.Exporter().NewCountersWithMultiLabels("QueryRowsAffected", "query rows affected", labels) qe.queryRowsReturned = env.Exporter().NewCountersWithMultiLabels("QueryRowsReturned", "query rows returned", labels) + qe.querySQLTextCounts = env.Exporter().NewCountersWithMultiLabels("QuerySQLTextCounts", "query sql text counts", labels) qe.queryErrorCounts = env.Exporter().NewCountersWithMultiLabels("QueryErrorCounts", "query error counts", labels) qe.queryErrorCountsWithCode = env.Exporter().NewCountersWithMultiLabels("QueryErrorCountsWithCode", "query error counts with error code", []string{"Table", "Plan", "Code"}) @@ -559,9 +560,9 @@ func (qe *QueryEngine) QueryPlanCacheLen() (count int) { } // AddStats adds the given stats for the planName.tableName -func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) { +func (qe *QueryEngine) AddStats(plan *TabletPlan, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) { // table names can contain "." characters, replace them! - keys := []string{tableName, planType.String()} + keys := []string{tableName, plan.PlanID.String()} // Only use the workload as a label if that's enabled in the configuration. if qe.enablePerWorkloadTableMetrics { keys = append(keys, workload) @@ -569,20 +570,23 @@ func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, worklo qe.queryCounts.Add(keys, queryCount) qe.queryTimes.Add(keys, int64(duration)) qe.queryErrorCounts.Add(keys, errorCount) + if plan.FullQuery != nil { + qe.querySQLTextCounts.Add(keys, int64(len(plan.FullQuery.Query))) + } - qe.queryCountsWithTabletType.Add([]string{tableName, planType.String(), tabletType.String()}, queryCount) + qe.queryCountsWithTabletType.Add([]string{tableName, plan.PlanID.String(), tabletType.String()}, queryCount) // queryErrorCountsWithCode is similar to queryErrorCounts except we have an additional dimension // of error code. if errorCount > 0 { - errorKeys := []string{tableName, planType.String(), errorCode} + errorKeys := []string{tableName, plan.PlanID.String(), errorCode} qe.queryErrorCountsWithCode.Add(errorKeys, errorCount) } // For certain plan types like select, we only want to add their metrics to rows returned // But there are special cases like `SELECT ... INTO OUTFILE ''` which return positive rows affected // So we check if it is positive and add that too. - switch planType { + switch plan.PlanID { case planbuilder.PlanSelect, planbuilder.PlanSelectStream, planbuilder.PlanSelectImpossible, planbuilder.PlanShow, planbuilder.PlanOtherRead: qe.queryRowsReturned.Add(keys, rowsReturned) if rowsAffected > 0 { diff --git a/go/vt/vttablet/tabletserver/query_engine_test.go b/go/vt/vttablet/tabletserver/query_engine_test.go index 146414e819b..980223e8e62 100644 --- a/go/vt/vttablet/tabletserver/query_engine_test.go +++ b/go/vt/vttablet/tabletserver/query_engine_test.go @@ -635,9 +635,21 @@ func TestPlanCachePollution(t *testing.T) { } func TestAddQueryStats(t *testing.T) { + fakeSelectPlan := &TabletPlan{ + Plan: &planbuilder.Plan{ + PlanID: planbuilder.PlanSelect, + FullQuery: &sqlparser.ParsedQuery{Query: `select * from something where something=123`}, // 43 length + }, + } + fakeInsertPlan := &TabletPlan{ + Plan: &planbuilder.Plan{ + PlanID: planbuilder.PlanInsert, + FullQuery: &sqlparser.ParsedQuery{Query: `insert into something (id, msg) values(123, 'hello world!')`}, // 59 length + }, + } testcases := []struct { name string - planType planbuilder.PlanType + plan *TabletPlan tableName string tabletType topodata.TabletType queryCount int64 @@ -654,12 +666,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes string expectedQueryRowsAffected string expectedQueryRowsReturned string + expectedQuerySQLTextCounts string expectedQueryErrorCounts string expectedQueryErrorCountsWithCode string }{ { name: "select query", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -674,12 +687,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select": 10}`, expectedQueryRowsAffected: `{}`, expectedQueryRowsReturned: `{"A.Select": 15}`, + expectedQuerySQLTextCounts: `{"A.Select": 43}`, expectedQueryErrorCounts: `{"A.Select": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "select query against a replica", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_REPLICA, queryCount: 1, @@ -694,12 +708,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select": 10}`, expectedQueryRowsAffected: `{}`, expectedQueryRowsReturned: `{"A.Select": 15}`, + expectedQuerySQLTextCounts: `{"A.Select": 43}`, expectedQueryErrorCounts: `{"A.Select": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Select.REPLICA": 1}`, }, { name: "select into query", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -714,12 +729,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select": 10}`, expectedQueryRowsAffected: `{"A.Select": 15}`, expectedQueryRowsReturned: `{"A.Select": 0}`, + expectedQuerySQLTextCounts: `{"A.Select": 43}`, expectedQueryErrorCounts: `{"A.Select": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "error", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -734,12 +750,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select": 10}`, expectedQueryRowsAffected: `{}`, expectedQueryRowsReturned: `{"A.Select": 0}`, + expectedQuerySQLTextCounts: `{"A.Select": 43}`, expectedQueryErrorCounts: `{"A.Select": 1}`, expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "insert query", - planType: planbuilder.PlanInsert, + plan: fakeInsertPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -754,12 +771,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Insert": 10}`, expectedQueryRowsAffected: `{"A.Insert": 15}`, expectedQueryRowsReturned: `{}`, + expectedQuerySQLTextCounts: `{"A.Insert": 59}`, expectedQueryErrorCounts: `{"A.Insert": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`, }, { name: "select query with per workload metrics", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -774,12 +792,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select.some-workload": 10}`, expectedQueryRowsAffected: `{}`, expectedQueryRowsReturned: `{"A.Select.some-workload": 15}`, + expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`, expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "select into query with per workload metrics", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -794,12 +813,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select.some-workload": 10}`, expectedQueryRowsAffected: `{"A.Select.some-workload": 15}`, expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`, + expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`, expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "error with per workload metrics", - planType: planbuilder.PlanSelect, + plan: fakeSelectPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -814,12 +834,13 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Select.some-workload": 10}`, expectedQueryRowsAffected: `{}`, expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`, + expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`, expectedQueryErrorCounts: `{"A.Select.some-workload": 1}`, expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`, expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`, }, { name: "insert query with per workload metrics", - planType: planbuilder.PlanInsert, + plan: fakeInsertPlan, tableName: "A", tabletType: topodata.TabletType_PRIMARY, queryCount: 1, @@ -834,6 +855,7 @@ func TestAddQueryStats(t *testing.T) { expectedQueryTimes: `{"A.Insert.some-workload": 10}`, expectedQueryRowsAffected: `{"A.Insert.some-workload": 15}`, expectedQueryRowsReturned: `{}`, + expectedQuerySQLTextCounts: `{"A.Insert.some-workload": 59}`, expectedQueryErrorCounts: `{"A.Insert.some-workload": 0}`, expectedQueryErrorCountsWithCode: `{}`, expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`, @@ -849,12 +871,13 @@ func TestAddQueryStats(t *testing.T) { env := tabletenv.NewEnv(vtenv.NewTestEnv(), cfg, "TestAddQueryStats_"+testcase.name) se := schema.NewEngine(env) qe := NewQueryEngine(env, se) - qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode) + qe.AddStats(testcase.plan, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode) assert.Equal(t, testcase.expectedQueryCounts, qe.queryCounts.String()) assert.Equal(t, testcase.expectedQueryCountsWithTableType, qe.queryCountsWithTabletType.String()) assert.Equal(t, testcase.expectedQueryTimes, qe.queryTimes.String()) assert.Equal(t, testcase.expectedQueryRowsAffected, qe.queryRowsAffected.String()) assert.Equal(t, testcase.expectedQueryRowsReturned, qe.queryRowsReturned.String()) + assert.Equal(t, testcase.expectedQuerySQLTextCounts, qe.querySQLTextCounts.String()) assert.Equal(t, testcase.expectedQueryErrorCounts, qe.queryErrorCounts.String()) assert.Equal(t, testcase.expectedQueryErrorCountsWithCode, qe.queryErrorCountsWithCode.String()) }) diff --git a/go/vt/vttablet/tabletserver/query_executor.go b/go/vt/vttablet/tabletserver/query_executor.go index b538f9342eb..c8229ede638 100644 --- a/go/vt/vttablet/tabletserver/query_executor.go +++ b/go/vt/vttablet/tabletserver/query_executor.go @@ -139,12 +139,12 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) { errCode = vtErrorCode.String() if reply == nil { - qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode) + qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode) qre.plan.AddStats(1, duration, mysqlTime, 0, 0, 1) return } - qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode) + qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode) qre.plan.AddStats(1, duration, mysqlTime, reply.RowsAffected, uint64(len(reply.Rows)), 0) qre.logStats.RowsAffected = int(reply.RowsAffected) qre.logStats.Rows = reply.Rows