diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6778ec7e0de..7b9b54a19bb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -28,6 +28,7 @@ go.sum @ajm188 @deepthi @harshit-gangal @mattlord @rohit-nayak-ps @systay @froui /go/test/endtoend/onlineddl @rohit-nayak-ps @shlomi-noach /go/test/endtoend/messaging @mattlord @rohit-nayak-ps @derekperkins /go/test/endtoend/schemadiff @shlomi-noach @mattlord +/go/test/endtoend/transaction @harshit-gangal @systay @frouioui @GuptaManan100 /go/test/endtoend/*throttler* @shlomi-noach @mattlord @timvaillancourt /go/test/endtoend/vtgate @harshit-gangal @systay @frouioui /go/test/endtoend/vtorc @deepthi @shlomi-noach @GuptaManan100 @timvaillancourt diff --git a/.github/workflows/vtop_example.yml b/.github/workflows/vtop_example.yml new file mode 100644 index 00000000000..fb5ae87c101 --- /dev/null +++ b/.github/workflows/vtop_example.yml @@ -0,0 +1,97 @@ +name: vtop_example +on: [push, pull_request] +concurrency: + group: format('{0}-{1}', ${{ github.ref }}, 'vtop_example') + cancel-in-progress: true + +jobs: + build: + name: VTop Example + runs-on: self-hosted + + steps: + - name: Skip CI + run: | + if [[ "${{contains( github.event.pull_request.labels.*.name, 'Skip CI')}}" == "true" ]]; then + echo "skipping CI due to the 'Skip CI' label" + exit 1 + fi + + - name: Check if workflow needs to be skipped + id: skip-workflow + run: | + skip='false' + if [[ "${{github.event.pull_request}}" == "" ]] && [[ "${{github.ref}}" != "refs/heads/main" ]] && [[ ! "${{github.ref}}" =~ ^refs/heads/release-[0-9]+\.[0-9]$ ]] && [[ ! "${{github.ref}}" =~ "refs/tags/.*" ]]; then + skip='true' + fi + echo Skip ${skip} + echo "skip-workflow=${skip}" >> $GITHUB_OUTPUT + + PR_DATA=$(curl -s\ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}") + draft=$(echo "$PR_DATA" | jq .draft -r) + echo "is_draft=${draft}" >> $GITHUB_OUTPUT + + - name: Check out code + if: steps.skip-workflow.outputs.skip-workflow == 'false' + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Check for changes in relevant files + if: steps.skip-workflow.outputs.skip-workflow == 'false' + uses: dorny/paths-filter@ebc4d7e9ebcb0b1eb21480bb8f43113e996ac77a # v3.0.1 + id: changes + with: + token: '' + filters: | + end_to_end: + - 'go/**/*.go' + - 'go/vt/sidecardb/**/*.sql' + - 'test.go' + - 'Makefile' + - 'build.env' + - 'go.[sumod]' + - 'proto/*.proto' + - 'tools/**' + - 'config/**' + - 'bootstrap.sh' + - 'examples/**' + - 'test/**' + - '.github/workflows/vtop_example.yml' + + - name: Set up Go + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' + uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 + with: + go-version-file: go.mod + + - name: Tune the OS + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' + run: | + echo '1024 65535' | sudo tee -a /proc/sys/net/ipv4/ip_local_port_range + + - name: Get dependencies + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' + run: | + # Install everything we need, and configure + sudo apt-get install -y eatmydata make + go mod download + + # needed for vtctldclient + - name: Build vitess + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 
'true' + run: | + make build + + - name: Install kubectl & kind + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' + run: | + make install_kubectl_kind + + - name: vtop_example + if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' + timeout-minutes: 60 + run: | + source build.env + eatmydata -- go run test.go -docker=false -skip-build -print-log -follow -retry=1 -timeout=60m vtop_example \ No newline at end of file diff --git a/changelog/19.0/19.0.8/changelog.md b/changelog/19.0/19.0.8/changelog.md new file mode 100644 index 00000000000..97995e779b7 --- /dev/null +++ b/changelog/19.0/19.0.8/changelog.md @@ -0,0 +1,29 @@ +# Changelog of Vitess v19.0.8 + +### Bug fixes +#### Topology + * [release-19.0] Close zookeeper topo connection on disconnect (#17136) [#17191](https://github.com/vitessio/vitess/pull/17191) +#### VTTablet + * [release-19.0] Fix deadlock in messager and health streamer (#17230) [#17233](https://github.com/vitessio/vitess/pull/17233) + * [release-19.0] Fix potential deadlock in health streamer (#17261) [#17268](https://github.com/vitessio/vitess/pull/17268) +### CI/Build +#### Build/CI + * [release-19.0] Specify Ubuntu 24.04 for all jobs (#17278) [#17280](https://github.com/vitessio/vitess/pull/17280) +#### Cluster management + * [release-19.0] Fix flakiness in `TestListenerShutdown` (#17024) [#17187](https://github.com/vitessio/vitess/pull/17187) +#### General + * [release-19.0] Upgrade the Golang version to `go1.22.9` [#17214](https://github.com/vitessio/vitess/pull/17214) +### Enhancement +#### Query Serving + * [release-19.0] Fix to prevent stopping buffering prematurely (#17013) [#17203](https://github.com/vitessio/vitess/pull/17203) +### Internal Cleanup +#### Build/CI + * [release-19.0] Change the name of the vitess-tester repository (#16917) [#17028](https://github.com/vitessio/vitess/pull/17028) +### Release +#### General + * [release-19.0] Bump to `v19.0.8-SNAPSHOT` after the `v19.0.7` release [#17158](https://github.com/vitessio/vitess/pull/17158) + * [release-19.0] Code Freeze for `v19.0.8` [#17310](https://github.com/vitessio/vitess/pull/17310) +### Testing +#### Build/CI + * [release-19.0] Flakes: Address flakiness in TestZkConnClosedOnDisconnect (#17194) [#17195](https://github.com/vitessio/vitess/pull/17195) + diff --git a/changelog/19.0/19.0.8/release_notes.md b/changelog/19.0/19.0.8/release_notes.md new file mode 100644 index 00000000000..ffb601fba4f --- /dev/null +++ b/changelog/19.0/19.0.8/release_notes.md @@ -0,0 +1,7 @@ +# Release of Vitess v19.0.8 +The entire changelog for this release can be found [here](https://github.com/vitessio/vitess/blob/main/changelog/19.0/19.0.8/changelog.md). + +The release includes 11 merged Pull Requests. 
+ +Thanks to all our contributors: @app/vitess-bot, @frouioui, @vitess-bot + diff --git a/changelog/19.0/README.md b/changelog/19.0/README.md index 008c92c2aec..5893d3b1f4c 100644 --- a/changelog/19.0/README.md +++ b/changelog/19.0/README.md @@ -1,4 +1,8 @@ ## v19.0 +* **[19.0.8](19.0.8)** + * [Changelog](19.0.8/changelog.md) + * [Release Notes](19.0.8/release_notes.md) + * **[19.0.7](19.0.7)** * [Changelog](19.0.7/changelog.md) * [Release Notes](19.0.7/release_notes.md) diff --git a/changelog/20.0/20.0.4/changelog.md b/changelog/20.0/20.0.4/changelog.md new file mode 100644 index 00000000000..ec4af560368 --- /dev/null +++ b/changelog/20.0/20.0.4/changelog.md @@ -0,0 +1,27 @@ +# Changelog of Vitess v20.0.4 + +### Bug fixes +#### Query Serving + * [release-20.0] Use proper keyspace when updating the query graph of a reference DML (#17226) [#17257](https://github.com/vitessio/vitess/pull/17257) +#### Topology + * [release-20.0] Close zookeeper topo connection on disconnect (#17136) [#17192](https://github.com/vitessio/vitess/pull/17192) +#### VTTablet + * [release-20.0] Fix deadlock in messager and health streamer (#17230) [#17234](https://github.com/vitessio/vitess/pull/17234) + * [release-20.0] Fix potential deadlock in health streamer (#17261) [#17269](https://github.com/vitessio/vitess/pull/17269) +### CI/Build +#### Build/CI + * [release-20.0] Specify Ubuntu 24.04 for all jobs (#17278) [#17281](https://github.com/vitessio/vitess/pull/17281) +#### Cluster management + * [release-20.0] Fix flakiness in `TestListenerShutdown` (#17024) [#17188](https://github.com/vitessio/vitess/pull/17188) +#### General + * [release-20.0] Upgrade the Golang version to `go1.22.9` [#17212](https://github.com/vitessio/vitess/pull/17212) +### Enhancement +#### Query Serving + * [release-20.0] Fix to prevent stopping buffering prematurely (#17013) [#17204](https://github.com/vitessio/vitess/pull/17204) +### Internal Cleanup +#### Build/CI + * [release-20.0] Change the name of the vitess-tester repository (#16917) [#17029](https://github.com/vitessio/vitess/pull/17029) +### Testing +#### Build/CI + * [release-20.0] Flakes: Address flakiness in TestZkConnClosedOnDisconnect (#17194) [#17196](https://github.com/vitessio/vitess/pull/17196) + diff --git a/changelog/20.0/20.0.4/release_notes.md b/changelog/20.0/20.0.4/release_notes.md new file mode 100644 index 00000000000..42dc5b2b8a3 --- /dev/null +++ b/changelog/20.0/20.0.4/release_notes.md @@ -0,0 +1,7 @@ +# Release of Vitess v20.0.4 +The entire changelog for this release can be found [here](https://github.com/vitessio/vitess/blob/main/changelog/20.0/20.0.4/changelog.md). + +The release includes 10 merged Pull Requests. 
+ +Thanks to all our contributors: @app/vitess-bot, @frouioui + diff --git a/changelog/20.0/README.md b/changelog/20.0/README.md index f41ea711fb8..2fe6e3d9d61 100644 --- a/changelog/20.0/README.md +++ b/changelog/20.0/README.md @@ -1,4 +1,8 @@ ## v20.0 +* **[20.0.4](20.0.4)** + * [Changelog](20.0.4/changelog.md) + * [Release Notes](20.0.4/release_notes.md) + * **[20.0.3](20.0.3)** * [Changelog](20.0.3/changelog.md) * [Release Notes](20.0.3/release_notes.md) diff --git a/changelog/21.0/21.0.0/summary.md b/changelog/21.0/21.0.0/summary.md index 512aa45a12f..1c34f5ad81a 100644 --- a/changelog/21.0/21.0.0/summary.md +++ b/changelog/21.0/21.0.0/summary.md @@ -9,6 +9,7 @@ - [Deprecated VTTablet Flags](#vttablet-flags) - [Deletion of deprecated metrics](#metric-deletion) - [Deprecated Metrics](#deprecations-metrics) + - **[RPC Changes](#rpc-changes)** - **[Traffic Mirroring](#traffic-mirroring)** - **[Atomic Distributed Transaction Support](#atomic-transaction)** - **[New VTGate Shutdown Behavior](#new-vtgate-shutdown-behavior)** @@ -77,6 +78,12 @@ The following metrics are now deprecated and will be deleted in a future release | `vttablet` | `QueryCacheHits` | `QueryEnginePlanCacheHits` | | `vttablet` | `QueryCacheMisses` | `QueryEnginePlanCacheMisses` | +### RPC Changes + +These are the RPC changes made in this release - +1. A `ReadReparentJournalInfo` RPC has been added to the TabletManagerClient interface; it will be used in EmergencyReparentShard for better errant GTID detection. +2. The `PrimaryStatus` RPC in the TabletManagerClient interface has been updated to also return the server UUID of the primary. vttablets will use this to do their own errant GTID detection in `SetReplicationSource`. + ### Traffic Mirroring Traffic mirroring is intended to help reduce some of the uncertainty inherent to `MoveTables SwitchTraffic`.
When diff --git a/changelog/21.0/21.0.1/changelog.md b/changelog/21.0/21.0.1/changelog.md new file mode 100644 index 00000000000..1410591c3d5 --- /dev/null +++ b/changelog/21.0/21.0.1/changelog.md @@ -0,0 +1,57 @@ +# Changelog of Vitess v21.0.1 + +### Bug fixes +#### Backup and Restore + * [release-21.0] Fix how we cancel the context in the builtin backup engine (#17285) [#17291](https://github.com/vitessio/vitess/pull/17291) + * [release-21.0] S3: optional endpoint resolver and correct retrier [#17307](https://github.com/vitessio/vitess/pull/17307) +#### Cluster management + * [release-21.0] Fix panic in vttablet when closing topo server twice (#17094) [#17122](https://github.com/vitessio/vitess/pull/17122) +#### Online DDL + * [release-21.0] Online DDL: fix defer function, potential connection pool exhaustion (#17207) [#17210](https://github.com/vitessio/vitess/pull/17210) +#### Query Serving + * [release-21.0] bugfix: treat EXPLAIN like SELECT (#17054) [#17058](https://github.com/vitessio/vitess/pull/17058) + * [release-21.0] Delegate Column Availability Checks to MySQL for Single-Route Queries (#17077) [#17087](https://github.com/vitessio/vitess/pull/17087) + * [release-21.0] bugfix: Handle CTEs with columns named in the CTE def (#17179) [#17181](https://github.com/vitessio/vitess/pull/17181) + * [release-21.0] Use proper keyspace when updating the query graph of a reference DML (#17226) [#17258](https://github.com/vitessio/vitess/pull/17258) +#### Topology + * [release-21.0] Close zookeeper topo connection on disconnect (#17136) [#17193](https://github.com/vitessio/vitess/pull/17193) +#### VReplication + * [release-21.0] VReplication: Qualify and SQL escape tables in created AutoIncrement VSchema definitions (#17174) [#17176](https://github.com/vitessio/vitess/pull/17176) +#### VTTablet + * [release-21.0] Fix deadlock in messager and health streamer (#17230) [#17235](https://github.com/vitessio/vitess/pull/17235) + * [release-21.0] Fix potential deadlock in health streamer (#17261) [#17270](https://github.com/vitessio/vitess/pull/17270) +### CI/Build +#### Build/CI + * [release-21.0] Specify Ubuntu 24.04 for all jobs (#17278) [#17282](https://github.com/vitessio/vitess/pull/17282) +#### Cluster management + * [release-21.0] Fix flakiness in `TestListenerShutdown` (#17024) [#17189](https://github.com/vitessio/vitess/pull/17189) +#### General + * [release-21.0] Upgrade the Golang version to `go1.23.3` [#17211](https://github.com/vitessio/vitess/pull/17211) +### Dependencies +#### Java + * [release-21.0] java package updates for grpc and protobuf and release plugins (#17100) [#17105](https://github.com/vitessio/vitess/pull/17105) +### Documentation +#### Documentation + * [Direct PR][release-21.0] Add RPC changes segment in the summary doc [#17034](https://github.com/vitessio/vitess/pull/17034) +### Enhancement +#### Online DDL + * [release-21.0] Improve Schema Engine's TablesWithSize80 query (#17066) [#17091](https://github.com/vitessio/vitess/pull/17091) +#### Query Serving + * [release-21.0] Fix to prevent stopping buffering prematurely (#17013) [#17205](https://github.com/vitessio/vitess/pull/17205) +#### VReplication + * [release-21.0] Binlog: Improve ZstdInMemoryDecompressorMaxSize management (#17220) [#17241](https://github.com/vitessio/vitess/pull/17241) +### Internal Cleanup +#### Build/CI + * [release-21.0] Change the name of the vitess-tester repository (#16917) [#17030](https://github.com/vitessio/vitess/pull/17030) +### Regression +#### Backup and Restore + * [release-21.0] 
Fix unreachable errors when taking a backup (#17062) [#17112](https://github.com/vitessio/vitess/pull/17112) +### Release +#### General + * [release-21.0] Bump to `v21.0.1-SNAPSHOT` after the `v21.0.0` release [#17098](https://github.com/vitessio/vitess/pull/17098) +### Testing +#### Build/CI + * [release-21.0] Flakes: Address flakiness in TestZkConnClosedOnDisconnect (#17194) [#17197](https://github.com/vitessio/vitess/pull/17197) +#### Query Serving + * [release-21.0] fix: flaky test on twopc transaction (#17068) [#17070](https://github.com/vitessio/vitess/pull/17070) + diff --git a/changelog/21.0/21.0.1/release_notes.md b/changelog/21.0/21.0.1/release_notes.md new file mode 100644 index 00000000000..e42d16379a6 --- /dev/null +++ b/changelog/21.0/21.0.1/release_notes.md @@ -0,0 +1,7 @@ +# Release of Vitess v21.0.1 +The entire changelog for this release can be found [here](https://github.com/vitessio/vitess/blob/main/changelog/21.0/21.0.1/changelog.md). + +The release includes 25 merged Pull Requests. + +Thanks to all our contributors: @GuptaManan100, @app/vitess-bot, @frouioui, @vitess-bot + diff --git a/changelog/21.0/README.md b/changelog/21.0/README.md index a77e98bcaba..f3a98feb55a 100644 --- a/changelog/21.0/README.md +++ b/changelog/21.0/README.md @@ -1,4 +1,8 @@ ## v21.0 +* **[21.0.1](21.0.1)** + * [Changelog](21.0.1/changelog.md) + * [Release Notes](21.0.1/release_notes.md) + * **[21.0.0](21.0.0)** * [Changelog](21.0.0/changelog.md) * [Release Notes](21.0.0/release_notes.md) diff --git a/changelog/22.0/22.0.0/summary.md b/changelog/22.0/22.0.0/summary.md index 7c7257bfae3..d21acf48a30 100644 --- a/changelog/22.0/22.0.0/summary.md +++ b/changelog/22.0/22.0.0/summary.md @@ -5,6 +5,7 @@ - **[Major Changes](#major-changes)** - **[RPC Changes](#rpc-changes)** - **[Prefer not promoting a replica that is currently taking a backup](#reparents-prefer-not-backing-up)** + - **[VTOrc Config File Changes](#vtorc-config-file-changes)** ## Major Changes @@ -25,4 +26,25 @@ For planned reparents, hosts taking backups with a backup engine other than `bui valid candidates. This means they will never get promoted - not even if there's no other candidates. Note that behavior for `builtin` backups remains unchanged: a replica that is currently taking a `builtin` backup will -never be promoted, neither by planned nor by emergency reparents. \ No newline at end of file +never be promoted, neither by planned nor by emergency reparents. + +### VTOrc Config File Changes + +The configuration file for VTOrc has been updated to now support dynamic fields. The old `--config` parameter has been removed. The alternative is to use the `--config-file` parameter. The configuration can now be provided in json, yaml or any other format that [viper](https://github.com/spf13/viper) supports. + +The following fields can be dynamically changed - +1. `instance-poll-time` +2. `prevent-cross-cell-failover` +3. `snapshot-topology-interval` +4. `reasonable-replication-lag` +5. `audit-to-backend` +6. `audit-to-syslog` +7. `audit-purge-duration` +8. `wait-replicas-timeout` +9. `tolerable-replication-lag` +10. `topo-information-refresh-duration` +11. `recovery-poll-duration` +12. `allow-emergency-reparent` +13. `change-tablets-with-errant-gtid-to-drained` + +To upgrade to the newer version of the configuration file, first switch to using the flags in your current deployment before upgrading. Then you can switch to using the configuration file in the newer release. 
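To make the migration concrete, here is a minimal sketch of the new-style VTOrc configuration and startup, assuming a file path of `/tmp/vtorc-config.yaml` (the keys are among the dynamic fields listed above; the values are illustrative only, not recommendations):

```sh
# Sketch: write a new-style VTOrc config file and start vtorc against it.
# The keys below are among the fields VTOrc can now reload dynamically.
cat > /tmp/vtorc-config.yaml <<EOF
instance-poll-time: 1s
prevent-cross-cell-failover: false
reasonable-replication-lag: 10s
wait-replicas-timeout: 30s
EOF

# The removed --config flag is replaced by --config-file (or the
# --config-path/--config-name/--config-type trio used in the example scripts).
vtorc $TOPOLOGY_FLAGS --config-file=/tmp/vtorc-config.yaml --port 16000
```

The example scripts changed later in this diff follow exactly this pattern, swapping the old `config.json` for a `config.yaml` passed via the new viper-based flags.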
diff --git a/examples/common/scripts/vtctld-up.sh b/examples/common/scripts/vtctld-up.sh index 6902a851997..4a4b2587c4f 100755 --- a/examples/common/scripts/vtctld-up.sh +++ b/examples/common/scripts/vtctld-up.sh @@ -33,6 +33,7 @@ vtctld \ --port $vtctld_web_port \ --grpc_port $grpc_port \ --pid_file $VTDATAROOT/tmp/vtctld.pid \ + --pprof-http \ > $VTDATAROOT/tmp/vtctld.out 2>&1 & for _ in {0..300}; do diff --git a/examples/common/scripts/vtgate-up.sh b/examples/common/scripts/vtgate-up.sh index dbaaad02367..fd7860cf6ba 100755 --- a/examples/common/scripts/vtgate-up.sh +++ b/examples/common/scripts/vtgate-up.sh @@ -41,6 +41,7 @@ vtgate \ --pid_file $VTDATAROOT/tmp/vtgate.pid \ --enable_buffer \ --mysql_auth_server_impl none \ + --pprof-http \ > $VTDATAROOT/tmp/vtgate.out 2>&1 & # Block waiting for vtgate to be listening diff --git a/examples/common/scripts/vtorc-up.sh b/examples/common/scripts/vtorc-up.sh index 23ca4e62b48..807f522b1f7 100755 --- a/examples/common/scripts/vtorc-up.sh +++ b/examples/common/scripts/vtorc-up.sh @@ -11,7 +11,9 @@ vtorc \ $TOPOLOGY_FLAGS \ --logtostderr \ --alsologtostderr \ - --config="${script_dir}/../vtorc/config.json" \ + --config-path="${script_dir}/../vtorc/" \ + --config-name="config.yaml" \ + --config-type="yml" \ --port $port \ > "${log_dir}/vtorc.out" 2>&1 & diff --git a/examples/common/scripts/vttablet-up.sh b/examples/common/scripts/vttablet-up.sh index daa40aee894..282cd0553ea 100755 --- a/examples/common/scripts/vttablet-up.sh +++ b/examples/common/scripts/vttablet-up.sh @@ -54,6 +54,7 @@ vttablet \ --service_map 'grpc-queryservice,grpc-tabletmanager,grpc-updatestream' \ --pid_file $VTDATAROOT/$tablet_dir/vttablet.pid \ --heartbeat_on_demand_duration=5s \ + --pprof-http \ > $VTDATAROOT/$tablet_dir/vttablet.out 2>&1 & # Block waiting for the tablet to be listening diff --git a/examples/common/vtorc/config.json b/examples/common/vtorc/config.json deleted file mode 100644 index 53b012c2162..00000000000 --- a/examples/common/vtorc/config.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "RecoveryPeriodBlockSeconds": 1, - "InstancePollSeconds": 1 -} \ No newline at end of file diff --git a/examples/common/vtorc/config.yaml b/examples/common/vtorc/config.yaml new file mode 100644 index 00000000000..26af59afac8 --- /dev/null +++ b/examples/common/vtorc/config.yaml @@ -0,0 +1,13 @@ +instance-poll-time: 1s +prevent-cross-cell-failover: false +snapshot-topology-interval: 0h +reasonable-replication-lag: 10s +audit-to-backend: false +audit-to-syslog: false +audit-purge-duration: 168h +wait-replicas-timeout: 30s +tolerable-replication-lag: 0s +topo-information-refresh-duration: 15s +recovery-poll-duration: 1s +allow-emergency-reparent: true +change-tablets-with-errant-gtid-to-drained: false diff --git a/examples/operator/101_initial_cluster.yaml b/examples/operator/101_initial_cluster.yaml index c26219254f1..c044141dd4b 100644 --- a/examples/operator/101_initial_cluster.yaml +++ b/examples/operator/101_initial_cluster.yaml @@ -15,7 +15,7 @@ spec: vtbackup: vitess/lite:latest vtorc: vitess/lite:latest mysqld: - mysql80Compatible: vitess/lite:latest + mysql80Compatible: mysql:8.0.30 mysqldExporter: prom/mysqld-exporter:v0.11.0 cells: - name: zone1 @@ -79,7 +79,7 @@ spec: cpu: 100m memory: 128Mi extraFlags: - recovery-period-block-duration: 5s + instance-poll-time: 1s partitionings: - equal: parts: 1 @@ -155,23 +155,6 @@ stringData: # Vitess defaults ############################################################################### - # Vitess-internal database. 
- CREATE DATABASE IF NOT EXISTS _vt; - # Note that definitions of local_metadata and shard_metadata should be the same - # as in production which is defined in go/vt/mysqlctl/metadata_tables.go. - CREATE TABLE IF NOT EXISTS _vt.local_metadata ( - name VARCHAR(255) NOT NULL, - value VARCHAR(255) NOT NULL, - db_name VARBINARY(255) NOT NULL, - PRIMARY KEY (db_name, name) - ) ENGINE=InnoDB; - CREATE TABLE IF NOT EXISTS _vt.shard_metadata ( - name VARCHAR(255) NOT NULL, - value MEDIUMBLOB NOT NULL, - db_name VARBINARY(255) NOT NULL, - PRIMARY KEY (db_name, name) - ) ENGINE=InnoDB; - # Admin user with all privileges. CREATE USER 'vt_dba'@'localhost'; GRANT ALL ON *.* TO 'vt_dba'@'localhost'; @@ -200,12 +183,10 @@ stringData: ON *.* TO 'vt_allprivs'@'localhost'; # User for slave replication connections. - # TODO: Should we set a password on this since it allows remote connections? CREATE USER 'vt_repl'@'%'; GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%'; - # User for Vitess filtered replication (binlog player). - # Same permissions as vt_app. + # User for Vitess VReplication (base vstreamers and vplayer). CREATE USER 'vt_filtered'@'localhost'; GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, @@ -213,6 +194,13 @@ stringData: SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER ON *.* TO 'vt_filtered'@'localhost'; + # User for general MySQL monitoring. + CREATE USER 'vt_monitoring'@'localhost'; + GRANT SELECT, PROCESS, SUPER, REPLICATION CLIENT, RELOAD + ON *.* TO 'vt_monitoring'@'localhost'; + GRANT SELECT, UPDATE, DELETE, DROP + ON performance_schema.* TO 'vt_monitoring'@'localhost'; + # custom sql is used to add custom scripts like creating users/passwords. 
We use it in our tests # {{custom_sql}} diff --git a/examples/operator/201_customer_tablets.yaml b/examples/operator/201_customer_tablets.yaml index 5800a5e05df..a16723ade57 100644 --- a/examples/operator/201_customer_tablets.yaml +++ b/examples/operator/201_customer_tablets.yaml @@ -75,7 +75,7 @@ spec: cpu: 100m memory: 128Mi extraFlags: - recovery-period-block-duration: 5s + instance-poll-time: 1s partitionings: - equal: parts: 1 diff --git a/examples/operator/302_new_shards.yaml b/examples/operator/302_new_shards.yaml index 2e15bc40d28..b954dab7beb 100644 --- a/examples/operator/302_new_shards.yaml +++ b/examples/operator/302_new_shards.yaml @@ -75,7 +75,7 @@ spec: cpu: 100m memory: 128Mi extraFlags: - recovery-period-block-duration: 5s + instance-poll-time: 1s partitionings: - equal: parts: 1 diff --git a/examples/operator/306_down_shard_0.yaml b/examples/operator/306_down_shard_0.yaml index 4bdb694d678..7206d1b4e64 100644 --- a/examples/operator/306_down_shard_0.yaml +++ b/examples/operator/306_down_shard_0.yaml @@ -75,7 +75,7 @@ spec: cpu: 100m memory: 128Mi extraFlags: - recovery-period-block-duration: 5s + instance-poll-time: 1s partitionings: - equal: parts: 1 diff --git a/examples/operator/401_scheduled_backups.yaml b/examples/operator/401_scheduled_backups.yaml index 0e74ada8478..9e8ed29aa9f 100644 --- a/examples/operator/401_scheduled_backups.yaml +++ b/examples/operator/401_scheduled_backups.yaml @@ -116,7 +116,7 @@ spec: cpu: 100m memory: 128Mi extraFlags: - recovery-period-block-duration: 5s + instance-poll-time: 1s partitionings: - equal: parts: 1 diff --git a/go/cmd/vtorc/cli/cli.go b/go/cmd/vtorc/cli/cli.go index 1233c1e2ac2..b79793c6492 100644 --- a/go/cmd/vtorc/cli/cli.go +++ b/go/cmd/vtorc/cli/cli.go @@ -20,6 +20,7 @@ import ( "github.com/spf13/cobra" "vitess.io/vitess/go/acl" + "vitess.io/vitess/go/viperutil/debug" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/servenv" "vitess.io/vitess/go/vt/vtorc/config" @@ -29,8 +30,7 @@ import ( ) var ( - configFile string - Main = &cobra.Command{ + Main = &cobra.Command{ Use: "vtorc", Short: "VTOrc is the automated fault detection and repair tool in Vitess.", Example: `vtorc \ @@ -51,22 +51,16 @@ var ( func run(cmd *cobra.Command, args []string) { servenv.Init() - config.UpdateConfigValuesFromFlags() inst.RegisterStats() log.Info("starting vtorc") - if len(configFile) > 0 { - config.ForceRead(configFile) - } else { - config.Read("/etc/vtorc.conf.json", "conf/vtorc.conf.json", "vtorc.conf.json") - } - if config.Config.AuditToSyslog { + if config.GetAuditToSyslog() { inst.EnableAuditSyslog() } config.MarkConfigurationLoaded() // Log final config values to debug if something goes wrong. 
- config.LogConfigValues() + log.Infof("Running with Configuration - %v", debug.AllSettings()) server.StartVTOrcDiscovery() server.RegisterVTOrcAPIEndpoints() @@ -96,7 +90,5 @@ func init() { servenv.MoveFlagsToCobraCommand(Main) logic.RegisterFlags(Main.Flags()) - config.RegisterFlags(Main.Flags()) acl.RegisterFlags(Main.Flags()) - Main.Flags().StringVar(&configFile, "config", "", "config file name") } diff --git a/go/flags/endtoend/vtcombo.txt b/go/flags/endtoend/vtcombo.txt index 01a391d0cad..c10d045ecbd 100644 --- a/go/flags/endtoend/vtcombo.txt +++ b/go/flags/endtoend/vtcombo.txt @@ -414,7 +414,7 @@ Flags: --vreplication_copy_phase_duration duration Duration for each copy phase loop (before running the next catchup: default 1h) (default 1h0m0s) --vreplication_copy_phase_max_innodb_history_list_length int The maximum InnoDB transaction history that can exist on a vstreamer (source) before starting another round of copying rows. This helps to limit the impact on the source tablet. (default 1000000) --vreplication_copy_phase_max_mysql_replication_lag int The maximum MySQL replication lag (in seconds) that can exist on a vstreamer (source) before starting another round of copying rows. This helps to limit the impact on the source tablet. (default 43200) - --vreplication_experimental_flags int (Bitmask) of experimental features in vreplication to enable (default 3) + --vreplication_experimental_flags int (Bitmask) of experimental features in vreplication to enable (default 7) --vreplication_heartbeat_update_interval int Frequency (in seconds, default 1, max 60) at which the time_updated column of a vreplication stream when idling (default 1) --vreplication_max_time_to_retry_on_error duration stop automatically retrying when we've had consecutive failures with the same error for this long after the first occurrence --vreplication_net_read_timeout int Session value of net_read_timeout for vreplication, in seconds (default 300) diff --git a/go/flags/endtoend/vtorc.txt b/go/flags/endtoend/vtorc.txt index d34b4404df7..efccb0afdfc 100644 --- a/go/flags/endtoend/vtorc.txt +++ b/go/flags/endtoend/vtorc.txt @@ -25,7 +25,6 @@ Flags: --catch-sigpipe catch and ignore SIGPIPE on stdout and stderr if specified --change-tablets-with-errant-gtid-to-drained Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED --clusters_to_watch strings Comma-separated list of keyspaces or keyspace/shards that this instance will monitor and repair. Defaults to all clusters in the topology. Example: "ks1,ks2/-80" - --config string config file name --config-file string Full path of the config file (with extension) to use. If set, --config-path, --config-type, and --config-name are ignored. --config-file-not-found-handling ConfigFileNotFoundHandling Behavior when a config file is not found. (Options: error, exit, ignore, warn) (default warn) --config-name string Name of the config file (without extension) to search for. (default "vtconfig") diff --git a/go/flags/endtoend/vttablet.txt b/go/flags/endtoend/vttablet.txt index 8be7b620469..f79db05f327 100644 --- a/go/flags/endtoend/vttablet.txt +++ b/go/flags/endtoend/vttablet.txt @@ -414,7 +414,7 @@ Flags: --vreplication_copy_phase_duration duration Duration for each copy phase loop (before running the next catchup: default 1h) (default 1h0m0s) --vreplication_copy_phase_max_innodb_history_list_length int The maximum InnoDB transaction history that can exist on a vstreamer (source) before starting another round of copying rows. 
This helps to limit the impact on the source tablet. (default 1000000) --vreplication_copy_phase_max_mysql_replication_lag int The maximum MySQL replication lag (in seconds) that can exist on a vstreamer (source) before starting another round of copying rows. This helps to limit the impact on the source tablet. (default 43200) - --vreplication_experimental_flags int (Bitmask) of experimental features in vreplication to enable (default 3) + --vreplication_experimental_flags int (Bitmask) of experimental features in vreplication to enable (default 7) --vreplication_heartbeat_update_interval int Frequency (in seconds, default 1, max 60) at which the time_updated column of a vreplication stream when idling (default 1) --vreplication_max_time_to_retry_on_error duration stop automatically retrying when we've had consecutive failures with the same error for this long after the first occurrence --vreplication_net_read_timeout int Session value of net_read_timeout for vreplication, in seconds (default 300) diff --git a/go/test/endtoend/cluster/cluster_process.go b/go/test/endtoend/cluster/cluster_process.go index 6f800a70a49..b89e007b4f2 100644 --- a/go/test/endtoend/cluster/cluster_process.go +++ b/go/test/endtoend/cluster/cluster_process.go @@ -1301,7 +1301,6 @@ func (cluster *LocalProcessCluster) NewVTOrcProcess(config VTOrcConfiguration) * VtctlProcess: *base, LogDir: cluster.TmpDirectory, Config: config, - WebPort: cluster.GetAndReservePort(), Port: cluster.GetAndReservePort(), } } diff --git a/go/test/endtoend/cluster/vtorc_process.go b/go/test/endtoend/cluster/vtorc_process.go index 4fcb68e292d..af101a8bebd 100644 --- a/go/test/endtoend/cluster/vtorc_process.go +++ b/go/test/endtoend/cluster/vtorc_process.go @@ -43,20 +43,28 @@ type VTOrcProcess struct { ExtraArgs []string ConfigPath string Config VTOrcConfiguration - WebPort int + NoOverride bool proc *exec.Cmd exit chan error } type VTOrcConfiguration struct { - Debug bool - ListenAddress string - RecoveryPeriodBlockSeconds int - TopologyRefreshSeconds int `json:",omitempty"` - PreventCrossDataCenterPrimaryFailover bool `json:",omitempty"` - LockShardTimeoutSeconds int `json:",omitempty"` - ReplicationLagQuery string `json:",omitempty"` - FailPrimaryPromotionOnLagMinutes int `json:",omitempty"` + InstancePollTime string `json:"instance-poll-time,omitempty"` + SnapshotTopologyInterval string `json:"snapshot-topology-interval,omitempty"` + PreventCrossCellFailover bool `json:"prevent-cross-cell-failover,omitempty"` + ReasonableReplicationLag string `json:"reasonable-replication-lag,omitempty"` + AuditToBackend bool `json:"audit-to-backend,omitempty"` + AuditToSyslog bool `json:"audit-to-syslog,omitempty"` + AuditPurgeDuration string `json:"audit-purge-duration,omitempty"` + WaitReplicasTimeout string `json:"wait-replicas-timeout,omitempty"` + TolerableReplicationLag string `json:"tolerable-replication-lag,omitempty"` + TopoInformationRefreshDuration string `json:"topo-information-refresh-duration,omitempty"` + RecoveryPollDuration string `json:"recovery-poll-duration,omitempty"` + AllowEmergencyReparent string `json:"allow-emergency-reparent,omitempty"` + ChangeTabletsWithErrantGtidToDrained bool `json:"change-tablets-with-errant-gtid-to-drained,omitempty"` + LockShardTimeoutSeconds int `json:",omitempty"` + ReplicationLagQuery string `json:",omitempty"` + FailPrimaryPromotionOnLagMinutes int `json:",omitempty"` } // ToJSONString will marshal this configuration as JSON @@ -65,12 +73,12 @@ func (config *VTOrcConfiguration) ToJSONString() 
string { return string(b) } -func (config *VTOrcConfiguration) AddDefaults(webPort int) { - config.Debug = true - if config.RecoveryPeriodBlockSeconds == 0 { - config.RecoveryPeriodBlockSeconds = 1 - } - config.ListenAddress = fmt.Sprintf(":%d", webPort) +func (config *VTOrcConfiguration) addValuesToCheckOverride() { + config.InstancePollTime = "10h" +} + +func (orc *VTOrcProcess) RewriteConfiguration() error { + return os.WriteFile(orc.ConfigPath, []byte(orc.Config.ToJSONString()), 0644) } // Setup starts orc process with required arguements @@ -91,7 +99,9 @@ func (orc *VTOrcProcess) Setup() (err error) { orc.ConfigPath = configFile.Name() // Add the default configurations and print them out - orc.Config.AddDefaults(orc.WebPort) + if !orc.NoOverride { + orc.Config.addValuesToCheckOverride() + } log.Errorf("configuration - %v", orc.Config.ToJSONString()) _, err = configFile.WriteString(orc.Config.ToJSONString()) if err != nil { @@ -111,15 +121,18 @@ func (orc *VTOrcProcess) Setup() (err error) { "--topo_implementation", orc.TopoImplementation, "--topo_global_server_address", orc.TopoGlobalAddress, "--topo_global_root", orc.TopoGlobalRoot, - "--config", orc.ConfigPath, + "--config-file", orc.ConfigPath, "--port", fmt.Sprintf("%d", orc.Port), - // This parameter is overriden from the config file, added here to just verify that we indeed use the config file paramter over the flag - "--recovery-period-block-duration", "10h", - "--instance-poll-time", "1s", - // Faster topo information refresh speeds up the tests. This doesn't add any significant load either - "--topo-information-refresh-duration", "3s", "--bind-address", "127.0.0.1", ) + if !orc.NoOverride { + orc.proc.Args = append(orc.proc.Args, + // This parameter is overridden from the config file. This verifies that we indeed use the flag value over the config file. + "--instance-poll-time", "1s", + // Faster topo information refresh speeds up the tests. This doesn't add any significant load either. + "--topo-information-refresh-duration", "3s", + ) + } if *isCoverage { orc.proc.Args = append(orc.proc.Args, "--test.coverprofile="+getCoveragePath("orc.out")) diff --git a/go/test/endtoend/onlineddl/flow/onlineddl_flow_test.go b/go/test/endtoend/onlineddl/flow/onlineddl_flow_test.go index c442c042f8a..035789e4b87 100644 --- a/go/test/endtoend/onlineddl/flow/onlineddl_flow_test.go +++ b/go/test/endtoend/onlineddl/flow/onlineddl_flow_test.go @@ -63,7 +63,6 @@ import ( "vitess.io/vitess/go/test/endtoend/throttler" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/schema" - vttablet "vitess.io/vitess/go/vt/vttablet/common" throttlebase "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/throttlerapp" ) @@ -145,9 +144,6 @@ func TestMain(m *testing.M) { "--heartbeat_on_demand_duration", "5s", "--migration_check_interval", "2s", "--watch_replication_stream", - // Test VPlayer batching mode. 
- fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching), } clusterInstance.VtGateExtraArgs = []string{ "--ddl_strategy", "online", diff --git a/go/test/endtoend/onlineddl/vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl/vrepl_stress/onlineddl_vrepl_mini_stress_test.go index e0dd9701cf8..88c145dc40c 100644 --- a/go/test/endtoend/onlineddl/vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl/vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -38,7 +38,6 @@ import ( "vitess.io/vitess/go/test/endtoend/throttler" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/schema" - vttablet "vitess.io/vitess/go/vt/vttablet/common" ) type WriteMetrics struct { @@ -184,9 +183,6 @@ func TestMain(m *testing.M) { "--heartbeat_on_demand_duration", "5s", "--migration_check_interval", "5s", "--watch_replication_stream", - // Test VPlayer batching mode. - fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching), } clusterInstance.VtGateExtraArgs = []string{ "--ddl_strategy", "online", diff --git a/go/test/endtoend/onlineddl/vrepl_stress_suite/onlineddl_vrepl_stress_suite_test.go b/go/test/endtoend/onlineddl/vrepl_stress_suite/onlineddl_vrepl_stress_suite_test.go index 440b921f9ba..85b3585beb4 100644 --- a/go/test/endtoend/onlineddl/vrepl_stress_suite/onlineddl_vrepl_stress_suite_test.go +++ b/go/test/endtoend/onlineddl/vrepl_stress_suite/onlineddl_vrepl_stress_suite_test.go @@ -51,7 +51,6 @@ import ( "vitess.io/vitess/go/timer" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/schema" - vttablet "vitess.io/vitess/go/vt/vttablet/common" ) type testcase struct { @@ -436,9 +435,6 @@ func TestMain(m *testing.M) { "--migration_check_interval", "5s", "--vstream_packet_size", "4096", // Keep this value small and below 10k to ensure multilple vstream iterations "--watch_replication_stream", - // Test VPlayer batching mode. 
- fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching), } clusterInstance.VtGateExtraArgs = []string{ "--ddl_strategy", "online", diff --git a/go/test/endtoend/transaction/twopc/fuzz/fuzzer_test.go b/go/test/endtoend/transaction/twopc/fuzz/fuzzer_test.go index 75bc46bacab..da6486242df 100644 --- a/go/test/endtoend/transaction/twopc/fuzz/fuzzer_test.go +++ b/go/test/endtoend/transaction/twopc/fuzz/fuzzer_test.go @@ -53,7 +53,9 @@ var ( } insertIntoFuzzUpdate = "INSERT INTO twopc_fuzzer_update (id, col) VALUES (%d, %d)" + insertIntoFuzzMulti = "INSERT INTO twopc_fuzzer_multi (id) VALUES (%d)" updateFuzzUpdate = "UPDATE twopc_fuzzer_update SET col = col + %d WHERE id = %d" + updateFuzzUpdateMulti = "UPDATE twopc_fuzzer_update join twopc_fuzzer_multi using (id) SET col = col + %d WHERE id = %d" insertIntoFuzzInsert = "INSERT INTO twopc_fuzzer_insert (id, updateSet, threadId) VALUES (%d, %d, %d)" selectFromFuzzUpdate = "SELECT col FROM twopc_fuzzer_update WHERE id = %d" selectIdFromFuzzInsert = "SELECT threadId FROM twopc_fuzzer_insert WHERE updateSet = %d AND id = %d ORDER BY col" @@ -294,6 +296,10 @@ func (fz *fuzzer) initialize(t *testing.T, conn *mysql.Conn) { for _, id := range updateSet { _, err := conn.ExecuteFetch(fmt.Sprintf(insertIntoFuzzUpdate, id, 0), 0, false) require.NoError(t, err) + // We insert the same id values in multi table as we in the update table. We use this for running + // multi-table updates and inserts. + _, err = conn.ExecuteFetch(fmt.Sprintf(insertIntoFuzzMulti, id), 0, false) + require.NoError(t, err) } } } @@ -331,12 +337,20 @@ func (fz *fuzzer) generateAndExecuteTransaction(threadId int) { _, _ = conn.ExecuteFetch(finalCommand, 0, false) } +func getUpdateQuery(incrementVal int32, id int) string { + if rand.Intn(2) == 1 { + return fmt.Sprintf(updateFuzzUpdateMulti, incrementVal, id) + } + return fmt.Sprintf(updateFuzzUpdate, incrementVal, id) +} + // generateUpdateQueries generates the queries to run updates on the twopc_fuzzer_update table. // It takes the update set index and the value to increment the set by. func (fz *fuzzer) generateUpdateQueries(updateSet int, incrementVal int32) []string { var queries []string for _, id := range fz.updateRowsVals[updateSet] { - queries = append(queries, fmt.Sprintf(updateFuzzUpdate, incrementVal, id)) + // Use multi table DML queries half the time. 
+ queries = append(queries, getUpdateQuery(incrementVal, id)) } rand.Shuffle(len(queries), func(i, j int) { queries[i], queries[j] = queries[j], queries[i] @@ -427,7 +441,7 @@ func (fz *fuzzer) randomDML() string { } // Generate UPDATE updateId := fz.updateRowsVals[rand.Intn(len(fz.updateRowsVals))][rand.Intn(len(updateRowBaseVals))] - return fmt.Sprintf(updateFuzzUpdate, rand.Intn(100000), updateId) + return getUpdateQuery(rand.Int31n(100000), updateId) } /* diff --git a/go/test/endtoend/transaction/twopc/fuzz/main_test.go b/go/test/endtoend/transaction/twopc/fuzz/main_test.go index 1b05615d51a..15574c8d072 100644 --- a/go/test/endtoend/transaction/twopc/fuzz/main_test.go +++ b/go/test/endtoend/transaction/twopc/fuzz/main_test.go @@ -126,5 +126,6 @@ func cleanup(t *testing.T) { utils.ClearOutTable(t, vtParams, "twopc_fuzzer_insert") utils.ClearOutTable(t, vtParams, "twopc_fuzzer_update") + utils.ClearOutTable(t, vtParams, "twopc_fuzzer_multi") utils.ClearOutTable(t, vtParams, "twopc_t1") } diff --git a/go/test/endtoend/transaction/twopc/fuzz/schema.sql b/go/test/endtoend/transaction/twopc/fuzz/schema.sql index 5173166bfd4..b070466087d 100644 --- a/go/test/endtoend/transaction/twopc/fuzz/schema.sql +++ b/go/test/endtoend/transaction/twopc/fuzz/schema.sql @@ -4,6 +4,11 @@ create table twopc_fuzzer_update ( primary key (id) ) Engine=InnoDB; +create table twopc_fuzzer_multi ( + id bigint, + primary key (id) +) Engine=InnoDB; + create table twopc_fuzzer_insert ( id bigint, updateSet bigint, diff --git a/go/test/endtoend/transaction/twopc/fuzz/vschema.json b/go/test/endtoend/transaction/twopc/fuzz/vschema.json index 415b5958f54..83107bc96ff 100644 --- a/go/test/endtoend/transaction/twopc/fuzz/vschema.json +++ b/go/test/endtoend/transaction/twopc/fuzz/vschema.json @@ -3,6 +3,9 @@ "vindexes": { "reverse_bits": { "type": "reverse_bits" + }, + "xxhash": { + "type": "xxhash" } }, "tables": { @@ -22,6 +25,14 @@ } ] }, + "twopc_fuzzer_multi": { + "column_vindexes": [ + { + "column": "id", + "name": "xxhash" + } + ] + }, "twopc_t1": { "column_vindexes": [ { diff --git a/go/test/endtoend/transaction/twopc/main_test.go b/go/test/endtoend/transaction/twopc/main_test.go index 2f27198fd2e..631b29647c9 100644 --- a/go/test/endtoend/transaction/twopc/main_test.go +++ b/go/test/endtoend/transaction/twopc/main_test.go @@ -143,6 +143,9 @@ func cleanup(t *testing.T) { cluster.PanicHandler(t) twopcutil.ClearOutTable(t, vtParams, "twopc_user") twopcutil.ClearOutTable(t, vtParams, "twopc_t1") + twopcutil.ClearOutTable(t, vtParams, "twopc_lookup") + twopcutil.ClearOutTable(t, vtParams, "lookup_unique") + twopcutil.ClearOutTable(t, vtParams, "lookup") sm.reset() } diff --git a/go/test/endtoend/transaction/twopc/schema.sql b/go/test/endtoend/transaction/twopc/schema.sql index 7c289a03c2a..aff839eabe9 100644 --- a/go/test/endtoend/transaction/twopc/schema.sql +++ b/go/test/endtoend/transaction/twopc/schema.sql @@ -18,4 +18,27 @@ create table twopc_t1 id bigint, col bigint, primary key (id) -) Engine=InnoDB; \ No newline at end of file +) Engine=InnoDB; + +create table twopc_lookup +( + id bigint, + col bigint, + col_unique bigint, + primary key (id) +) Engine=InnoDB; + +create table lookup +( + col varchar(128), + id bigint, + keyspace_id varbinary(100), + primary key (id) +) Engine = InnoDB; + +create table lookup_unique +( + col_unique varchar(128), + keyspace_id varbinary(100), + primary key (col_unique) +) Engine = InnoDB; diff --git a/go/test/endtoend/transaction/twopc/twopc_test.go 
b/go/test/endtoend/transaction/twopc/twopc_test.go index 033d93f8792..5a97f79a79f 100644 --- a/go/test/endtoend/transaction/twopc/twopc_test.go +++ b/go/test/endtoend/transaction/twopc/twopc_test.go @@ -1395,8 +1395,6 @@ func TestReadTransactionStatus(t *testing.T) { "insert into twopc_t1(id, col) values(6, 4)", "insert into twopc_t1(id, col) values(9, 4)", }) - // Allow enough time for the commit to have started. - time.Sleep(1 * time.Second) // Create a tablet manager client and use it to read the transaction state. tmc := grpctmclient.NewClient() @@ -1405,12 +1403,24 @@ func TestReadTransactionStatus(t *testing.T) { defer cancel() primaryTablet := getTablet(clusterInstance.Keyspaces[0].Shards[2].FindPrimaryTablet().GrpcPort) + // Wait for the transaction to show up in the unresolved list. var unresTransaction *querypb.TransactionMetadata - for _, shard := range clusterInstance.Keyspaces[0].Shards { - urtRes, err := tmc.GetUnresolvedTransactions(ctx, getTablet(shard.FindPrimaryTablet().GrpcPort), 1) - require.NoError(t, err) - if len(urtRes) > 0 { - unresTransaction = urtRes[0] + timeout := time.After(10 * time.Second) + for { + for _, shard := range clusterInstance.Keyspaces[0].Shards { + urtRes, err := tmc.GetUnresolvedTransactions(ctx, getTablet(shard.FindPrimaryTablet().GrpcPort), 1) + require.NoError(t, err) + if len(urtRes) > 0 { + unresTransaction = urtRes[0] + } + } + if unresTransaction != nil { + break + } + select { + case <-timeout: + require.Fail(t, "timed out waiting for unresolved transaction") + default: } } require.NotNil(t, unresTransaction) @@ -1439,6 +1449,228 @@ func TestReadTransactionStatus(t *testing.T) { wg.Wait() } +// TestVindexes tests that different vindexes work well with two-phase commit. +func TestVindexes(t *testing.T) { + testcases := []struct { + name string + initQueries []string + testQueries []string + logExpected map[string][]string + }{ + { + name: "Lookup Single Update", + initQueries: []string{ + "insert into twopc_lookup(id, col, col_unique) values(4, 4, 6)", + "insert into twopc_lookup(id, col, col_unique) values(6, 4, 9)", + "insert into twopc_lookup(id, col, col_unique) values(9, 4, 4)", + }, + testQueries: []string{ + "begin", + "update twopc_lookup set col = 9 where col_unique = 9", + "commit", + }, + logExpected: map[string][]string{ + "ks.redo_statement:80-": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"insert into lookup(col, id, keyspace_id) values (9, 6, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"insert into lookup(col, id, keyspace_id) values (9, 6, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + }, + "ks.twopc_lookup:40-80": { + "update:[INT64(6) INT64(9) INT64(9)]", + }, + "ks.lookup:80-": { + "delete:[VARCHAR(\"4\") INT64(6) VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + "insert:[VARCHAR(\"9\") INT64(6) VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + }, + }, + { + name: "Lookup-Unique Single Update", + initQueries: []string{ + "insert into twopc_lookup(id, col, col_unique) values(4, 4, 6)", + "insert into twopc_lookup(id, col, col_unique) values(6, 4, 9)", + "insert into twopc_lookup(id, col, 
col_unique) values(9, 4, 4)", + }, + testQueries: []string{ + "begin", + "update twopc_lookup set col_unique = 20 where col_unique = 9", + "commit", + }, + logExpected: map[string][]string{ + "ks.redo_statement:80-": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup_unique where col_unique = 9 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"insert into lookup_unique(col_unique, keyspace_id) values (20, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup_unique where col_unique = 9 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"insert into lookup_unique(col_unique, keyspace_id) values (20, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + }, + "ks.twopc_lookup:40-80": { + "update:[INT64(6) INT64(4) INT64(20)]", + }, + "ks.lookup_unique:80-": { + "delete:[VARCHAR(\"9\") VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + "insert:[VARCHAR(\"20\") VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + }, + }, + { + name: "Lookup And Lookup-Unique Single Delete", + initQueries: []string{ + "insert into twopc_lookup(id, col, col_unique) values(4, 4, 6)", + "insert into twopc_lookup(id, col, col_unique) values(6, 4, 9)", + "insert into twopc_lookup(id, col, col_unique) values(9, 4, 4)", + }, + testQueries: []string{ + "begin", + "delete from twopc_lookup where col_unique = 9", + "commit", + }, + logExpected: map[string][]string{ + "ks.redo_statement:80-": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"delete from lookup_unique where col_unique = 9 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"delete from lookup_unique where col_unique = 9 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + }, + "ks.twopc_lookup:40-80": { + "delete:[INT64(6) INT64(4) INT64(9)]", + }, + "ks.lookup_unique:80-": { + "delete:[VARCHAR(\"9\") VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + "ks.lookup:80-": { + "delete:[VARCHAR(\"4\") INT64(6) VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + }, + }, + { + name: "Lookup And Lookup-Unique Single Insertion", + initQueries: []string{ + "insert into twopc_lookup(id, col, col_unique) values(4, 4, 6)", + "insert into twopc_lookup(id, col, col_unique) values(6, 4, 9)", + "insert into twopc_lookup(id, col, col_unique) values(9, 4, 4)", + }, + testQueries: []string{ + "begin", + "insert into twopc_lookup(id, col, col_unique) values(20, 4, 22)", + "commit", + }, + logExpected: map[string][]string{ + "ks.redo_statement:80-": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"insert into lookup(col, id, keyspace_id) values (4, 20, _binary'(\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"insert into lookup(col, id, keyspace_id) values (4, 20, _binary'(\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + }, + "ks.lookup:80-": { + "insert:[VARCHAR(\"4\") INT64(20) 
VARBINARY(\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + "ks.lookup_unique:-40": { + "insert:[VARCHAR(\"22\") VARBINARY(\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + "ks.twopc_lookup:-40": { + "insert:[INT64(20) INT64(4) INT64(22)]", + }, + }, + }, + { + name: "Lookup And Lookup-Unique Mix", + initQueries: []string{ + "insert into twopc_lookup(id, col, col_unique) values(4, 4, 6)", + "insert into twopc_lookup(id, col, col_unique) values(6, 4, 9)", + "insert into twopc_lookup(id, col, col_unique) values(9, 4, 4)", + }, + testQueries: []string{ + "begin", + "insert into twopc_lookup(id, col, col_unique) values(20, 4, 22)", + "update twopc_lookup set col = 9 where col_unique = 9", + "delete from twopc_lookup where id = 9", + "commit", + }, + logExpected: map[string][]string{ + "ks.redo_statement:80-": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"insert into lookup(col, id, keyspace_id) values (4, 20, _binary'(\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(3) BLOB(\"insert into lookup(col, id, keyspace_id) values (9, 6, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(4) BLOB(\"delete from lookup where col = 4 and id = 9 and keyspace_id = _binary'\\x90\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(5) BLOB(\"delete from lookup_unique where col_unique = 4 and keyspace_id = _binary'\\x90\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "insert:[VARCHAR(\"dtid-3\") INT64(6) BLOB(\"delete from twopc_lookup where id = 9 limit 10001 /* INT64 */\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"insert into lookup(col, id, keyspace_id) values (4, 20, _binary'(\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(2) BLOB(\"delete from lookup where col = 4 and id = 6 and keyspace_id = _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(3) BLOB(\"insert into lookup(col, id, keyspace_id) values (9, 6, _binary'`\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0')\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(4) BLOB(\"delete from lookup where col = 4 and id = 9 and keyspace_id = _binary'\\x90\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(5) BLOB(\"delete from lookup_unique where col_unique = 4 and keyspace_id = _binary'\\x90\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0\\\\0' limit 10001\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(6) BLOB(\"delete from twopc_lookup where id = 9 limit 10001 /* INT64 */\")]", + }, + "ks.redo_statement:40-80": { + "insert:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"update twopc_lookup set col = 9 where col_unique = 9 limit 10001 /* INT64 */\")]", + "delete:[VARCHAR(\"dtid-3\") INT64(1) BLOB(\"update twopc_lookup set col = 9 where col_unique = 9 limit 10001 /* INT64 */\")]", + }, + "ks.twopc_lookup:-40": { + "insert:[INT64(20) INT64(4) INT64(22)]", + }, + "ks.twopc_lookup:40-80": { + "update:[INT64(6) INT64(9) INT64(9)]", + }, + "ks.twopc_lookup:80-": { + "delete:[INT64(9) INT64(4) INT64(4)]", + }, + "ks.lookup_unique:-40": { + "insert:[VARCHAR(\"22\") VARBINARY(\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + "ks.lookup_unique:80-": { + "delete:[VARCHAR(\"4\") VARBINARY(\"\\x90\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + "ks.lookup:80-": { + "insert:[VARCHAR(\"4\") INT64(20) 
VARBINARY(\"(\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + "delete:[VARCHAR(\"4\") INT64(6) VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + "insert:[VARCHAR(\"9\") INT64(6) VARBINARY(\"`\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + "delete:[VARCHAR(\"4\") INT64(9) VARBINARY(\"\\x90\\x00\\x00\\x00\\x00\\x00\\x00\\x00\")]", + }, + }, + }, + } + + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + defer cleanup(t) + + vtgateConn, err := cluster.DialVTGate(context.Background(), t.Name(), vtgateGrpcAddress, "dt_user", "") + require.NoError(t, err) + defer vtgateConn.Close() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ch := make(chan *binlogdatapb.VEvent) + runVStream(t, ctx, ch, vtgateConn) + + conn := vtgateConn.Session("", nil) + qCtx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // initial insert + for _, query := range tt.initQueries { + execute(qCtx, t, conn, query) + } + + // ignore initial change + tableMap := make(map[string][]*querypb.Field) + dtMap := make(map[string]string) + _ = retrieveTransitionsWithTimeout(t, ch, tableMap, dtMap, 2*time.Second) + + // Insert into multiple shards + for _, query := range tt.testQueries { + execute(qCtx, t, conn, query) + } + + // Below check ensures that the transaction is resolved by the resolver on receiving unresolved transaction signal from MM. + logTable := retrieveTransitionsWithTimeout(t, ch, tableMap, dtMap, 2*time.Second) + for key, val := range tt.logExpected { + assert.EqualValues(t, val, logTable[key], key) + } + }) + } +} + func getTablet(tabletGrpcPort int) *tabletpb.Tablet { portMap := make(map[string]int32) portMap["grpc"] = int32(tabletGrpcPort) diff --git a/go/test/endtoend/transaction/twopc/vschema.json b/go/test/endtoend/transaction/twopc/vschema.json index bca58b05c1e..0c22f40d54b 100644 --- a/go/test/endtoend/transaction/twopc/vschema.json +++ b/go/test/endtoend/transaction/twopc/vschema.json @@ -6,6 +6,24 @@ }, "reverse_bits": { "type": "reverse_bits" + }, + "lookup_vdx": { + "type": "lookup", + "params": { + "table": "lookup", + "from": "col,id", + "to": "keyspace_id" + }, + "owner": "twopc_lookup" + }, + "lookup_unique_vdx": { + "type": "lookup_unique", + "params": { + "table": "lookup_unique", + "from": "col_unique", + "to": "keyspace_id" + }, + "owner": "twopc_lookup" } }, "tables": { @@ -32,6 +50,41 @@ "name": "reverse_bits" } ] + }, + "twopc_lookup": { + "column_vindexes": [ + { + "column": "id", + "name": "reverse_bits" + }, + { + "columns": [ + "col", + "id" + ], + "name": "lookup_vdx" + }, + { + "column": "col_unique", + "name": "lookup_unique_vdx" + } + ] + }, + "lookup": { + "column_vindexes": [ + { + "column": "col", + "name": "xxhash" + } + ] + }, + "lookup_unique": { + "column_vindexes": [ + { + "column": "col_unique", + "name": "xxhash" + } + ] } } } \ No newline at end of file diff --git a/go/test/endtoend/vreplication/cluster_test.go b/go/test/endtoend/vreplication/cluster_test.go index 119843651bc..dc5a72e5e88 100644 --- a/go/test/endtoend/vreplication/cluster_test.go +++ b/go/test/endtoend/vreplication/cluster_test.go @@ -39,7 +39,6 @@ import ( "vitess.io/vitess/go/vt/mysqlctl" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext" - vttablet "vitess.io/vitess/go/vt/vttablet/common" vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" ) @@ -101,18 +100,6 @@ func (cc *ClusterConfig) enableGTIDCompression() func() { } } -// setAllVTTabletExperimentalFlags sets all the experimental flags 
for vttablet and returns a function -// that can be used to reset them in a defer. -func setAllVTTabletExperimentalFlags() func() { - experimentalArgs := fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching) - oldArgs := extraVTTabletArgs - extraVTTabletArgs = append(extraVTTabletArgs, experimentalArgs) - return func() { - extraVTTabletArgs = oldArgs - } -} - // VitessCluster represents all components within the test cluster type VitessCluster struct { t *testing.T diff --git a/go/test/endtoend/vreplication/fk_test.go b/go/test/endtoend/vreplication/fk_test.go index 34881cbcd1a..f977d5a74cd 100644 --- a/go/test/endtoend/vreplication/fk_test.go +++ b/go/test/endtoend/vreplication/fk_test.go @@ -29,7 +29,6 @@ import ( "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/log" - vttablet "vitess.io/vitess/go/vt/vttablet/common" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" ) @@ -43,9 +42,6 @@ func TestFKWorkflow(t *testing.T) { extraVTTabletArgs = []string{ // Ensure that there are multiple copy phase cycles per table. "--vstream_packet_size=256", - // Test VPlayer batching mode. - fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching), } defer func() { extraVTTabletArgs = nil }() diff --git a/go/test/endtoend/vreplication/vdiff2_test.go b/go/test/endtoend/vreplication/vdiff2_test.go index aaf4cae5375..612ba00236b 100644 --- a/go/test/endtoend/vreplication/vdiff2_test.go +++ b/go/test/endtoend/vreplication/vdiff2_test.go @@ -36,7 +36,6 @@ import ( "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/sqlparser" - vttablet "vitess.io/vitess/go/vt/vttablet/common" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" @@ -140,9 +139,6 @@ func TestVDiff2(t *testing.T) { extraVTTabletArgs = []string{ // This forces us to use multiple vstream packets even with small test tables. "--vstream_packet_size=1", - // Test VPlayer batching mode. 
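These deletions drop the explicit --vreplication_experimental_flags override from the tests; the flag is a single integer bitmask, and the three features the removed helper combined appear to be exercised by default now. A minimal sketch of the bit-flag pattern (constant names taken from the removed code; the numeric values are assumed, as such flags are conventionally consecutive powers of two):

package main

import "fmt"

// Assumed values for illustration only; the real constants live in
// vitess.io/vitess/go/vt/vttablet/common.
const (
	VReplicationExperimentalFlagAllowNoBlobBinlogRowImage = 1 << iota // 1
	VReplicationExperimentalFlagOptimizeInserts                       // 2
	VReplicationExperimentalFlagVPlayerBatching                       // 4
)

func main() {
	all := VReplicationExperimentalFlagAllowNoBlobBinlogRowImage |
		VReplicationExperimentalFlagOptimizeInserts |
		VReplicationExperimentalFlagVPlayerBatching
	// This is how the removed helper rendered the vttablet argument.
	fmt.Printf("--vreplication_experimental_flags=%d\n", all) // 7 under these assumed values
}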
- fmt.Sprintf("--vreplication_experimental_flags=%d", - vttablet.VReplicationExperimentalFlagAllowNoBlobBinlogRowImage|vttablet.VReplicationExperimentalFlagOptimizeInserts|vttablet.VReplicationExperimentalFlagVPlayerBatching), } vc = NewVitessCluster(t, &clusterOptions{cells: strings.Split(cellNames, ",")}) diff --git a/go/test/endtoend/vreplication/vdiff_helper_test.go b/go/test/endtoend/vreplication/vdiff_helper_test.go index 561edfe8b7e..fd223d78082 100644 --- a/go/test/endtoend/vreplication/vdiff_helper_test.go +++ b/go/test/endtoend/vreplication/vdiff_helper_test.go @@ -35,7 +35,7 @@ import ( ) const ( - vdiffTimeout = 120 * time.Second // We can leverage auto retry on error with this longer-than-usual timeout + vdiffTimeout = 180 * time.Second // We can leverage auto retry on error with this longer-than-usual timeout vdiffRetryTimeout = 30 * time.Second vdiffStatusCheckInterval = 5 * time.Second vdiffRetryInterval = 5 * time.Second @@ -71,7 +71,8 @@ func doVtctlclientVDiff(t *testing.T, keyspace, workflow, cells string, want *ex ksWorkflow := fmt.Sprintf("%s.%s", keyspace, workflow) t.Run(fmt.Sprintf("vtctlclient vdiff %s", ksWorkflow), func(t *testing.T) { // update-table-stats is needed in order to test progress reports. - uuid, _ := performVDiff2Action(t, true, ksWorkflow, cells, "create", "", false, "--auto-retry", "--update-table-stats") + uuid, _ := performVDiff2Action(t, true, ksWorkflow, cells, "create", "", false, "--auto-retry", + "--update-table-stats", fmt.Sprintf("--filtered_replication_wait_time=%v", vdiffTimeout/2)) info := waitForVDiff2ToComplete(t, true, ksWorkflow, cells, uuid, time.Time{}) require.NotNil(t, info) require.Equal(t, workflow, info.Workflow) @@ -164,7 +165,7 @@ func doVtctldclientVDiff(t *testing.T, keyspace, workflow, cells string, want *e ksWorkflow := fmt.Sprintf("%s.%s", keyspace, workflow) t.Run(fmt.Sprintf("vtctldclient vdiff %s", ksWorkflow), func(t *testing.T) { // update-table-stats is needed in order to test progress reports. - flags := []string{"--auto-retry", "--update-table-stats"} + flags := []string{"--auto-retry", "--update-table-stats", fmt.Sprintf("--filtered-replication-wait-time=%v", vdiffTimeout/2)} if len(extraFlags) > 0 { flags = append(flags, extraFlags...) 
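The vdiff helper now passes the filtered-replication wait explicitly, derived from the (raised) overall vdiff timeout instead of relying on the server-side default. A small self-contained sketch of the derivation, using the values from this diff:

package main

import (
	"fmt"
	"time"
)

func main() {
	const vdiffTimeout = 180 * time.Second // raised from 120s above
	flags := []string{
		"--auto-retry",
		"--update-table-stats",
		// Half the overall timeout, so the wait can expire and be retried
		// before the vdiff itself times out; renders as 1m30s.
		fmt.Sprintf("--filtered-replication-wait-time=%v", vdiffTimeout/2),
	}
	fmt.Println(flags)
}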
} diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 04a5eabc33b..d3193298a0c 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -293,7 +293,6 @@ func TestVreplicationCopyThrottling(t *testing.T) { } func TestBasicVreplicationWorkflow(t *testing.T) { - defer setAllVTTabletExperimentalFlags() sourceKsOpts["DBTypeVersion"] = "mysql-8.0" targetKsOpts["DBTypeVersion"] = "mysql-8.0" testBasicVreplicationWorkflow(t, "noblob") @@ -595,8 +594,6 @@ func TestCellAliasVreplicationWorkflow(t *testing.T) { cells := []string{"zone1", "zone2"} resetCompression := mainClusterConfig.enableGTIDCompression() defer resetCompression() - resetExperimentalFlags := setAllVTTabletExperimentalFlags() - defer resetExperimentalFlags() vc = NewVitessCluster(t, &clusterOptions{cells: cells}) defer vc.TearDown() diff --git a/go/test/endtoend/vtorc/api/api_test.go b/go/test/endtoend/vtorc/api/api_test.go index 670e8c803fa..638ea5fa72e 100644 --- a/go/test/endtoend/vtorc/api/api_test.go +++ b/go/test/endtoend/vtorc/api/api_test.go @@ -35,8 +35,7 @@ import ( func TestAPIEndpoints(t *testing.T) { defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, - RecoveryPeriodBlockSeconds: 5, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] diff --git a/go/test/endtoend/vtorc/api/config_test.go b/go/test/endtoend/vtorc/api/config_test.go new file mode 100644 index 00000000000..71cc6291be7 --- /dev/null +++ b/go/test/endtoend/vtorc/api/config_test.go @@ -0,0 +1,204 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +*/ + +package api + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/test/endtoend/cluster" + "vitess.io/vitess/go/test/endtoend/vtorc/utils" +) + +// TestDynamicConfigs tests the dynamic configurations that VTOrc offers. +func TestDynamicConfigs(t *testing.T) { + defer cluster.PanicHandler(t) + utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{}, 1, "") + vtorc := clusterInfo.ClusterInstance.VTOrcProcesses[0] + + // Restart VTOrc without any flag overrides so that all the configurations can be tested. + err := vtorc.TearDown() + require.NoError(t, err) + vtorc.Config = cluster.VTOrcConfiguration{} + vtorc.NoOverride = true + err = vtorc.Setup() + require.NoError(t, err) + + // Call API with retry to ensure VTOrc is up + status, resp := utils.MakeAPICallRetry(t, vtorc, "/debug/health", func(code int, response string) bool { + return code != 200 + }) + // Verify when VTOrc is healthy, it has also run the first discovery.
+ assert.Equal(t, 200, status) + assert.Contains(t, resp, `"Healthy": true,`) + + t.Run("InstancePollTime", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"instance-poll-time": 5000000000`) + // Update configuration and verify the output. + vtorc.Config.InstancePollTime = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"instance-poll-time": "10h"`) + }) + + t.Run("PreventCrossCellFailover", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"prevent-cross-cell-failover": false`) + // Update configuration and verify the output. + vtorc.Config.PreventCrossCellFailover = true + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"prevent-cross-cell-failover": true`) + }) + + t.Run("SnapshotTopologyInterval", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"snapshot-topology-interval": 0`) + // Update configuration and verify the output. + vtorc.Config.SnapshotTopologyInterval = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"snapshot-topology-interval": "10h"`) + }) + + t.Run("ReasonableReplicationLag", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"reasonable-replication-lag": 10000000000`) + // Update configuration and verify the output. + vtorc.Config.ReasonableReplicationLag = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"reasonable-replication-lag": "10h"`) + }) + + t.Run("AuditToBackend", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"audit-to-backend": false`) + // Update configuration and verify the output. + vtorc.Config.AuditToBackend = true + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"audit-to-backend": true`) + }) + + t.Run("AuditToSyslog", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"audit-to-syslog": false`) + // Update configuration and verify the output. + vtorc.Config.AuditToSyslog = true + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"audit-to-syslog": true`) + }) + + t.Run("AuditPurgeDuration", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"audit-purge-duration": 604800000000000`) + // Update configuration and verify the output. + vtorc.Config.AuditPurgeDuration = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"audit-purge-duration": "10h"`) + }) + + t.Run("WaitReplicasTimeout", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"wait-replicas-timeout": 30000000000`) + // Update configuration and verify the output. + vtorc.Config.WaitReplicasTimeout = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. 
+ waitForConfig(t, vtorc, `"wait-replicas-timeout": "10h"`) + }) + + t.Run("TolerableReplicationLag", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"tolerable-replication-lag": 0`) + // Update configuration and verify the output. + vtorc.Config.TolerableReplicationLag = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"tolerable-replication-lag": "10h"`) + }) + + t.Run("TopoInformationRefreshDuration", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"topo-information-refresh-duration": 15000000000`) + // Update configuration and verify the output. + vtorc.Config.TopoInformationRefreshDuration = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"topo-information-refresh-duration": "10h"`) + }) + + t.Run("RecoveryPollDuration", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"recovery-poll-duration": 1000000000`) + // Update configuration and verify the output. + vtorc.Config.RecoveryPollDuration = "10h" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"recovery-poll-duration": "10h"`) + }) + + t.Run("AllowEmergencyReparent", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"allow-emergency-reparent": true`) + // Update configuration and verify the output. + vtorc.Config.AllowEmergencyReparent = "false" + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"allow-emergency-reparent": "false"`) + }) + + t.Run("ChangeTabletsWithErrantGtidToDrained", func(t *testing.T) { + // Get configuration and verify the output. + waitForConfig(t, vtorc, `"change-tablets-with-errant-gtid-to-drained": false`) + // Update configuration and verify the output. + vtorc.Config.ChangeTabletsWithErrantGtidToDrained = true + err := vtorc.RewriteConfiguration() + assert.NoError(t, err) + // Wait until the config has been updated and seen. + waitForConfig(t, vtorc, `"change-tablets-with-errant-gtid-to-drained": true`) + }) +} + +// waitForConfig waits for the expectedConfig to be present in the VTOrc configuration. 
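Every subtest above follows the same write-then-poll shape: mutate one field of vtorc.Config, rewrite the config file, then poll /api/config until the change is visible. A distilled, self-contained sketch of that polling contract, with plain net/http standing in for the cluster utils:

package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

// waitUntilContains polls url until the response body contains want, or the
// deadline passes. It mirrors the retry contract used above: the predicate
// keeps the loop retrying, with a sleep between attempts.
func waitUntilContains(url, want string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if resp, err := http.Get(url); err == nil {
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK && strings.Contains(string(body), want) {
				return nil
			}
		}
		time.Sleep(time.Second)
	}
	return fmt.Errorf("timed out waiting for %q at %s", want, url)
}

func main() {
	// Hypothetical address; the e2e test resolves this from the VTOrc process.
	err := waitUntilContains("http://localhost:15000/api/config",
		`"wait-replicas-timeout": "10h"`, 30*time.Second)
	fmt.Println(err)
}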
+func waitForConfig(t *testing.T, vtorc *cluster.VTOrcProcess, expectedConfig string) { + t.Helper() + status, _ := utils.MakeAPICallRetry(t, vtorc, "/api/config", func(_ int, response string) bool { + return !strings.Contains(response, expectedConfig) + }) + require.EqualValues(t, 200, status) +} diff --git a/go/test/endtoend/vtorc/general/vtorc_test.go b/go/test/endtoend/vtorc/general/vtorc_test.go index 2ec2fd4b0ae..9cc931897bf 100644 --- a/go/test/endtoend/vtorc/general/vtorc_test.go +++ b/go/test/endtoend/vtorc/general/vtorc_test.go @@ -42,7 +42,7 @@ func TestPrimaryElection(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 2, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -128,7 +128,7 @@ func TestSingleKeyspace(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, []string{"--clusters_to_watch", "ks"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -147,7 +147,7 @@ func TestKeyspaceShard(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, []string{"--clusters_to_watch", "ks/0"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -169,7 +169,7 @@ func TestVTOrcRepairs(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 3, 0, []string{"--change-tablets-with-errant-gtid-to-drained"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -348,7 +348,7 @@ func TestRepairAfterTER(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 0, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -384,7 +384,7 @@ func TestSemiSync(t *testing.T) { newCluster := utils.SetupNewClusterSemiSync(t) defer utils.PrintVTOrcLogsOnFailure(t, newCluster.ClusterInstance) utils.StartVTOrcs(t, newCluster, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1) defer func() { utils.StopVTOrcs(t, newCluster) @@ -482,7 +482,7 @@ func TestVTOrcWithPrs(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 4, 0, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -514,8 +514,6 
@@ func TestVTOrcWithPrs(t *testing.T) { "--new-primary", replica.Alias) require.NoError(t, err, "error in PlannedReparentShard output - %s", output) - time.Sleep(40 * time.Second) - // check that the replica gets promoted utils.CheckPrimaryTablet(t, clusterInfo, replica, true) // Verify that VTOrc didn't run any other recovery @@ -618,7 +616,7 @@ func TestDurabilityPolicySetLater(t *testing.T) { // Now start the vtorc instances utils.StartVTOrcs(t, newCluster, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1) defer func() { utils.StopVTOrcs(t, newCluster) @@ -645,7 +643,7 @@ func TestFullStatusConnectionPooling(t *testing.T) { utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 4, 0, []string{ "--tablet_manager_grpc_concurrency=1", }, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] diff --git a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go index 886aa3a580a..a46e3789730 100644 --- a/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go +++ b/go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go @@ -44,7 +44,7 @@ func TestDownPrimary(t *testing.T) { // If that replica is more advanced than the same-cell-replica, then we try to promote the cross-cell replica as an intermediate source. // If we don't specify a small value of --wait-replicas-timeout, then we would end up waiting for 30 seconds for the dead-primary to respond, failing this test. utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "semi_sync") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -150,7 +150,7 @@ func TestDownPrimaryBeforeVTOrc(t *testing.T) { // Start a VTOrc instance utils.StartVTOrcs(t, clusterInfo, []string{"--remote_operation_timeout=10s"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1) vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0] @@ -244,7 +244,7 @@ func TestDeadPrimaryRecoversImmediately(t *testing.T) { // If that replica is more advanced than the same-cell-replica, then we try to promote the cross-cell replica as an intermediate source. // If we don't specify a small value of --wait-replicas-timeout, then we would end up waiting for 30 seconds for the dead-primary to respond, failing this test. 
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"}, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "semi_sync") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -324,7 +324,7 @@ func TestCrossDataCenterFailure(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -370,7 +370,7 @@ func TestCrossDataCenterFailureError(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -417,7 +417,7 @@ func TestLostRdonlyOnPrimaryFailure(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 2, nil, cluster.VTOrcConfiguration{ - PreventCrossDataCenterPrimaryFailover: true, + PreventCrossCellFailover: true, }, 1, "") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] @@ -728,8 +728,8 @@ func TestDownPrimaryPromotionRuleWithLagCrossCenter(t *testing.T) { defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance) defer cluster.PanicHandler(t) utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{ - LockShardTimeoutSeconds: 5, - PreventCrossDataCenterPrimaryFailover: true, + LockShardTimeoutSeconds: 5, + PreventCrossCellFailover: true, }, 1, "test") keyspace := &clusterInfo.ClusterInstance.Keyspaces[0] shard0 := &keyspace.Shards[0] diff --git a/go/test/endtoend/vtorc/readtopologyinstance/main_test.go b/go/test/endtoend/vtorc/readtopologyinstance/main_test.go index fa8dc116782..823655ed785 100644 --- a/go/test/endtoend/vtorc/readtopologyinstance/main_test.go +++ b/go/test/endtoend/vtorc/readtopologyinstance/main_test.go @@ -55,8 +55,7 @@ func TestReadTopologyInstanceBufferable(t *testing.T) { "--topo_global_root", clusterInfo.ClusterInstance.VtctlProcess.TopoGlobalRoot, } servenv.ParseFlags("vtorc") - config.Config.RecoveryPeriodBlockSeconds = 1 - config.Config.InstancePollSeconds = 1 + config.SetInstancePollTime(1 * time.Second) config.MarkConfigurationLoaded() server.StartVTOrcDiscovery() diff --git a/go/test/endtoend/vtorc/utils/utils.go b/go/test/endtoend/vtorc/utils/utils.go index 680d1bfa39a..456d55518dd 100644 --- a/go/test/endtoend/vtorc/utils/utils.go +++ b/go/test/endtoend/vtorc/utils/utils.go @@ -376,7 +376,6 @@ func CheckPrimaryTablet(t *testing.T, clusterInfo *VTOrcClusterInfo, tablet *clu for { now := time.Now() if now.Sub(start) > time.Second*60 { - //log.Exitf("error") assert.FailNow(t, "failed to elect primary before timeout") } tabletInfo, err := clusterInfo.ClusterInstance.VtctldClientProcess.GetTablet(tablet.Alias) @@ -775,10 +774,10 @@ func MakeAPICallRetry(t *testing.T, vtorc *cluster.VTOrcProcess, url string, ret for { select { case <-timeout: - t.Fatal("timed out waiting for api to work") + 
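The utils.go change just below is a classic Go scoping fix: with :=, status and response were re-declared inside the loop body, so the timeout branch had nothing useful to report; switching to = assigns to variables in the enclosing scope (presumably declared earlier in the function, outside this hunk), which lets the new t.Fatalf include the last response. A minimal illustration of the pitfall:

package main

import "fmt"

func fetch() (int, string) { return 200, "ok" }

func main() {
	var status int
	var response string

	for i := 0; i < 1; i++ {
		// BUG: ':=' declares new, loop-scoped variables that shadow the
		// outer ones; the outer status/response stay zero-valued.
		status, response := fetch()
		_, _ = status, response
	}
	fmt.Println(status, response) // prints: 0 ""

	for i := 0; i < 1; i++ {
		// FIX: '=' assigns to the variables in the enclosing scope.
		status, response = fetch()
	}
	fmt.Println(status, response) // prints: 200 ok
}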
t.Fatalf("timed out waiting for api to work. Last response - %s", response) return default: - status, response, _ := MakeAPICall(t, vtorc, url) + status, response, _ = MakeAPICall(t, vtorc, url) if retry(status, response) { time.Sleep(1 * time.Second) break diff --git a/go/test/vschemawrapper/vschema_wrapper.go b/go/test/vschemawrapper/vschema_wrapper.go index a1b87f5569c..b362a8b7408 100644 --- a/go/test/vschemawrapper/vschema_wrapper.go +++ b/go/test/vschemawrapper/vschema_wrapper.go @@ -299,12 +299,12 @@ func (vw *VSchemaWrapper) getActualKeyspace() string { return ks.Name } -func (vw *VSchemaWrapper) DefaultKeyspace() (*vindexes.Keyspace, error) { +func (vw *VSchemaWrapper) SelectedKeyspace() (*vindexes.Keyspace, error) { return vw.V.Keyspaces["main"].Keyspace, nil } func (vw *VSchemaWrapper) AnyKeyspace() (*vindexes.Keyspace, error) { - return vw.DefaultKeyspace() + return vw.SelectedKeyspace() } func (vw *VSchemaWrapper) FirstSortedKeyspace() (*vindexes.Keyspace, error) { diff --git a/go/viperutil/debug/debug.go b/go/viperutil/debug/debug.go index 66cbc7f2962..662634a5675 100644 --- a/go/viperutil/debug/debug.go +++ b/go/viperutil/debug/debug.go @@ -25,3 +25,13 @@ import ( func Debug() { registry.Combined().Debug() } + +// WriteConfigAs writes the config into the given filename. +func WriteConfigAs(filename string) error { + return registry.Combined().WriteConfigAs(filename) +} + +// AllSettings gets all the settings in the configuration. +func AllSettings() map[string]any { + return registry.Combined().AllSettings() +} diff --git a/go/vt/mysqlctl/builtinbackupengine.go b/go/vt/mysqlctl/builtinbackupengine.go index f3cbe5364a0..5aa759f6f7a 100644 --- a/go/vt/mysqlctl/builtinbackupengine.go +++ b/go/vt/mysqlctl/builtinbackupengine.go @@ -604,7 +604,15 @@ func (be *BuiltinBackupEngine) backupFiles( wg := sync.WaitGroup{} ctxCancel, cancel := context.WithCancel(ctx) - defer cancel() + defer func() { + // We may still have operations in flight that require a valid context, such as adding files to S3. + // Unless we encountered an error, we should not cancel the context, this is taken care of later + // in the process. If we encountered an error however, we can safely cancel the context as we should + // no longer work on anything and exit fast. + if finalErr != nil { + cancel() + } + }() for i := range fes { wg.Add(1) @@ -1037,7 +1045,15 @@ func (be *BuiltinBackupEngine) restoreFiles(ctx context.Context, params RestoreP wg := sync.WaitGroup{} ctxCancel, cancel := context.WithCancel(ctx) - defer cancel() + defer func() { + // We may still have operations in flight that require a valid context, such as adding files to S3. + // Unless we encountered an error, we should not cancel the context. This is taken care of later + // in the process. If we encountered an error however, we can safely cancel the context as we should + // no longer work on anything and exit fast. 
+ if err != nil { + cancel() + } + }() for i := range fes { wg.Add(1) diff --git a/go/vt/proto/vtadmin/vtadmin.pb.go b/go/vt/proto/vtadmin/vtadmin.pb.go index e85385ec409..8b6a6997c8d 100644 --- a/go/vt/proto/vtadmin/vtadmin.pb.go +++ b/go/vt/proto/vtadmin/vtadmin.pb.go @@ -1203,8 +1203,12 @@ type ApplySchemaRequest struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"` - Request *vtctldata.ApplySchemaRequest `protobuf:"bytes,2,opt,name=request,proto3" json:"request,omitempty"` + ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"` + // Request.Sql will be overriden by this Sql field. + Sql string `protobuf:"bytes,2,opt,name=sql,proto3" json:"sql,omitempty"` + // Request.CallerId will be overriden by this CallerId field. + CallerId string `protobuf:"bytes,3,opt,name=caller_id,json=callerId,proto3" json:"caller_id,omitempty"` + Request *vtctldata.ApplySchemaRequest `protobuf:"bytes,4,opt,name=request,proto3" json:"request,omitempty"` } func (x *ApplySchemaRequest) Reset() { @@ -1244,6 +1248,20 @@ func (x *ApplySchemaRequest) GetClusterId() string { return "" } +func (x *ApplySchemaRequest) GetSql() string { + if x != nil { + return x.Sql + } + return "" +} + +func (x *ApplySchemaRequest) GetCallerId() string { + if x != nil { + return x.CallerId + } + return "" +} + func (x *ApplySchemaRequest) GetRequest() *vtctldata.ApplySchemaRequest { if x != nil { return x.Request @@ -7731,11 +7749,14 @@ var file_vtadmin_proto_rawDesc = []byte{ 0x2e, 0x76, 0x74, 0x63, 0x74, 0x6c, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x77, 0x69, 0x74, 0x63, 0x68, 0x54, 0x72, 0x61, 0x66, 0x66, 0x69, 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x22, 0x6c, 0x0a, 0x12, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, - 0x72, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, - 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x37, 0x0a, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x76, 0x74, 0x63, 0x74, 0x6c, 0x64, 0x61, + 0x22, 0x9b, 0x01, 0x0a, 0x12, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, + 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, + 0x73, 0x74, 0x65, 0x72, 0x49, 0x64, 0x12, 0x10, 0x0a, 0x03, 0x73, 0x71, 0x6c, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x71, 0x6c, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x61, 0x6c, 0x6c, + 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x61, 0x6c, + 0x6c, 0x65, 0x72, 0x49, 0x64, 0x12, 0x37, 0x0a, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x76, 0x74, 0x63, 0x74, 0x6c, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x70, 0x70, 0x6c, 0x79, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x52, 0x07, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x80, 0x01, 0x0a, 0x1c, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4d, diff --git a/go/vt/proto/vtadmin/vtadmin_vtproto.pb.go 
b/go/vt/proto/vtadmin/vtadmin_vtproto.pb.go index bc0746b7b8a..82cca2cea06 100644 --- a/go/vt/proto/vtadmin/vtadmin_vtproto.pb.go +++ b/go/vt/proto/vtadmin/vtadmin_vtproto.pb.go @@ -454,6 +454,8 @@ func (m *ApplySchemaRequest) CloneVT() *ApplySchemaRequest { } r := new(ApplySchemaRequest) r.ClusterId = m.ClusterId + r.Sql = m.Sql + r.CallerId = m.CallerId r.Request = m.Request.CloneVT() if len(m.unknownFields) > 0 { r.unknownFields = make([]byte, len(m.unknownFields)) @@ -4038,6 +4040,20 @@ func (m *ApplySchemaRequest) MarshalToSizedBufferVT(dAtA []byte) (int, error) { i -= size i = protohelpers.EncodeVarint(dAtA, i, uint64(size)) i-- + dAtA[i] = 0x22 + } + if len(m.CallerId) > 0 { + i -= len(m.CallerId) + copy(dAtA[i:], m.CallerId) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.CallerId))) + i-- + dAtA[i] = 0x1a + } + if len(m.Sql) > 0 { + i -= len(m.Sql) + copy(dAtA[i:], m.Sql) + i = protohelpers.EncodeVarint(dAtA, i, uint64(len(m.Sql))) + i-- dAtA[i] = 0x12 } if len(m.ClusterId) > 0 { @@ -10321,6 +10337,14 @@ func (m *ApplySchemaRequest) SizeVT() (n int) { if l > 0 { n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) } + l = len(m.Sql) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } + l = len(m.CallerId) + if l > 0 { + n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) + } if m.Request != nil { l = m.Request.SizeVT() n += 1 + l + protohelpers.SizeOfVarint(uint64(l)) @@ -15873,6 +15897,70 @@ func (m *ApplySchemaRequest) UnmarshalVT(dAtA []byte) error { m.ClusterId = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Sql", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Sql = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field CallerId", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return protohelpers.ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + stringLen |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return protohelpers.ErrInvalidLength + } + postIndex := iNdEx + intStringLen + if postIndex < 0 { + return protohelpers.ErrInvalidLength + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.CallerId = string(dAtA[iNdEx:postIndex]) + iNdEx = postIndex + case 4: if wireType != 2 { return fmt.Errorf("proto: wrong wireType = %d for field Request", wireType) } diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index 4aa9818eb7d..22bf3523dfc 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -370,6 +370,14 @@ func moveFlags(name string, fs *pflag.FlagSet) { // functions. func CobraPreRunE(cmd *cobra.Command, args []string) error { _flag.TrickGlog() + // Register logging on config file change. 
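The servenv hunk that follows wires config-file reloads to a log line: a channel is registered with viperutil before LoadConfig starts the watch, a goroutine drains it, and, per the comment in the diff, the channel is closed via OnTerm only after watchCancel is registered so nothing can send on a closed channel. A self-contained sketch of that notify-then-close ordering, with a plain channel standing in for the viperutil API:

package main

import (
	"fmt"
	"sync"
)

func main() {
	ch := make(chan struct{})
	var wg sync.WaitGroup

	// Consumer: log every reload notification until the channel is closed.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for range ch {
			fmt.Println("configuration reloaded")
		}
	}()

	// Producer: stands in for the config watcher. It must be stopped before
	// ch is closed, which is why the real code registers OnTerm(watchCancel)
	// first and OnTerm(func() { close(ch) }) second.
	ch <- struct{}{}
	ch <- struct{}{}

	close(ch) // safe here: the sends above have completed
	wg.Wait()
}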
+ ch := make(chan struct{}) + viperutil.NotifyConfigReload(ch) + go func() { + for range ch { + log.Infof("Change in configuration - %v", viperdebug.AllSettings()) + } + }() watchCancel, err := viperutil.LoadConfig() if err != nil { @@ -377,6 +385,10 @@ func CobraPreRunE(cmd *cobra.Command, args []string) error { } OnTerm(watchCancel) + // Register a function to be called on termination that closes the channel. + // This is done after the watchCancel has registered to ensure that we don't end up + // sending on a closed channel. + OnTerm(func() { close(ch) }) HTTPHandleFunc("/debug/config", viperdebug.HandlerFunc) logutil.PurgeLogs() diff --git a/go/vt/vtadmin/api.go b/go/vt/vtadmin/api.go index cef8816504a..4f91459d9ed 100644 --- a/go/vt/vtadmin/api.go +++ b/go/vt/vtadmin/api.go @@ -59,6 +59,7 @@ import ( "vitess.io/vitess/go/vt/vtadmin/rbac" "vitess.io/vitess/go/vt/vtadmin/sort" "vitess.io/vitess/go/vt/vtadmin/vtadminproto" + "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver" "vitess.io/vitess/go/vt/vtctl/workflow" "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vterrors" @@ -488,6 +489,31 @@ func (api *API) ApplySchema(ctx context.Context, req *vtadminpb.ApplySchemaReque return nil, err } + // Parser with default options. New() itself initializes with default MySQL version. + parser, err := sqlparser.New(sqlparser.Options{ + TruncateUILen: 512, + TruncateErrLen: 0, + }) + if err != nil { + return nil, err + } + + // Split the sql statement received from request. + sqlParts, err := parser.SplitStatementToPieces(req.Sql) + if err != nil { + return nil, err + } + + req.Request.Sql = sqlParts + + // Set the callerID if not empty. + if req.CallerId != "" { + req.Request.CallerId = &vtrpcpb.CallerID{Principal: req.CallerId} + } + + // Set the default wait replicas timeout. 
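To summarize the vtadmin plumbing in this hunk: the new top-level Sql and CallerId fields are folded into the wrapped vtctldata.ApplySchemaRequest on the server, the SQL string is split into individual statements, and a default wait-replicas timeout is applied. A hedged sketch of constructing such a request (cluster id and keyspace are hypothetical; the field names come from the generated proto code above):

package main

import (
	"fmt"

	vtadminpb "vitess.io/vitess/go/vt/proto/vtadmin"
	vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
)

func main() {
	req := &vtadminpb.ApplySchemaRequest{
		ClusterId: "local", // hypothetical cluster id
		// Multiple statements in one string: api.go splits them with
		// sqlparser.SplitStatementToPieces before forwarding.
		Sql: "alter table t1 add column v varchar(64); alter table t2 drop column v",
		// Wrapped server-side into &vtrpcpb.CallerID{Principal: ...}.
		CallerId: "user@example.com",
		Request:  &vtctldatapb.ApplySchemaRequest{Keyspace: "commerce"},
	}
	fmt.Println(req.ClusterId, len(req.Sql))
}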
+ req.Request.WaitReplicasTimeout = protoutil.DurationToProto(grpcvtctldserver.DefaultWaitReplicasTimeout) + return c.ApplySchema(ctx, req.Request) } diff --git a/go/vt/vtadmin/http/schema_migrations.go b/go/vt/vtadmin/http/schema_migrations.go index e0207989648..3da6026fe9f 100644 --- a/go/vt/vtadmin/http/schema_migrations.go +++ b/go/vt/vtadmin/http/schema_migrations.go @@ -34,19 +34,26 @@ func ApplySchema(ctx context.Context, r Request, api *API) *JSONResponse { decoder := json.NewDecoder(r.Body) defer r.Body.Close() - var req vtctldatapb.ApplySchemaRequest - if err := decoder.Decode(&req); err != nil { + var body struct { + Sql string `json:"sql"` + CallerId string `json:"caller_id"` + Request vtctldatapb.ApplySchemaRequest `json:"request"` + } + + if err := decoder.Decode(&body); err != nil { return NewJSONResponse(nil, &errors.BadRequest{ Err: err, }) } vars := mux.Vars(r.Request) - req.Keyspace = vars["keyspace"] + body.Request.Keyspace = vars["keyspace"] resp, err := api.server.ApplySchema(ctx, &vtadminpb.ApplySchemaRequest{ ClusterId: vars["cluster_id"], - Request: &req, + Sql: body.Sql, + CallerId: body.CallerId, + Request: &body.Request, }) return NewJSONResponse(resp, err) diff --git a/go/vt/vtgate/debugenv.go b/go/vt/vtgate/debugenv.go index 4fa989c69a3..7213353432d 100644 --- a/go/vt/vtgate/debugenv.go +++ b/go/vt/vtgate/debugenv.go @@ -22,9 +22,10 @@ import ( "html" "net/http" "strconv" - "text/template" "time" + "github.com/google/safehtml/template" + "vitess.io/vitess/go/acl" "vitess.io/vitess/go/vt/discovery" "vitess.io/vitess/go/vt/log" diff --git a/go/vt/vtgate/evalengine/eval_result.go b/go/vt/vtgate/evalengine/eval_result.go index d9916af03be..5c1973d8eb1 100644 --- a/go/vt/vtgate/evalengine/eval_result.go +++ b/go/vt/vtgate/evalengine/eval_result.go @@ -62,6 +62,7 @@ func (er EvalResult) String() string { // TupleValues allows for retrieval of the value we expose for public consumption func (er EvalResult) TupleValues() []sqltypes.Value { + // TODO: Make this collation-aware switch v := er.v.(type) { case *evalTuple: result := make([]sqltypes.Value, 0, len(v.t)) diff --git a/go/vt/vtgate/planbuilder/builder.go b/go/vt/vtgate/planbuilder/builder.go index 27b994b1730..495564d6571 100644 --- a/go/vt/vtgate/planbuilder/builder.go +++ b/go/vt/vtgate/planbuilder/builder.go @@ -275,7 +275,7 @@ func buildDBDDLPlan(stmt sqlparser.Statement, _ *sqlparser.ReservedVars, vschema dbDDLstmt := stmt.(sqlparser.DBDDLStatement) ksName := dbDDLstmt.GetDatabaseName() if ksName == "" { - ks, err := vschema.DefaultKeyspace() + ks, err := vschema.SelectedKeyspace() if err != nil { return nil, err } @@ -310,7 +310,7 @@ func buildDBDDLPlan(stmt sqlparser.Statement, _ *sqlparser.ReservedVars, vschema } func buildLoadPlan(query string, vschema plancontext.VSchema) (*planResult, error) { - keyspace, err := vschema.DefaultKeyspace() + keyspace, err := vschema.SelectedKeyspace() if err != nil { return nil, err } @@ -355,7 +355,7 @@ func buildFlushOptions(stmt *sqlparser.Flush, vschema plancontext.VSchema) (*pla return nil, vterrors.VT09012("FLUSH", vschema.TabletType().String()) } - keyspace, err := vschema.DefaultKeyspace() + keyspace, err := vschema.SelectedKeyspace() if err != nil { return nil, err } diff --git a/go/vt/vtgate/planbuilder/bypass.go b/go/vt/vtgate/planbuilder/bypass.go index 62cae9655b1..d3384d509c1 100644 --- a/go/vt/vtgate/planbuilder/bypass.go +++ b/go/vt/vtgate/planbuilder/bypass.go @@ -26,7 +26,7 @@ import ( ) func buildPlanForBypass(stmt sqlparser.Statement, _ 
*sqlparser.ReservedVars, vschema plancontext.VSchema) (*planResult, error) { - keyspace, err := vschema.DefaultKeyspace() + keyspace, err := vschema.SelectedKeyspace() if err != nil { return nil, err } diff --git a/go/vt/vtgate/planbuilder/ddl.go b/go/vt/vtgate/planbuilder/ddl.go index f4b8ab6976f..5e16dab1c59 100644 --- a/go/vt/vtgate/planbuilder/ddl.go +++ b/go/vt/vtgate/planbuilder/ddl.go @@ -81,7 +81,7 @@ func buildGeneralDDLPlan(ctx context.Context, sql string, ddlStatement sqlparser } func buildByPassPlan(sql string, vschema plancontext.VSchema, isDDL bool) (*planResult, error) { - keyspace, err := vschema.DefaultKeyspace() + keyspace, err := vschema.SelectedKeyspace() if err != nil { return nil, err } diff --git a/go/vt/vtgate/planbuilder/operator_transformers.go b/go/vt/vtgate/planbuilder/operator_transformers.go index a22719b4489..df14745e6b2 100644 --- a/go/vt/vtgate/planbuilder/operator_transformers.go +++ b/go/vt/vtgate/planbuilder/operator_transformers.go @@ -545,7 +545,7 @@ func routeToEngineRoute(ctx *plancontext.PlanningContext, op *operators.Route, h } func newRoutingParams(ctx *plancontext.PlanningContext, opCode engine.Opcode) *engine.RoutingParameters { - ks, _ := ctx.VSchema.DefaultKeyspace() + ks, _ := ctx.VSchema.SelectedKeyspace() if ks == nil { // if we don't have a selected keyspace, any keyspace will do // this is used by operators that do not set the keyspace diff --git a/go/vt/vtgate/planbuilder/plancontext/planning_context.go b/go/vt/vtgate/planbuilder/plancontext/planning_context.go index 607ca83aa31..016f5c877cf 100644 --- a/go/vt/vtgate/planbuilder/plancontext/planning_context.go +++ b/go/vt/vtgate/planbuilder/plancontext/planning_context.go @@ -91,7 +91,7 @@ func CreatePlanningContext(stmt sqlparser.Statement, version querypb.ExecuteOptions_PlannerVersion, ) (*PlanningContext, error) { ksName := "" - if ks, _ := vschema.DefaultKeyspace(); ks != nil { + if ks, _ := vschema.SelectedKeyspace(); ks != nil { ksName = ks.Name } diff --git a/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go b/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go index d7315f376b6..e5e96b0a4be 100644 --- a/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go +++ b/go/vt/vtgate/planbuilder/plancontext/planning_context_test.go @@ -201,7 +201,7 @@ func (v *vschema) FindTableOrVindex(tablename sqlparser.TableName) (*vindexes.Ta panic("implement me") } -func (v *vschema) DefaultKeyspace() (*vindexes.Keyspace, error) { +func (v *vschema) SelectedKeyspace() (*vindexes.Keyspace, error) { // TODO implement me panic("implement me") } diff --git a/go/vt/vtgate/planbuilder/plancontext/vschema.go b/go/vt/vtgate/planbuilder/plancontext/vschema.go index 6e92ad0d83b..b4560424718 100644 --- a/go/vt/vtgate/planbuilder/plancontext/vschema.go +++ b/go/vt/vtgate/planbuilder/plancontext/vschema.go @@ -27,7 +27,9 @@ type VSchema interface { FindTable(tablename sqlparser.TableName) (*vindexes.Table, string, topodatapb.TabletType, key.Destination, error) FindView(name sqlparser.TableName) sqlparser.SelectStatement FindTableOrVindex(tablename sqlparser.TableName) (*vindexes.Table, vindexes.Vindex, string, topodatapb.TabletType, key.Destination, error) - DefaultKeyspace() (*vindexes.Keyspace, error) + + // SelectedKeyspace returns the current keyspace if set, otherwise returns an error + SelectedKeyspace() (*vindexes.Keyspace, error) TargetString() string Destination() key.Destination TabletType() topodatapb.TabletType diff --git a/go/vt/vtgate/planbuilder/select.go 
b/go/vt/vtgate/planbuilder/select.go index 9cc1c8efe06..409343f2760 100644 --- a/go/vt/vtgate/planbuilder/select.go +++ b/go/vt/vtgate/planbuilder/select.go @@ -46,7 +46,7 @@ func gen4SelectStmtPlanner( } if p != nil { used := "dual" - keyspace, ksErr := vschema.DefaultKeyspace() + keyspace, ksErr := vschema.SelectedKeyspace() if ksErr == nil { // we are just getting the ks to log the correct table use. // no need to fail this if we can't find the default keyspace @@ -101,7 +101,7 @@ func gen4SelectStmtPlanner( func gen4planSQLCalcFoundRows(vschema plancontext.VSchema, sel *sqlparser.Select, query string, reservedVars *sqlparser.ReservedVars) (*planResult, error) { ksName := "" - if ks, _ := vschema.DefaultKeyspace(); ks != nil { + if ks, _ := vschema.SelectedKeyspace(); ks != nil { ksName = ks.Name } semTable, err := semantics.Analyze(sel, ksName, vschema) diff --git a/go/vt/vtgate/planbuilder/show.go b/go/vt/vtgate/planbuilder/show.go index 82035adaa87..40cf7b2411f 100644 --- a/go/vt/vtgate/planbuilder/show.go +++ b/go/vt/vtgate/planbuilder/show.go @@ -676,7 +676,7 @@ func buildVschemaKeyspacesPlan(vschema plancontext.VSchema) (engine.Primitive, e func buildVschemaTablesPlan(vschema plancontext.VSchema) (engine.Primitive, error) { vs := vschema.GetVSchema() - ks, err := vschema.DefaultKeyspace() + ks, err := vschema.SelectedKeyspace() if err != nil { return nil, err } diff --git a/go/vt/vtgate/querylogz.go b/go/vt/vtgate/querylogz.go index 7c72e950d4a..05d301f28be 100644 --- a/go/vt/vtgate/querylogz.go +++ b/go/vt/vtgate/querylogz.go @@ -20,15 +20,15 @@ import ( "net/http" "strconv" "strings" - "text/template" "time" - "vitess.io/vitess/go/vt/vtgate/logstats" + "github.com/google/safehtml/template" "vitess.io/vitess/go/acl" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logz" "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vtgate/logstats" ) var ( diff --git a/go/vt/vtgate/querylogz_test.go b/go/vt/vtgate/querylogz_test.go index 3cecb983b3f..9236b2ac840 100644 --- a/go/vt/vtgate/querylogz_test.go +++ b/go/vt/vtgate/querylogz_test.go @@ -35,7 +35,7 @@ import ( func TestQuerylogzHandlerFormatting(t *testing.T) { req, _ := http.NewRequest("GET", "/querylogz?timeout=10&limit=1", nil) - logStats := logstats.NewLogStats(context.Background(), "Execute", "select name from test_table limit 1000", "suuid", nil) + logStats := logstats.NewLogStats(context.Background(), "Execute", "select name, 'inject ' from test_table limit 1000", "suuid", nil) logStats.StmtType = "select" logStats.RowsAffected = 1000 logStats.ShardQueries = 1 @@ -64,7 +64,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) { `0.002`, `0.003`, `select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `1000`, ``, @@ -94,7 +94,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) { `0.002`, `0.003`, `select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `1000`, ``, @@ -124,7 +124,7 @@ func TestQuerylogzHandlerFormatting(t *testing.T) { `0.002`, `0.003`, `select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `1000`, ``, diff --git a/go/vt/vtgate/vcursor_impl.go b/go/vt/vtgate/vcursor_impl.go index e9b1d3d7712..691b9988d9e 100644 --- a/go/vt/vtgate/vcursor_impl.go +++ b/go/vt/vtgate/vcursor_impl.go @@ -392,10 
+392,10 @@ func (vc *vcursorImpl) getActualKeyspace() string { return ks.Name } -// DefaultKeyspace returns the default keyspace of the current request +// SelectedKeyspace returns the selected keyspace of the current request // if there is one. If the keyspace specified in the target cannot be // identified, it returns an error. -func (vc *vcursorImpl) DefaultKeyspace() (*vindexes.Keyspace, error) { +func (vc *vcursorImpl) SelectedKeyspace() (*vindexes.Keyspace, error) { if ignoreKeyspace(vc.keyspace) { return nil, errNoKeyspace } @@ -409,7 +409,7 @@ func (vc *vcursorImpl) DefaultKeyspace() (*vindexes.Keyspace, error) { var errNoDbAvailable = vterrors.NewErrorf(vtrpcpb.Code_FAILED_PRECONDITION, vterrors.NoDB, "no database available") func (vc *vcursorImpl) AnyKeyspace() (*vindexes.Keyspace, error) { - keyspace, err := vc.DefaultKeyspace() + keyspace, err := vc.SelectedKeyspace() if err == nil { return keyspace, nil } diff --git a/go/vt/vtorc/config/config.go b/go/vt/vtorc/config/config.go index 2d21e377cb6..cafff5acce8 100644 --- a/go/vt/vtorc/config/config.go +++ b/go/vt/vtorc/config/config.go @@ -17,14 +17,12 @@ package config import ( - "encoding/json" - "fmt" - "os" "time" "github.com/spf13/pflag" - "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/viperutil" + "vitess.io/vitess/go/vt/servenv" ) var configurationLoaded = make(chan bool) @@ -42,200 +40,296 @@ const ( ) var ( - sqliteDataFile = "file::memory:?mode=memory&cache=shared" - instancePollTime = 5 * time.Second - snapshotTopologyInterval = 0 * time.Hour - reasonableReplicationLag = 10 * time.Second - auditFileLocation = "" - auditToBackend = false - auditToSyslog = false - auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days - recoveryPeriodBlockDuration = 30 * time.Second - preventCrossCellFailover = false - waitReplicasTimeout = 30 * time.Second - tolerableReplicationLag = 0 * time.Second - topoInformationRefreshDuration = 15 * time.Second - recoveryPollDuration = 1 * time.Second - ersEnabled = true - convertTabletsWithErrantGTIDs = false + instancePollTime = viperutil.Configure( + "instance-poll-time", + viperutil.Options[time.Duration]{ + FlagName: "instance-poll-time", + Default: 5 * time.Second, + Dynamic: true, + }, + ) + + preventCrossCellFailover = viperutil.Configure( + "prevent-cross-cell-failover", + viperutil.Options[bool]{ + FlagName: "prevent-cross-cell-failover", + Default: false, + Dynamic: true, + }, + ) + + sqliteDataFile = viperutil.Configure( + "sqlite-data-file", + viperutil.Options[string]{ + FlagName: "sqlite-data-file", + Default: "file::memory:?mode=memory&cache=shared", + Dynamic: false, + }, + ) + + snapshotTopologyInterval = viperutil.Configure( + "snapshot-topology-interval", + viperutil.Options[time.Duration]{ + FlagName: "snapshot-topology-interval", + Default: 0 * time.Hour, + Dynamic: true, + }, + ) + + reasonableReplicationLag = viperutil.Configure( + "reasonable-replication-lag", + viperutil.Options[time.Duration]{ + FlagName: "reasonable-replication-lag", + Default: 10 * time.Second, + Dynamic: true, + }, + ) + + auditFileLocation = viperutil.Configure( + "audit-file-location", + viperutil.Options[string]{ + FlagName: "audit-file-location", + Default: "", + Dynamic: false, + }, + ) + + auditToBackend = viperutil.Configure( + "audit-to-backend", + viperutil.Options[bool]{ + FlagName: "audit-to-backend", + Default: false, + Dynamic: true, + }, + ) + + auditToSyslog = viperutil.Configure( + "audit-to-syslog", + viperutil.Options[bool]{ + FlagName: "audit-to-syslog", + Default: 
false, + Dynamic: true, + }, + ) + + auditPurgeDuration = viperutil.Configure( + "audit-purge-duration", + viperutil.Options[time.Duration]{ + FlagName: "audit-purge-duration", + Default: 7 * 24 * time.Hour, + Dynamic: true, + }, + ) + + waitReplicasTimeout = viperutil.Configure( + "wait-replicas-timeout", + viperutil.Options[time.Duration]{ + FlagName: "wait-replicas-timeout", + Default: 30 * time.Second, + Dynamic: true, + }, + ) + + tolerableReplicationLag = viperutil.Configure( + "tolerable-replication-lag", + viperutil.Options[time.Duration]{ + FlagName: "tolerable-replication-lag", + Default: 0 * time.Second, + Dynamic: true, + }, + ) + + topoInformationRefreshDuration = viperutil.Configure( + "topo-information-refresh-duration", + viperutil.Options[time.Duration]{ + FlagName: "topo-information-refresh-duration", + Default: 15 * time.Second, + Dynamic: true, + }, + ) + + recoveryPollDuration = viperutil.Configure( + "recovery-poll-duration", + viperutil.Options[time.Duration]{ + FlagName: "recovery-poll-duration", + Default: 1 * time.Second, + Dynamic: true, + }, + ) + + ersEnabled = viperutil.Configure( + "allow-emergency-reparent", + viperutil.Options[bool]{ + FlagName: "allow-emergency-reparent", + Default: true, + Dynamic: true, + }, + ) + + convertTabletsWithErrantGTIDs = viperutil.Configure( + "change-tablets-with-errant-gtid-to-drained", + viperutil.Options[bool]{ + FlagName: "change-tablets-with-errant-gtid-to-drained", + Default: false, + Dynamic: true, + }, + ) ) -// RegisterFlags registers the flags required by VTOrc -func RegisterFlags(fs *pflag.FlagSet) { - fs.StringVar(&sqliteDataFile, "sqlite-data-file", sqliteDataFile, "SQLite Datafile to use as VTOrc's database") - fs.DurationVar(&instancePollTime, "instance-poll-time", instancePollTime, "Timer duration on which VTOrc refreshes MySQL information") - fs.DurationVar(&snapshotTopologyInterval, "snapshot-topology-interval", snapshotTopologyInterval, "Timer duration on which VTOrc takes a snapshot of the current MySQL information it has in the database. Should be in multiple of hours") - fs.DurationVar(&reasonableReplicationLag, "reasonable-replication-lag", reasonableReplicationLag, "Maximum replication lag on replicas which is deemed to be acceptable") - fs.StringVar(&auditFileLocation, "audit-file-location", auditFileLocation, "File location where the audit logs are to be stored") - fs.BoolVar(&auditToBackend, "audit-to-backend", auditToBackend, "Whether to store the audit log in the VTOrc database") - fs.BoolVar(&auditToSyslog, "audit-to-syslog", auditToSyslog, "Whether to store the audit log in the syslog") - fs.DurationVar(&auditPurgeDuration, "audit-purge-duration", auditPurgeDuration, "Duration for which audit logs are held before being purged. 
Should be in multiples of days") - fs.DurationVar(&recoveryPeriodBlockDuration, "recovery-period-block-duration", recoveryPeriodBlockDuration, "Duration for which a new recovery is blocked on an instance after running a recovery") - fs.MarkDeprecated("recovery-period-block-duration", "As of v20 this is ignored and will be removed in a future release.") - fs.BoolVar(&preventCrossCellFailover, "prevent-cross-cell-failover", preventCrossCellFailover, "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover") - fs.DurationVar(&waitReplicasTimeout, "wait-replicas-timeout", waitReplicasTimeout, "Duration for which to wait for replica's to respond when issuing RPCs") - fs.DurationVar(&tolerableReplicationLag, "tolerable-replication-lag", tolerableReplicationLag, "Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS") - fs.DurationVar(&topoInformationRefreshDuration, "topo-information-refresh-duration", topoInformationRefreshDuration, "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server") - fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery") - fs.BoolVar(&ersEnabled, "allow-emergency-reparent", ersEnabled, "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary") - fs.BoolVar(&convertTabletsWithErrantGTIDs, "change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs, "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED") +func init() { + servenv.OnParseFor("vtorc", registerFlags) } -// Configuration makes for vtorc configuration input, which can be provided by user via JSON formatted file. -// Some of the parameters have reasonable default values, and some (like database credentials) are -// strictly expected from user. -// TODO(sougou): change this to yaml parsing, and possible merge with tabletenv. -type Configuration struct { - SQLite3DataFile string // full path to sqlite3 datafile - InstancePollSeconds uint // Number of seconds between instance reads - SnapshotTopologiesIntervalHours uint // Interval in hour between snapshot-topologies invocation. Default: 0 (disabled) - ReasonableReplicationLagSeconds int // Above this value is considered a problem - AuditLogFile string // Name of log file for audit operations. Disabled when empty. - AuditToSyslog bool // If true, audit messages are written to syslog - AuditToBackendDB bool // If true, audit messages are written to the backend DB's `audit` table (default: true) - AuditPurgeDays uint // Days after which audit entries are purged from the database - RecoveryPeriodBlockSeconds int // (overrides `RecoveryPeriodBlockMinutes`) The time for which an instance's recovery is kept "active", so as to avoid concurrent recoveries on smae instance as well as flapping - PreventCrossDataCenterPrimaryFailover bool // When true (default: false), cross-DC primary failover are not allowed, vtorc will do all it can to only fail over within same DC, or else not fail over at all. - WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS. 
- TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS. - TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server. - RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs +// registerFlags registers the flags required by VTOrc +func registerFlags(fs *pflag.FlagSet) { + fs.String("sqlite-data-file", sqliteDataFile.Default(), "SQLite Datafile to use as VTOrc's database") + fs.Duration("instance-poll-time", instancePollTime.Default(), "Timer duration on which VTOrc refreshes MySQL information") + fs.Duration("snapshot-topology-interval", snapshotTopologyInterval.Default(), "Timer duration on which VTOrc takes a snapshot of the current MySQL information it has in the database. Should be in multiples of hours") + fs.Duration("reasonable-replication-lag", reasonableReplicationLag.Default(), "Maximum replication lag on replicas which is deemed to be acceptable") + fs.String("audit-file-location", auditFileLocation.Default(), "File location where the audit logs are to be stored") + fs.Bool("audit-to-backend", auditToBackend.Default(), "Whether to store the audit log in the VTOrc database") + fs.Bool("audit-to-syslog", auditToSyslog.Default(), "Whether to store the audit log in the syslog") + fs.Duration("audit-purge-duration", auditPurgeDuration.Default(), "Duration for which audit logs are held before being purged. Should be in multiples of days") + fs.Bool("prevent-cross-cell-failover", preventCrossCellFailover.Default(), "Prevent VTOrc from promoting a primary in a different cell than the current primary in case of a failover") + fs.Duration("wait-replicas-timeout", waitReplicasTimeout.Default(), "Duration for which to wait for replicas to respond when issuing RPCs") + fs.Duration("tolerable-replication-lag", tolerableReplicationLag.Default(), "Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS") + fs.Duration("topo-information-refresh-duration", topoInformationRefreshDuration.Default(), "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server") + fs.Duration("recovery-poll-duration", recoveryPollDuration.Default(), "Timer duration on which VTOrc polls its database to run a recovery") + fs.Bool("allow-emergency-reparent", ersEnabled.Default(), "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary") + fs.Bool("change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs.Default(), "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED") + + viperutil.BindFlags(fs, + instancePollTime, + preventCrossCellFailover, + sqliteDataFile, + snapshotTopologyInterval, + reasonableReplicationLag, + auditFileLocation, + auditToBackend, + auditToSyslog, + auditPurgeDuration, + waitReplicasTimeout, + tolerableReplicationLag, + topoInformationRefreshDuration, + recoveryPollDuration, + ersEnabled, + convertTabletsWithErrantGTIDs, + ) } -// ToJSONString will marshal this configuration as JSON -func (config *Configuration) ToJSONString() string { - b, _ := json.Marshal(config) - return string(b) +// GetInstancePollTime is a getter function.
+func GetInstancePollTime() time.Duration { + return instancePollTime.Get() } -// Config is *the* configuration instance, used globally to get configuration data -var Config = newConfiguration() -var readFileNames []string - -// UpdateConfigValuesFromFlags is used to update the config values from the flags defined. -// This is done before we read any configuration files from the user. So the config files take precedence. -func UpdateConfigValuesFromFlags() { - Config.SQLite3DataFile = sqliteDataFile - Config.InstancePollSeconds = uint(instancePollTime / time.Second) - Config.InstancePollSeconds = uint(instancePollTime / time.Second) - Config.SnapshotTopologiesIntervalHours = uint(snapshotTopologyInterval / time.Hour) - Config.ReasonableReplicationLagSeconds = int(reasonableReplicationLag / time.Second) - Config.AuditLogFile = auditFileLocation - Config.AuditToBackendDB = auditToBackend - Config.AuditToSyslog = auditToSyslog - Config.AuditPurgeDays = uint(auditPurgeDuration / (time.Hour * 24)) - Config.RecoveryPeriodBlockSeconds = int(recoveryPeriodBlockDuration / time.Second) - Config.PreventCrossDataCenterPrimaryFailover = preventCrossCellFailover - Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second) - Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second) - Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second) - Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second) +// SetInstancePollTime is a setter function. +func SetInstancePollTime(v time.Duration) { + instancePollTime.Set(v) } -// ERSEnabled reports whether VTOrc is allowed to run ERS or not. -func ERSEnabled() bool { - return ersEnabled +// GetInstancePollSeconds gets the instance poll time but in seconds. +func GetInstancePollSeconds() uint { + return uint(instancePollTime.Get() / time.Second) } -// SetERSEnabled sets the value for the ersEnabled variable. This should only be used from tests. -func SetERSEnabled(val bool) { - ersEnabled = val +// GetPreventCrossCellFailover is a getter function. +func GetPreventCrossCellFailover() bool { + return preventCrossCellFailover.Get() } -// ConvertTabletWithErrantGTIDs reports whether VTOrc is allowed to change the tablet type of tablets with errant GTIDs to DRAINED. -func ConvertTabletWithErrantGTIDs() bool { - return convertTabletsWithErrantGTIDs +// GetSQLiteDataFile is a getter function. +func GetSQLiteDataFile() string { + return sqliteDataFile.Get() } -// SetConvertTabletWithErrantGTIDs sets the value for the convertTabletWithErrantGTIDs variable. This should only be used from tests. -func SetConvertTabletWithErrantGTIDs(val bool) { - convertTabletsWithErrantGTIDs = val +// GetReasonableReplicationLagSeconds gets the reasonable replication lag but in seconds. +func GetReasonableReplicationLagSeconds() int64 { + return int64(reasonableReplicationLag.Get() / time.Second) +} + +// GetSnapshotTopologyInterval is a getter function. +func GetSnapshotTopologyInterval() time.Duration { + return snapshotTopologyInterval.Get() } -// LogConfigValues is used to log the config values. -func LogConfigValues() { - b, _ := json.MarshalIndent(Config, "", "\t") - log.Infof("Running with Configuration - %v", string(b)) +// GetAuditFileLocation is a getter function. 
+func GetAuditFileLocation() string { + return auditFileLocation.Get() } -func newConfiguration() *Configuration { - return &Configuration{ - SQLite3DataFile: "file::memory:?mode=memory&cache=shared", - InstancePollSeconds: 5, - SnapshotTopologiesIntervalHours: 0, - ReasonableReplicationLagSeconds: 10, - AuditLogFile: "", - AuditToSyslog: false, - AuditToBackendDB: false, - AuditPurgeDays: 7, - RecoveryPeriodBlockSeconds: 30, - PreventCrossDataCenterPrimaryFailover: false, - WaitReplicasTimeoutSeconds: 30, - TopoInformationRefreshSeconds: 15, - RecoveryPollSeconds: 1, - } +// SetAuditFileLocation is a setter function. +func SetAuditFileLocation(v string) { + auditFileLocation.Set(v) } -func (config *Configuration) postReadAdjustments() error { - if config.SQLite3DataFile == "" { - return fmt.Errorf("SQLite3DataFile must be set") - } +// GetAuditToSyslog is a getter function. +func GetAuditToSyslog() bool { + return auditToSyslog.Get() +} + +// SetAuditToSyslog is a setter function. +func SetAuditToSyslog(v bool) { + auditToSyslog.Set(v) +} + +// GetAuditToBackend is a getter function. +func GetAuditToBackend() bool { + return auditToBackend.Get() +} + +// SetAuditToBackend is a setter function. +func SetAuditToBackend(v bool) { + auditToBackend.Set(v) +} - return nil +// GetAuditPurgeDays gets the audit purge duration but in days. +func GetAuditPurgeDays() int64 { + return int64(auditPurgeDuration.Get() / (24 * time.Hour)) } -// read reads configuration from given file, or silently skips if the file does not exist. -// If the file does exist, then it is expected to be in valid JSON format or the function bails out. -func read(fileName string) (*Configuration, error) { - if fileName == "" { - return Config, fmt.Errorf("Empty file name") - } - file, err := os.Open(fileName) - if err != nil { - return Config, err - } - decoder := json.NewDecoder(file) - err = decoder.Decode(Config) - if err == nil { - log.Infof("Read config: %s", fileName) - } else { - log.Fatal("Cannot read config file:", fileName, err) - } - if err := Config.postReadAdjustments(); err != nil { - log.Fatal(err) - } - return Config, err +// SetAuditPurgeDays sets the audit purge duration. +func SetAuditPurgeDays(days int64) { + auditPurgeDuration.Set(time.Duration(days) * 24 * time.Hour) } -// Read reads configuration from zero, either, some or all given files, in order of input. -// A file can override configuration provided in previous file. -func Read(fileNames ...string) *Configuration { - for _, fileName := range fileNames { - _, _ = read(fileName) - } - readFileNames = fileNames - return Config +// GetWaitReplicasTimeout is a getter function. +func GetWaitReplicasTimeout() time.Duration { + return waitReplicasTimeout.Get() } -// ForceRead reads configuration from given file name or bails out if it fails -func ForceRead(fileName string) *Configuration { - _, err := read(fileName) - if err != nil { - log.Fatal("Cannot read config file:", fileName, err) - } - readFileNames = []string{fileName} - return Config +// GetTolerableReplicationLag is a getter function. +func GetTolerableReplicationLag() time.Duration { + return tolerableReplicationLag.Get() } -// Reload re-reads configuration from last used files -func Reload(extraFileNames ...string) *Configuration { - for _, fileName := range readFileNames { - _, _ = read(fileName) - } - for _, fileName := range extraFileNames { - _, _ = read(fileName) - } - return Config +// GetTopoInformationRefreshDuration is a getter function. 
+func GetTopoInformationRefreshDuration() time.Duration { + return topoInformationRefreshDuration.Get() +} + +// GetRecoveryPollDuration is a getter function. +func GetRecoveryPollDuration() time.Duration { + return recoveryPollDuration.Get() +} + +// ERSEnabled reports whether VTOrc is allowed to run ERS or not. +func ERSEnabled() bool { + return ersEnabled.Get() +} + +// SetERSEnabled sets the value for the ersEnabled variable. This should only be used from tests. +func SetERSEnabled(val bool) { + ersEnabled.Set(val) +} + +// ConvertTabletWithErrantGTIDs reports whether VTOrc is allowed to change the tablet type of tablets with errant GTIDs to DRAINED. +func ConvertTabletWithErrantGTIDs() bool { + return convertTabletsWithErrantGTIDs.Get() +} + +// SetConvertTabletWithErrantGTIDs sets the value for the convertTabletWithErrantGTIDs variable. This should only be used from tests. +func SetConvertTabletWithErrantGTIDs(val bool) { + convertTabletsWithErrantGTIDs.Set(val) } // MarkConfigurationLoaded is called once configuration has first been loaded. diff --git a/go/vt/vtorc/config/config_test.go b/go/vt/vtorc/config/config_test.go deleted file mode 100644 index 2009b476f1d..00000000000 --- a/go/vt/vtorc/config/config_test.go +++ /dev/null @@ -1,234 +0,0 @@ -/* -Copyright 2022 The Vitess Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package config - -import ( - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func TestUpdateConfigValuesFromFlags(t *testing.T) { - t.Run("defaults", func(t *testing.T) { - // Restore the changes we make to the Config parameter - defer func() { - Config = newConfiguration() - }() - defaultConfig := newConfiguration() - UpdateConfigValuesFromFlags() - require.Equal(t, defaultConfig, Config) - }) - - t.Run("override auditPurgeDuration", func(t *testing.T) { - oldAuditPurgeDuration := auditPurgeDuration - auditPurgeDuration = 8 * time.Hour * 24 - auditPurgeDuration += time.Second + 4*time.Minute - // Restore the changes we make - defer func() { - Config = newConfiguration() - auditPurgeDuration = oldAuditPurgeDuration - }() - - testConfig := newConfiguration() - // auditPurgeDuration is supposed to be in multiples of days. - // If it is not, then we round down to the nearest number of days. 
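The rounding the deleted test asserts just below is plain integer division on time.Duration, the same arithmetic the new GetAuditPurgeDays getter above performs; a small runnable illustration of the round-down behavior:

package main

import (
	"fmt"
	"time"
)

func main() {
	// 8 days plus a stray 4m1s, as constructed in the deleted test below.
	d := 8*24*time.Hour + 4*time.Minute + time.Second
	// Duration division truncates, so the extra 4m1s is rounded down.
	fmt.Println(int64(d / (24 * time.Hour))) // 8
}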
- testConfig.AuditPurgeDays = 8 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override sqliteDataFile", func(t *testing.T) { - oldSqliteDataFile := sqliteDataFile - sqliteDataFile = "newVal" - // Restore the changes we make - defer func() { - Config = newConfiguration() - sqliteDataFile = oldSqliteDataFile - }() - - testConfig := newConfiguration() - testConfig.SQLite3DataFile = "newVal" - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override instancePollTime", func(t *testing.T) { - oldInstancePollTime := instancePollTime - instancePollTime = 7 * time.Second - // Restore the changes we make - defer func() { - Config = newConfiguration() - instancePollTime = oldInstancePollTime - }() - - testConfig := newConfiguration() - testConfig.InstancePollSeconds = 7 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override snapshotTopologyInterval", func(t *testing.T) { - oldSnapshotTopologyInterval := snapshotTopologyInterval - snapshotTopologyInterval = 1 * time.Hour - // Restore the changes we make - defer func() { - Config = newConfiguration() - snapshotTopologyInterval = oldSnapshotTopologyInterval - }() - - testConfig := newConfiguration() - testConfig.SnapshotTopologiesIntervalHours = 1 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override reasonableReplicationLag", func(t *testing.T) { - oldReasonableReplicationLag := reasonableReplicationLag - reasonableReplicationLag = 15 * time.Second - // Restore the changes we make - defer func() { - Config = newConfiguration() - reasonableReplicationLag = oldReasonableReplicationLag - }() - - testConfig := newConfiguration() - testConfig.ReasonableReplicationLagSeconds = 15 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override auditFileLocation", func(t *testing.T) { - oldAuditFileLocation := auditFileLocation - auditFileLocation = "newFile" - // Restore the changes we make - defer func() { - Config = newConfiguration() - auditFileLocation = oldAuditFileLocation - }() - - testConfig := newConfiguration() - testConfig.AuditLogFile = "newFile" - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override auditToBackend", func(t *testing.T) { - oldAuditToBackend := auditToBackend - auditToBackend = true - // Restore the changes we make - defer func() { - Config = newConfiguration() - auditToBackend = oldAuditToBackend - }() - - testConfig := newConfiguration() - testConfig.AuditToBackendDB = true - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override auditToSyslog", func(t *testing.T) { - oldAuditToSyslog := auditToSyslog - auditToSyslog = true - // Restore the changes we make - defer func() { - Config = newConfiguration() - auditToSyslog = oldAuditToSyslog - }() - - testConfig := newConfiguration() - testConfig.AuditToSyslog = true - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override recoveryPeriodBlockDuration", func(t *testing.T) { - oldRecoveryPeriodBlockDuration := recoveryPeriodBlockDuration - recoveryPeriodBlockDuration = 5 * time.Minute - // Restore the changes we make - defer func() { - Config = newConfiguration() - recoveryPeriodBlockDuration = oldRecoveryPeriodBlockDuration - }() - - testConfig := newConfiguration() - testConfig.RecoveryPeriodBlockSeconds = 300 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, 
Config) - }) - - t.Run("override preventCrossCellFailover", func(t *testing.T) { - oldPreventCrossCellFailover := preventCrossCellFailover - preventCrossCellFailover = true - // Restore the changes we make - defer func() { - Config = newConfiguration() - preventCrossCellFailover = oldPreventCrossCellFailover - }() - - testConfig := newConfiguration() - testConfig.PreventCrossDataCenterPrimaryFailover = true - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override waitReplicasTimeout", func(t *testing.T) { - oldWaitReplicasTimeout := waitReplicasTimeout - waitReplicasTimeout = 3*time.Minute + 4*time.Second - // Restore the changes we make - defer func() { - Config = newConfiguration() - waitReplicasTimeout = oldWaitReplicasTimeout - }() - - testConfig := newConfiguration() - testConfig.WaitReplicasTimeoutSeconds = 184 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override topoInformationRefreshDuration", func(t *testing.T) { - oldTopoInformationRefreshDuration := topoInformationRefreshDuration - topoInformationRefreshDuration = 1 * time.Second - // Restore the changes we make - defer func() { - Config = newConfiguration() - topoInformationRefreshDuration = oldTopoInformationRefreshDuration - }() - - testConfig := newConfiguration() - testConfig.TopoInformationRefreshSeconds = 1 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) - - t.Run("override recoveryPollDuration", func(t *testing.T) { - oldRecoveryPollDuration := recoveryPollDuration - recoveryPollDuration = 15 * time.Second - // Restore the changes we make - defer func() { - Config = newConfiguration() - recoveryPollDuration = oldRecoveryPollDuration - }() - - testConfig := newConfiguration() - testConfig.RecoveryPollSeconds = 15 - UpdateConfigValuesFromFlags() - require.Equal(t, testConfig, Config) - }) -} diff --git a/go/vt/vtorc/db/db.go b/go/vt/vtorc/db/db.go index 470e5364680..870a3d15949 100644 --- a/go/vt/vtorc/db/db.go +++ b/go/vt/vtorc/db/db.go @@ -44,9 +44,9 @@ func (m *vtorcDB) QueryVTOrc(query string, argsArray []any, onRow func(sqlutils. // OpenTopology returns the DB instance for the vtorc backed database func OpenVTOrc() (db *sql.DB, err error) { var fromCache bool - db, fromCache, err = sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile) + db, fromCache, err = sqlutils.GetSQLiteDB(config.GetSQLiteDataFile()) if err == nil && !fromCache { - log.Infof("Connected to vtorc backend: sqlite on %v", config.Config.SQLite3DataFile) + log.Infof("Connected to vtorc backend: sqlite on %v", config.GetSQLiteDataFile()) if err := initVTOrcDB(db); err != nil { log.Fatalf("Cannot initiate vtorc: %+v", err) } @@ -91,7 +91,7 @@ func deployStatements(db *sql.DB, queries []string) error { // ClearVTOrcDatabase is used to clear the VTOrc database. This function is meant to be used by tests to clear the // database to get a clean slate without starting a new one. 
func ClearVTOrcDatabase() { - db, _, _ := sqlutils.GetSQLiteDB(config.Config.SQLite3DataFile) + db, _, _ := sqlutils.GetSQLiteDB(config.GetSQLiteDataFile()) if db != nil { if err := initVTOrcDB(db); err != nil { log.Fatalf("Cannot re-initiate vtorc: %+v", err) diff --git a/go/vt/vtorc/discovery/queue.go b/go/vt/vtorc/discovery/queue.go index 95751c6ae25..4b18303959b 100644 --- a/go/vt/vtorc/discovery/queue.go +++ b/go/vt/vtorc/discovery/queue.go @@ -153,7 +153,7 @@ func (q *Queue) Consume() string { // alarm if have been waiting for too long timeOnQueue := time.Since(q.queuedKeys[key]) - if timeOnQueue > time.Duration(config.Config.InstancePollSeconds)*time.Second { + if timeOnQueue > config.GetInstancePollTime() { log.Warningf("key %v spent %.4fs waiting on a discoveryQueue", key, timeOnQueue.Seconds()) } diff --git a/go/vt/vtorc/inst/analysis.go b/go/vt/vtorc/inst/analysis.go index 66d6c6dd9ce..3e9e81c5c9f 100644 --- a/go/vt/vtorc/inst/analysis.go +++ b/go/vt/vtorc/inst/analysis.go @@ -144,5 +144,5 @@ func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) { // ValidSecondsFromSeenToLastAttemptedCheck returns the maximum allowed elapsed time // between last_attempted_check to last_checked before we consider the instance as invalid. func ValidSecondsFromSeenToLastAttemptedCheck() uint { - return config.Config.InstancePollSeconds + 1 + return config.GetInstancePollSeconds() } diff --git a/go/vt/vtorc/inst/analysis_dao.go b/go/vt/vtorc/inst/analysis_dao.go index e44538e694c..07830bf7dda 100644 --- a/go/vt/vtorc/inst/analysis_dao.go +++ b/go/vt/vtorc/inst/analysis_dao.go @@ -47,7 +47,7 @@ func init() { func initializeAnalysisDaoPostConfiguration() { config.WaitForConfigurationToBeLoaded() - recentInstantAnalysis = cache.New(time.Duration(config.Config.RecoveryPollSeconds*2)*time.Second, time.Second) + recentInstantAnalysis = cache.New(config.GetRecoveryPollDuration()*2, time.Second) } type clusterAnalysis struct { @@ -68,7 +68,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna } // TODO(sougou); deprecate ReduceReplicationAnalysisCount - args := sqlutils.Args(config.Config.ReasonableReplicationLagSeconds, ValidSecondsFromSeenToLastAttemptedCheck(), config.Config.ReasonableReplicationLagSeconds, keyspace, shard) + args := sqlutils.Args(config.GetReasonableReplicationLagSeconds(), ValidSecondsFromSeenToLastAttemptedCheck(), config.GetReasonableReplicationLagSeconds(), keyspace, shard) query := `SELECT vitess_tablet.info AS tablet_info, vitess_tablet.tablet_type, diff --git a/go/vt/vtorc/inst/audit_dao.go b/go/vt/vtorc/inst/audit_dao.go index cbfd771e81c..7ae60fba927 100644 --- a/go/vt/vtorc/inst/audit_dao.go +++ b/go/vt/vtorc/inst/audit_dao.go @@ -38,10 +38,10 @@ func AuditOperation(auditType string, tabletAlias string, message string) error } auditWrittenToFile := false - if config.Config.AuditLogFile != "" { + if config.GetAuditFileLocation() != "" { auditWrittenToFile = true go func() { - f, err := os.OpenFile(config.Config.AuditLogFile, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640) + f, err := os.OpenFile(config.GetAuditFileLocation(), os.O_RDWR|os.O_CREATE|os.O_APPEND, 0640) if err != nil { log.Error(err) return @@ -54,7 +54,7 @@ func AuditOperation(auditType string, tabletAlias string, message string) error } }() } - if config.Config.AuditToBackendDB { + if config.GetAuditToBackend() { _, err := db.ExecVTOrc(`INSERT INTO audit ( audit_timestamp, diff --git a/go/vt/vtorc/inst/audit_dao_test.go b/go/vt/vtorc/inst/audit_dao_test.go index 
1d50de4c146..d22e9177dc3 100644 --- a/go/vt/vtorc/inst/audit_dao_test.go +++ b/go/vt/vtorc/inst/audit_dao_test.go @@ -35,13 +35,13 @@ import ( // This test also verifies that we are able to read the recent audits that are written to the database. func TestAuditOperation(t *testing.T) { // Restore original configurations - originalAuditSysLog := config.Config.AuditToSyslog - originalAuditLogFile := config.Config.AuditLogFile - originalAuditBackend := config.Config.AuditToBackendDB + originalAuditSysLog := config.GetAuditToSyslog() + originalAuditLogFile := config.GetAuditFileLocation() + originalAuditBackend := config.GetAuditToBackend() defer func() { - config.Config.AuditToSyslog = originalAuditSysLog - config.Config.AuditLogFile = originalAuditLogFile - config.Config.AuditToBackendDB = originalAuditBackend + config.SetAuditToSyslog(originalAuditSysLog) + config.SetAuditFileLocation(originalAuditLogFile) + config.SetAuditToBackend(originalAuditBackend) }() orcDb, err := db.OpenVTOrc() @@ -78,9 +78,9 @@ func TestAuditOperation(t *testing.T) { message := "test-message" t.Run("audit to backend", func(t *testing.T) { - config.Config.AuditLogFile = "" - config.Config.AuditToSyslog = false - config.Config.AuditToBackendDB = true + config.SetAuditFileLocation("") + config.SetAuditToSyslog(false) + config.SetAuditToBackend(true) // Auditing should succeed as expected err = AuditOperation(auditType, tab100Alias, message) @@ -106,13 +106,13 @@ func TestAuditOperation(t *testing.T) { }) t.Run("audit to File", func(t *testing.T) { - config.Config.AuditToBackendDB = false - config.Config.AuditToSyslog = false + config.SetAuditToBackend(false) + config.SetAuditToSyslog(false) file, err := os.CreateTemp("", "test-auditing-*") require.NoError(t, err) defer os.Remove(file.Name()) - config.Config.AuditLogFile = file.Name() + config.SetAuditFileLocation(file.Name()) err = AuditOperation(auditType, tab100Alias, message) require.NoError(t, err) diff --git a/go/vt/vtorc/inst/instance_dao.go b/go/vt/vtorc/inst/instance_dao.go index c8ff218710f..d1421dbc91d 100644 --- a/go/vt/vtorc/inst/instance_dao.go +++ b/go/vt/vtorc/inst/instance_dao.go @@ -80,7 +80,7 @@ func init() { func initializeInstanceDao() { config.WaitForConfigurationToBeLoaded() - forgetAliases = cache.New(time.Duration(config.Config.InstancePollSeconds*3)*time.Second, time.Second) + forgetAliases = cache.New(config.GetInstancePollTime()*3, time.Second) cacheInitializationCompleted.Store(true) } @@ -122,7 +122,7 @@ func ExpireTableData(tableName string, timestampColumn string) error { tableName, timestampColumn, ) - _, err := db.ExecVTOrc(query, config.Config.AuditPurgeDays) + _, err := db.ExecVTOrc(query, config.GetAuditPurgeDays()) return err } return ExecDBWriteFunc(writeFunc) @@ -362,7 +362,7 @@ Cleanup: // Add replication group ancestry UUID as well. Otherwise, VTOrc thinks there are errant GTIDs in group // members and its replicas, even though they are not. instance.AncestryUUID = strings.Trim(instance.AncestryUUID, ",") - err = detectErrantGTIDs(tabletAlias, instance, tablet) + err = detectErrantGTIDs(instance, tablet) } latency.Stop("instance") @@ -390,13 +390,18 @@ Cleanup: }
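The reworked detectErrantGTIDs below gains an early return when the analyzed tablet is itself the shard primary. The hazard it guards against: VTOrc's stored copy of the primary's executed GTID set can lag the live primary, and diffing the live set against that stale snapshot would misreport the primary's own newest transactions as errant. A toy, runnable illustration of that failure mode, using naive single-UUID interval arithmetic rather than Vitess's real GTID-set types:

package main

import "fmt"

// errantAgainst naively reports transactions present in the current set
// but missing from the snapshot, for a single UUID covering 1-end.
func errantAgainst(snapshotEnd, currentEnd int) string {
	if currentEnd <= snapshotEnd {
		return "" // nothing beyond the snapshot: no errant GTIDs
	}
	return fmt.Sprintf("%d-%d", snapshotEnd+1, currentEnd)
}

func main() {
	// VTOrc last recorded the primary at 1-341; the primary has since
	// executed up to 1-351 (the values TestPrimaryErrantGTIDs uses below).
	fmt.Println(errantAgainst(341, 351)) // "342-351": false positives on the primary
}

// detectErrantGTIDs detects the errant GTIDs on an instance. -func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatapb.Tablet) (err error) { +func detectErrantGTIDs(instance *Instance, tablet *topodatapb.Tablet) (err error) { // If the tablet is not replicating from anyone, then it could be the previous primary.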
// We should check for errant GTIDs by finding the difference with the shard's current primary. if instance.primaryExecutedGtidSet == "" && instance.SourceHost == "" { var primaryInstance *Instance primaryAlias, _, _ := ReadShardPrimaryInformation(tablet.Keyspace, tablet.Shard) if primaryAlias != "" { + // Check if the current tablet is the primary. + // If it is, then we don't need to run errant gtid detection on it. + if primaryAlias == instance.InstanceAlias { + return nil + } primaryInstance, _, _ = ReadInstance(primaryAlias) } // Only run errant GTID detection, if we are sure that the data read of the current primary @@ -434,7 +439,7 @@ func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatap if err == nil { var gtidCount int64 gtidCount, err = replication.GTIDCount(instance.GtidErrant) - currentErrantGTIDCount.Set(tabletAlias, gtidCount) + currentErrantGTIDCount.Set(instance.InstanceAlias, gtidCount) } } } @@ -573,8 +578,8 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.ReplicationDepth = m.GetUint("replication_depth") instance.IsCoPrimary = m.GetBool("is_co_primary") instance.HasReplicationCredentials = m.GetBool("has_replication_credentials") - instance.IsUpToDate = (m.GetUint("seconds_since_last_checked") <= config.Config.InstancePollSeconds) - instance.IsRecentlyChecked = (m.GetUint("seconds_since_last_checked") <= config.Config.InstancePollSeconds*5) + instance.IsUpToDate = m.GetUint("seconds_since_last_checked") <= config.GetInstancePollSeconds() + instance.IsRecentlyChecked = m.GetUint("seconds_since_last_checked") <= config.GetInstancePollSeconds()*5 instance.LastSeenTimestamp = m.GetString("last_seen") instance.IsLastCheckValid = m.GetBool("is_last_check_valid") instance.SecondsSinceLastSeen = m.GetNullInt64("seconds_since_last_seen") @@ -591,7 +596,7 @@ func readInstanceRow(m sqlutils.RowMap) *Instance { instance.Problems = append(instance.Problems, "not_recently_checked") } else if instance.ReplicationThreadsExist() && !instance.ReplicaRunning() { instance.Problems = append(instance.Problems, "not_replicating") - } else if instance.ReplicationLagSeconds.Valid && util.AbsInt64(instance.ReplicationLagSeconds.Int64-int64(instance.SQLDelay)) > int64(config.Config.ReasonableReplicationLagSeconds) { + } else if instance.ReplicationLagSeconds.Valid && util.AbsInt64(instance.ReplicationLagSeconds.Int64-int64(instance.SQLDelay)) > int64(config.GetReasonableReplicationLagSeconds()) { instance.Problems = append(instance.Problems, "replication_lag") } if instance.GtidErrant != "" { @@ -674,7 +679,7 @@ func ReadProblemInstances(keyspace string, shard string) ([](*Instance), error) OR (gtid_errant != '') )` - args := sqlutils.Args(keyspace, keyspace, shard, shard, config.Config.InstancePollSeconds*5, config.Config.ReasonableReplicationLagSeconds, config.Config.ReasonableReplicationLagSeconds) + args := sqlutils.Args(keyspace, keyspace, shard, shard, config.GetInstancePollSeconds()*5, config.GetReasonableReplicationLagSeconds(), config.GetReasonableReplicationLagSeconds()) return readInstancesByCondition(condition, args, "") } @@ -741,7 +746,7 @@ func ReadOutdatedInstanceKeys() ([]string, error) { WHERE database_instance.alias IS NULL ` - args := sqlutils.Args(config.Config.InstancePollSeconds, 2*config.Config.InstancePollSeconds) + args := sqlutils.Args(config.GetInstancePollSeconds(), 2*config.GetInstancePollSeconds()) err := db.QueryVTOrc(query, args, func(m sqlutils.RowMap) error { tabletAlias := m.GetString("alias") @@ -1163,7 +1168,7 @@ func 
SnapshotTopologies() error { } func ExpireStaleInstanceBinlogCoordinates() error { - expireSeconds := config.Config.ReasonableReplicationLagSeconds * 2 + expireSeconds := config.GetReasonableReplicationLagSeconds() * 2 if expireSeconds < config.StaleInstanceCoordinatesExpireSeconds { expireSeconds = config.StaleInstanceCoordinatesExpireSeconds } diff --git a/go/vt/vtorc/inst/instance_dao_test.go b/go/vt/vtorc/inst/instance_dao_test.go index f248ded5e2b..cc3217442ed 100644 --- a/go/vt/vtorc/inst/instance_dao_test.go +++ b/go/vt/vtorc/inst/instance_dao_test.go @@ -242,11 +242,11 @@ func TestReadProblemInstances(t *testing.T) { // We need to set InstancePollSeconds to a large value otherwise all the instances are reported as having problems since their last_checked is very old. // Setting this value to a hundred years, we ensure that this test doesn't fail with this issue for the next hundred years. - oldVal := config.Config.InstancePollSeconds + oldVal := config.GetInstancePollTime() defer func() { - config.Config.InstancePollSeconds = oldVal + config.SetInstancePollTime(oldVal) }() - config.Config.InstancePollSeconds = 60 * 60 * 24 * 365 * 100 + config.SetInstancePollTime(60 * 60 * 24 * 365 * 100 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -326,11 +326,11 @@ func TestReadInstancesWithErrantGTIds(t *testing.T) { // We need to set InstancePollSeconds to a large value otherwise all the instances are reported as having problems since their last_checked is very old. // Setting this value to a hundred years, we ensure that this test doesn't fail with this issue for the next hundred years. - oldVal := config.Config.InstancePollSeconds + oldVal := config.GetInstancePollTime() defer func() { - config.Config.InstancePollSeconds = oldVal + config.SetInstancePollTime(oldVal) }() - config.Config.InstancePollSeconds = 60 * 60 * 24 * 365 * 100 + config.SetInstancePollTime(60 * 60 * 24 * 365 * 100 * time.Second) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -460,13 +460,13 @@ func TestReadOutdatedInstanceKeys(t *testing.T) { waitForCacheInitialization() // We are setting InstancePollSeconds to 25 minutes, just for the test.
- oldVal := config.Config.InstancePollSeconds + oldVal := config.GetInstancePollTime() oldCache := forgetAliases defer func() { forgetAliases = oldCache - config.Config.InstancePollSeconds = oldVal + config.SetInstancePollTime(oldVal) }() - config.Config.InstancePollSeconds = 60 * 25 + config.SetInstancePollTime(60 * 25 * time.Second) forgetAliases = cache.New(time.Minute, time.Minute) for _, tt := range tests { @@ -719,10 +719,10 @@ func TestGetDatabaseState(t *testing.T) { } func TestExpireTableData(t *testing.T) { - oldVal := config.Config.AuditPurgeDays - config.Config.AuditPurgeDays = 10 + oldVal := config.GetAuditPurgeDays() + config.SetAuditPurgeDays(10) defer func() { - config.Config.AuditPurgeDays = oldVal + config.SetAuditPurgeDays(oldVal) }() tests := []struct { @@ -854,7 +854,7 @@ func TestDetectErrantGTIDs(t *testing.T) { primaryTablet := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: "zone-1", - Uid: 100, + Uid: 101, }, Keyspace: keyspaceName, Shard: shardName, @@ -881,7 +881,8 @@ func TestDetectErrantGTIDs(t *testing.T) { require.NoError(t, err) } - err = detectErrantGTIDs(topoproto.TabletAliasString(tablet.Alias), tt.instance, tablet) + tt.instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias) + err = detectErrantGTIDs(tt.instance, tablet) if tt.wantErr { require.Error(t, err) return @@ -891,3 +892,47 @@ func TestDetectErrantGTIDs(t *testing.T) { }) } } + +// TestPrimaryErrantGTIDs tests that we don't run errant GTID detection on the primary tablet itself! +func TestPrimaryErrantGTIDs(t *testing.T) { + // Clear the database after the test. The easiest way to do that is to run all the initialization commands again. + defer func() { + db.ClearVTOrcDatabase() + }() + db.ClearVTOrcDatabase() + keyspaceName := "ks" + shardName := "0" + tablet := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone-1", + Uid: 100, + }, + Keyspace: keyspaceName, + Shard: shardName, + } + instance := &Instance{ + SourceHost: "", + ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341", + InstanceAlias: topoproto.TabletAliasString(tablet.Alias), + } + + // Save shard record for the primary tablet. + err := SaveShard(topo.NewShardInfo(keyspaceName, shardName, &topodatapb.Shard{ + PrimaryAlias: tablet.Alias, + }, nil)) + require.NoError(t, err) + + // Store the tablet record and the instance. + err = SaveTablet(tablet) + require.NoError(t, err) + err = WriteInstance(instance, true, nil) + require.NoError(t, err) + + // After this, if we read new information for the record that updates its + // GTID set further, we shouldn't be detecting errant GTIDs on it since it is the primary! + // We shouldn't be comparing it with a previous version of itself! + instance.ExecutedGtidSet = "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-351" + err = detectErrantGTIDs(instance, tablet) + require.NoError(t, err) + require.EqualValues(t, "", instance.GtidErrant) +} diff --git a/go/vt/vtorc/logic/tablet_discovery.go b/go/vt/vtorc/logic/tablet_discovery.go index d9225bd61fc..990192a23f7 100644 --- a/go/vt/vtorc/logic/tablet_discovery.go +++ b/go/vt/vtorc/logic/tablet_discovery.go @@ -68,13 +68,13 @@ func OpenTabletDiscovery() <-chan time.Time { log.Error(err) } // We refresh all information from the topo once before we start the ticks to do - // it on a timer.
We can wait forever (context.Background()) for this call. + // it on a timer. ctx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout) defer cancel() if err := refreshAllInformation(ctx); err != nil { log.Errorf("failed to initialize topo information: %+v", err) } - return time.Tick(time.Second * time.Duration(config.Config.TopoInformationRefreshSeconds)) //nolint SA1015: using time.Tick leaks the underlying ticker + return time.Tick(config.GetTopoInformationRefreshDuration()) //nolint SA1015: using time.Tick leaks the underlying ticker } // refreshAllTablets reloads the tablets from topo and discovers the ones which haven't been refreshed in a while diff --git a/go/vt/vtorc/logic/topology_recovery.go b/go/vt/vtorc/logic/topology_recovery.go index aec137a45b4..f14eca624c9 100644 --- a/go/vt/vtorc/logic/topology_recovery.go +++ b/go/vt/vtorc/logic/topology_recovery.go @@ -21,7 +21,6 @@ import ( "encoding/json" "fmt" "math/rand/v2" - "time" "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/log" @@ -235,8 +234,8 @@ func runEmergencyReparentOp(ctx context.Context, analysisEntry *inst.Replication tablet.Shard, reparentutil.EmergencyReparentOptions{ IgnoreReplicas: nil, - WaitReplicasTimeout: time.Duration(config.Config.WaitReplicasTimeoutSeconds) * time.Second, - PreventCrossCellPromotion: config.Config.PreventCrossDataCenterPrimaryFailover, + WaitReplicasTimeout: config.GetWaitReplicasTimeout(), + PreventCrossCellPromotion: config.GetPreventCrossCellFailover(), WaitAllTablets: waitForAllTablets, }, ) @@ -703,8 +702,8 @@ func electNewPrimary(ctx context.Context, analysisEntry *inst.ReplicationAnalysi analyzedTablet.Keyspace, analyzedTablet.Shard, reparentutil.PlannedReparentOptions{ - WaitReplicasTimeout: time.Duration(config.Config.WaitReplicasTimeoutSeconds) * time.Second, - TolerableReplLag: time.Duration(config.Config.TolerableReplicationLagSeconds) * time.Second, + WaitReplicasTimeout: config.GetWaitReplicasTimeout(), + TolerableReplLag: config.GetTolerableReplicationLag(), }, ) diff --git a/go/vt/vtorc/logic/topology_recovery_dao_test.go b/go/vt/vtorc/logic/topology_recovery_dao_test.go index 20dfb7e91e2..6a1d7c4c48f 100644 --- a/go/vt/vtorc/logic/topology_recovery_dao_test.go +++ b/go/vt/vtorc/logic/topology_recovery_dao_test.go @@ -70,10 +70,10 @@ func TestTopologyRecovery(t *testing.T) { } func TestExpireTableData(t *testing.T) { - oldVal := config.Config.AuditPurgeDays - config.Config.AuditPurgeDays = 10 + oldVal := config.GetAuditPurgeDays() + config.SetAuditPurgeDays(10) defer func() { - config.Config.AuditPurgeDays = oldVal + config.SetAuditPurgeDays(oldVal) }() tests := []struct { diff --git a/go/vt/vtorc/logic/vtorc.go b/go/vt/vtorc/logic/vtorc.go index 6c6430ea6b3..0619c87f1f0 100644 --- a/go/vt/vtorc/logic/vtorc.go +++ b/go/vt/vtorc/logic/vtorc.go @@ -22,7 +22,6 @@ import ( "os/signal" "sync" "sync/atomic" - "syscall" "time" "github.com/patrickmn/go-cache" @@ -76,26 +75,6 @@ func init() { }) } -// used in several places -func instancePollSecondsDuration() time.Duration { - return time.Duration(config.Config.InstancePollSeconds) * time.Second -} - -// acceptSighupSignal registers for SIGHUP signal from the OS to reload the configuration files. -func acceptSighupSignal() { - c := make(chan os.Signal, 1) - - signal.Notify(c, syscall.SIGHUP) - go func() { - for range c { - log.Infof("Received SIGHUP. 
Reloading configuration") - _ = inst.AuditOperation("reload-configuration", "", "Triggered via SIGHUP") - config.Reload() - discoveryMetrics.SetExpirePeriod(time.Duration(config.DiscoveryCollectionRetentionSeconds) * time.Second) - } - }() -} - // closeVTOrc runs all the operations required to cleanly shutdown VTOrc func closeVTOrc() { log.Infof("Starting VTOrc shutdown") @@ -164,7 +143,7 @@ func DiscoverInstance(tabletAlias string, forceDiscovery bool) { defer func() { latency.Stop("total") discoveryTime := latency.Elapsed("total") - if discoveryTime > instancePollSecondsDuration() { + if discoveryTime > config.GetInstancePollTime() { instancePollSecondsExceededCounter.Add(1) log.Warningf("discoverInstance exceeded InstancePollSeconds for %+v, took %.4fs", tabletAlias, discoveryTime.Seconds()) if metric != nil { @@ -180,7 +159,7 @@ func DiscoverInstance(tabletAlias string, forceDiscovery bool) { // Calculate the expiry period each time as InstancePollSeconds // _may_ change during the run of the process (via SIGHUP) and // it is not possible to change the cache's default expiry.. - if existsInCacheError := recentDiscoveryOperationKeys.Add(tabletAlias, true, instancePollSecondsDuration()); existsInCacheError != nil && !forceDiscovery { + if existsInCacheError := recentDiscoveryOperationKeys.Add(tabletAlias, true, config.GetInstancePollTime()); existsInCacheError != nil && !forceDiscovery { // Just recently attempted return } @@ -274,24 +253,23 @@ func onHealthTick() { // nolint SA1015: using time.Tick leaks the underlying ticker func ContinuousDiscovery() { log.Infof("continuous discovery: setting up") - recentDiscoveryOperationKeys = cache.New(instancePollSecondsDuration(), time.Second) + recentDiscoveryOperationKeys = cache.New(config.GetInstancePollTime(), time.Second) go handleDiscoveryRequests() healthTick := time.Tick(config.HealthPollSeconds * time.Second) caretakingTick := time.Tick(time.Minute) - recoveryTick := time.Tick(time.Duration(config.Config.RecoveryPollSeconds) * time.Second) + recoveryTick := time.Tick(config.GetRecoveryPollDuration()) tabletTopoTick := OpenTabletDiscovery() var recoveryEntrance int64 var snapshotTopologiesTick <-chan time.Time - if config.Config.SnapshotTopologiesIntervalHours > 0 { - snapshotTopologiesTick = time.Tick(time.Duration(config.Config.SnapshotTopologiesIntervalHours) * time.Hour) + if config.GetSnapshotTopologyInterval() > 0 { + snapshotTopologiesTick = time.Tick(config.GetSnapshotTopologyInterval()) } go func() { _ = ometrics.InitMetrics() }() - go acceptSighupSignal() // On termination of the server, we should close VTOrc cleanly servenv.OnTermSync(closeVTOrc) diff --git a/go/vt/vtorc/server/api.go b/go/vt/vtorc/server/api.go index 5e9a84c0a29..177f2c80333 100644 --- a/go/vt/vtorc/server/api.go +++ b/go/vt/vtorc/server/api.go @@ -25,6 +25,7 @@ import ( "time" "vitess.io/vitess/go/acl" + "vitess.io/vitess/go/viperutil/debug" "vitess.io/vitess/go/vt/servenv" "vitess.io/vitess/go/vt/vtorc/collection" "vitess.io/vitess/go/vt/vtorc/discovery" @@ -46,6 +47,7 @@ const ( enableGlobalRecoveriesAPI = "/api/enable-global-recoveries" replicationAnalysisAPI = "/api/replication-analysis" databaseStateAPI = "/api/database-state" + configAPI = "/api/config" healthAPI = "/debug/health" AggregatedDiscoveryMetricsAPI = "/api/aggregated-discovery-metrics" @@ -62,6 +64,7 @@ var ( enableGlobalRecoveriesAPI, replicationAnalysisAPI, databaseStateAPI, + configAPI, healthAPI, AggregatedDiscoveryMetricsAPI, } @@ -90,6 +93,8 @@ func (v *vtorcAPI) ServeHTTP(response 
http.ResponseWriter, request *http.Request replicationAnalysisAPIHandler(response, request) case databaseStateAPI: databaseStateAPIHandler(response) + case configAPI: + configAPIHandler(response) case AggregatedDiscoveryMetricsAPI: AggregatedDiscoveryMetricsAPIHandler(response, request) default: @@ -106,7 +111,7 @@ func getACLPermissionLevelForAPI(apiEndpoint string) string { return acl.MONITORING case disableGlobalRecoveriesAPI, enableGlobalRecoveriesAPI: return acl.ADMIN - case replicationAnalysisAPI: + case replicationAnalysisAPI, configAPI: return acl.MONITORING case healthAPI, databaseStateAPI: return acl.MONITORING @@ -180,6 +185,17 @@ func databaseStateAPIHandler(response http.ResponseWriter) { writePlainTextResponse(response, ds, http.StatusOK) } +// configAPIHandler is the handler for the configAPI endpoint +func configAPIHandler(response http.ResponseWriter) { + settingsMap := debug.AllSettings() + jsonOut, err := json.MarshalIndent(settingsMap, "", "\t") + if err != nil { + http.Error(response, err.Error(), http.StatusInternalServerError) + return + } + writePlainTextResponse(response, string(jsonOut), http.StatusOK) +} + // AggregatedDiscoveryMetricsAPIHandler is the handler for the discovery metrics endpoint func AggregatedDiscoveryMetricsAPIHandler(response http.ResponseWriter, request *http.Request) { // return metrics for last x seconds diff --git a/go/vt/vtorc/server/api_test.go b/go/vt/vtorc/server/api_test.go index c352d1e600f..ab6b9eed9af 100644 --- a/go/vt/vtorc/server/api_test.go +++ b/go/vt/vtorc/server/api_test.go @@ -31,6 +31,9 @@ func TestGetACLPermissionLevelForAPI(t *testing.T) { }, { apiEndpoint: healthAPI, want: acl.MONITORING, + }, { + apiEndpoint: configAPI, + want: acl.MONITORING, }, { apiEndpoint: "gibberish", want: acl.ADMIN, diff --git a/go/vt/vttablet/common/flags.go b/go/vt/vttablet/common/flags.go index 3c6141d62eb..75e8e58982f 100644 --- a/go/vt/vttablet/common/flags.go +++ b/go/vt/vttablet/common/flags.go @@ -33,8 +33,7 @@ const ( ) var ( - // Default flags: currently VReplicationExperimentalFlagVPlayerBatching is not enabled by default. 
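The flags.go hunk below OR-s VReplicationExperimentalFlagVPlayerBatching into the default experimental-flag set, turning batching on by default. These flags compose as a bitmask; a self-contained sketch of the pattern (constant names here are local stand-ins for the real ones, which I assume are declared as powers of two):

package main

import "fmt"

// Stand-ins for the vttablet experimental flags.
const (
	flagOptimizeInserts           = int64(1) << iota // 1
	flagAllowNoBlobBinlogRowImage                    // 2
	flagVPlayerBatching                              // 4
)

func main() {
	defaults := flagOptimizeInserts | flagAllowNoBlobBinlogRowImage | flagVPlayerBatching
	fmt.Println(defaults)                          // 7
	fmt.Println(defaults&flagVPlayerBatching != 0) // true: batching is on by default
}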
- vreplicationExperimentalFlags = VReplicationExperimentalFlagOptimizeInserts | VReplicationExperimentalFlagAllowNoBlobBinlogRowImage + vreplicationExperimentalFlags = VReplicationExperimentalFlagOptimizeInserts | VReplicationExperimentalFlagAllowNoBlobBinlogRowImage | VReplicationExperimentalFlagVPlayerBatching vreplicationNetReadTimeout = 300 vreplicationNetWriteTimeout = 600 vreplicationCopyPhaseDuration = 1 * time.Hour diff --git a/go/vt/vttablet/endtoend/config_test.go b/go/vt/vttablet/endtoend/config_test.go index 4abf5b36c21..c3ad5f8a9db 100644 --- a/go/vt/vttablet/endtoend/config_test.go +++ b/go/vt/vttablet/endtoend/config_test.go @@ -36,7 +36,7 @@ import ( ) func TestPoolSize(t *testing.T) { - revert := changeVar(t, "PoolSize", "1") + revert := changeVar(t, "ReadPoolSize", "1") defer revert() vstart := framework.DebugVars() @@ -92,7 +92,7 @@ func TestTxPoolSize(t *testing.T) { defer client2.Rollback() verifyIntValue(t, framework.DebugVars(), "FoundRowsPoolAvailable", framework.FetchInt(vstart, "FoundRowsPoolAvailable")-1) - revert := changeVar(t, "TxPoolSize", "1") + revert := changeVar(t, "TransactionPoolSize", "1") defer revert() vend := framework.DebugVars() verifyIntValue(t, vend, "TransactionPoolAvailable", 0) diff --git a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go index 6a416cb4414..62d6166b5ca 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go +++ b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan.go @@ -618,49 +618,40 @@ func valsEqual(v1, v2 sqltypes.Value) bool { func (tp *TablePlan) appendFromRow(buf *bytes2.Buffer, row *querypb.Row) error { bindLocations := tp.BulkInsertValues.BindLocations() if len(tp.Fields) < len(bindLocations) { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "wrong number of fields: got %d fields for %d bind locations ", + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "wrong number of fields: got %d fields for %d bind locations", len(tp.Fields), len(bindLocations)) } - type colInfo struct { - typ querypb.Type - length int64 - offset int64 - field *querypb.Field - } - rowInfo := make([]*colInfo, 0) - - offset := int64(0) - for i, field := range tp.Fields { // collect info required for fields to be bound - length := row.Lengths[i] - if !tp.FieldsToSkip[strings.ToLower(field.Name)] { - rowInfo = append(rowInfo, &colInfo{ - typ: field.Type, - length: length, - offset: offset, - field: field, - }) - } - if length > 0 { - offset += row.Lengths[i] + // Bind field values to locations. 
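The rewritten appendFromRow below replaces the deleted two-pass version (which materialized a colInfo slice per row) with a single pass that advances a field index and a byte offset together, consuming the bytes of skipped fields without emitting them. A stripped-down, runnable sketch of that skip-and-advance pattern (slice contents hypothetical):

package main

import "fmt"

func main() {
	// Parallel slices standing in for tp.Fields and row.Lengths.
	names := []string{"c1", "c2", "c3", "c4"}
	lengths := []int64{1, 1, 1, 1}
	values := []byte("1234") // packed row values, indexed by a running offset
	skip := map[string]bool{"c2": true, "c3": true}

	var offset int64
	fieldsIndex := 0
	for bound := 0; bound < 2; bound++ { // two bind locations: c1 and c4
		// Skip filtered-out fields, still consuming their bytes.
		for skip[names[fieldsIndex]] {
			if lengths[fieldsIndex] > 0 {
				offset += lengths[fieldsIndex]
			}
			fieldsIndex++
		}
		l := lengths[fieldsIndex]
		fmt.Printf("%s=%s\n", names[fieldsIndex], values[offset:offset+l])
		offset += l
		fieldsIndex++
	}
	// Output:
	// c1=1
	// c4=4
}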
+ var ( + offset int64 + offsetQuery int + fieldsIndex int + field *querypb.Field + ) + for i, loc := range bindLocations { + field = tp.Fields[fieldsIndex] + length := row.Lengths[fieldsIndex] + for tp.FieldsToSkip[strings.ToLower(field.Name)] { + if length > 0 { + offset += length + } + fieldsIndex++ + field = tp.Fields[fieldsIndex] + length = row.Lengths[fieldsIndex] } - } - // bind field values to locations - var offsetQuery int - for i, loc := range bindLocations { - col := rowInfo[i] buf.WriteString(tp.BulkInsertValues.Query[offsetQuery:loc.Offset]) - typ := col.typ + typ := field.Type switch typ { case querypb.Type_TUPLE: return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected Type_TUPLE for value %d", i) case querypb.Type_JSON: - if col.length < 0 { // An SQL NULL and not an actual JSON value + if length < 0 { // An SQL NULL and not an actual JSON value buf.WriteString(sqltypes.NullStr) } else { // A JSON value (which may be a JSON null literal value) - buf2 := row.Values[col.offset : col.offset+col.length] + buf2 := row.Values[offset : offset+length] vv, err := vjson.MarshalSQLValue(buf2) if err != nil { return err @@ -668,16 +659,16 @@ func (tp *TablePlan) appendFromRow(buf *bytes2.Buffer, row *querypb.Row) error { buf.WriteString(vv.RawStr()) } default: - if col.length < 0 { + if length < 0 { // -1 means a null variable; serialize it directly buf.WriteString(sqltypes.NullStr) } else { - raw := row.Values[col.offset : col.offset+col.length] + raw := row.Values[offset : offset+length] var vv sqltypes.Value - if conversion, ok := tp.ConvertCharset[col.field.Name]; ok && col.length > 0 { + if conversion, ok := tp.ConvertCharset[field.Name]; ok && length > 0 { // Non-null string value, for which we have a charset conversion instruction - out, err := tp.convertStringCharset(raw, conversion, col.field.Name) + out, err := tp.convertStringCharset(raw, conversion, field.Name) if err != nil { return err } @@ -690,6 +681,10 @@ func (tp *TablePlan) appendFromRow(buf *bytes2.Buffer, row *querypb.Row) error { } } offsetQuery = loc.Offset + loc.Length + if length > 0 { + offset += length + } + fieldsIndex++ } buf.WriteString(tp.BulkInsertValues.Query[offsetQuery:]) return nil diff --git a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan_test.go b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan_test.go index 644b4585914..09ace916f11 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/replicator_plan_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/replicator_plan_test.go @@ -21,17 +21,18 @@ import ( "strings" "testing" - vttablet "vitess.io/vitess/go/vt/vttablet/common" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/bytes2" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/sqlparser" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" + querypb "vitess.io/vitess/go/vt/proto/query" + vttablet "vitess.io/vitess/go/vt/vttablet/common" ) type TestReplicatorPlan struct { @@ -829,3 +830,137 @@ func TestBuildPlayerPlanExclude(t *testing.T) { wantPlan, _ := json.Marshal(want) assert.Equal(t, string(gotPlan), string(wantPlan)) } + +func TestAppendFromRow(t *testing.T) { + testCases := []struct { + name string + tp *TablePlan + row *querypb.Row + want string + wantErr string + }{ + { + name: "simple", + tp: &TablePlan{ + BulkInsertValues: sqlparser.BuildParsedQuery("values (%a, %a, %a)", + ":c1", ":c2", ":c3", + ), + Fields: 
[]*querypb.Field{ + {Name: "c1", Type: querypb.Type_INT32}, + {Name: "c2", Type: querypb.Type_INT32}, + {Name: "c3", Type: querypb.Type_INT32}, + }, + }, + row: sqltypes.RowToProto3( + []sqltypes.Value{ + sqltypes.NewInt64(1), + sqltypes.NewInt64(2), + sqltypes.NewInt64(3), + }, + ), + want: "values (1, 2, 3)", + }, + { + name: "too few fields", + tp: &TablePlan{ + BulkInsertValues: sqlparser.BuildParsedQuery("values (%a, %a, %a)", + ":c1", ":c2", ":c3", + ), + Fields: []*querypb.Field{ + {Name: "c1", Type: querypb.Type_INT32}, + {Name: "c2", Type: querypb.Type_INT32}, + }, + }, + wantErr: "wrong number of fields: got 2 fields for 3 bind locations", + }, + { + name: "skip half", + tp: &TablePlan{ + BulkInsertValues: sqlparser.BuildParsedQuery("values (%a, %a, %a, %a)", + ":c1", ":c2", ":c4", ":c8", + ), + Fields: []*querypb.Field{ + {Name: "c1", Type: querypb.Type_INT32}, + {Name: "c2", Type: querypb.Type_INT32}, + {Name: "c3", Type: querypb.Type_INT32}, + {Name: "c4", Type: querypb.Type_INT32}, + {Name: "c5", Type: querypb.Type_INT32}, + {Name: "c6", Type: querypb.Type_INT32}, + {Name: "c7", Type: querypb.Type_INT32}, + {Name: "c8", Type: querypb.Type_INT32}, + }, + FieldsToSkip: map[string]bool{ + "c3": true, + "c5": true, + "c6": true, + "c7": true, + }, + }, + row: sqltypes.RowToProto3( + []sqltypes.Value{ + sqltypes.NewInt64(1), + sqltypes.NewInt64(2), + sqltypes.NewInt64(3), + sqltypes.NewInt64(4), + sqltypes.NewInt64(5), + sqltypes.NewInt64(6), + sqltypes.NewInt64(7), + sqltypes.NewInt64(8), + }, + ), + want: "values (1, 2, 4, 8)", + }, + { + name: "skip all but one", + tp: &TablePlan{ + BulkInsertValues: sqlparser.BuildParsedQuery("values (%a)", + ":c4", + ), + Fields: []*querypb.Field{ + {Name: "c1", Type: querypb.Type_INT32}, + {Name: "c2", Type: querypb.Type_INT32}, + {Name: "c3", Type: querypb.Type_INT32}, + {Name: "c4", Type: querypb.Type_INT32}, + {Name: "c5", Type: querypb.Type_INT32}, + {Name: "c6", Type: querypb.Type_INT32}, + {Name: "c7", Type: querypb.Type_INT32}, + {Name: "c8", Type: querypb.Type_INT32}, + }, + FieldsToSkip: map[string]bool{ + "c1": true, + "c2": true, + "c3": true, + "c5": true, + "c6": true, + "c7": true, + "c8": true, + }, + }, + row: sqltypes.RowToProto3( + []sqltypes.Value{ + sqltypes.NewInt64(1), + sqltypes.NewInt64(2), + sqltypes.NewInt64(3), + sqltypes.NewInt64(4), + sqltypes.NewInt64(5), + sqltypes.NewInt64(6), + sqltypes.NewInt64(7), + sqltypes.NewInt64(8), + }, + ), + want: "values (4)", + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + bb := &bytes2.Buffer{} + err := tc.tp.appendFromRow(bb, tc.row) + if tc.wantErr != "" { + require.EqualError(t, err, tc.wantErr) + } else { + require.NoError(t, err) + require.Equal(t, tc.want, bb.String()) + } + }) + } +} diff --git a/go/vt/vttablet/tabletmanager/vreplication/vdbclient.go b/go/vt/vttablet/tabletmanager/vreplication/vdbclient.go index b8339cdf874..8a4409db06c 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vdbclient.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vdbclient.go @@ -171,7 +171,7 @@ func (vc *vdbClient) Execute(query string) (*sqltypes.Result, error) { func (vc *vdbClient) ExecuteWithRetry(ctx context.Context, query string) (*sqltypes.Result, error) { qr, err := vc.Execute(query) for err != nil { - if sqlErr, ok := err.(*sqlerror.SQLError); ok && sqlErr.Number() == sqlerror.ERLockDeadlock || sqlErr.Number() == sqlerror.ERLockWaitTimeout { + if sqlErr, ok := err.(*sqlerror.SQLError); ok && (sqlErr.Number() == sqlerror.ERLockDeadlock || 
sqlErr.Number() == sqlerror.ERLockWaitTimeout) { log.Infof("retryable error: %v, waiting for %v and retrying", sqlErr, dbLockRetryDelay) if err := vc.Rollback(); err != nil { return nil, err diff --git a/go/vt/vttablet/tabletserver/debugenv.go b/go/vt/vttablet/tabletserver/debugenv.go index 54cf09db7d6..6f1ea854ea9 100644 --- a/go/vt/vttablet/tabletserver/debugenv.go +++ b/go/vt/vttablet/tabletserver/debugenv.go @@ -23,9 +23,10 @@ import ( "html" "net/http" "strconv" - "text/template" "time" + "github.com/google/safehtml/template" + "vitess.io/vitess/go/acl" "vitess.io/vitess/go/vt/log" ) @@ -70,90 +71,131 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request) return } + switch r.Method { + case http.MethodPost: + handlePost(tsv, w, r) + case http.MethodGet: + handleGet(tsv, w, r) + default: + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + } +} + +func handlePost(tsv *TabletServer, w http.ResponseWriter, r *http.Request) { + varname := r.FormValue("varname") + value := r.FormValue("value") + var msg string - if r.Method == "POST" { - varname := r.FormValue("varname") - value := r.FormValue("value") - setIntVal := func(f func(int)) { - ival, err := strconv.Atoi(value) - if err != nil { - msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err) - return - } - f(ival) - msg = fmt.Sprintf("Setting %v to: %v", varname, value) + if varname == "" || value == "" { + http.Error(w, "Missing varname or value", http.StatusBadRequest) + return + } + + setIntVal := func(f func(int)) error { + ival, err := strconv.Atoi(value) + if err != nil { + return fmt.Errorf("invalid int value for %v: %v", varname, err) } - setIntValCtx := func(f func(context.Context, int) error) { - ival, err := strconv.Atoi(value) - if err == nil { - err = f(r.Context(), ival) - if err == nil { - msg = fmt.Sprintf("Setting %v to: %v", varname, value) - return - } - } - msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err) + f(ival) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + return nil + } + + setIntValCtx := func(f func(context.Context, int) error) error { + ival, err := strconv.Atoi(value) + if err == nil { + err = f(r.Context(), ival) } - setInt64Val := func(f func(int64)) { - ival, err := strconv.ParseInt(value, 10, 64) - if err != nil { - msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err) - return - } - f(ival) - msg = fmt.Sprintf("Setting %v to: %v", varname, value) + if err != nil { + return fmt.Errorf("failed setting value for %v: %v", varname, err) } - setDurationVal := func(f func(time.Duration)) { - durationVal, err := time.ParseDuration(value) - if err != nil { - msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err) - return - } - f(durationVal) - msg = fmt.Sprintf("Setting %v to: %v", varname, value) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + return nil + } + + setInt64Val := func(f func(int64)) error { + ival, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return fmt.Errorf("invalid int64 value for %v: %v", varname, err) } - setFloat64Val := func(f func(float64)) { - fval, err := strconv.ParseFloat(value, 64) - if err != nil { - msg = fmt.Sprintf("Failed setting value for %v: %v", varname, err) - return - } - f(fval) - msg = fmt.Sprintf("Setting %v to: %v", varname, value) + f(ival) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + return nil + } + + setDurationVal := func(f func(time.Duration)) error { + durationVal, err := time.ParseDuration(value) + if err != nil { 
+ return fmt.Errorf("invalid duration value for %v: %v", varname, err) } - switch varname { - case "PoolSize": - setIntValCtx(tsv.SetPoolSize) - case "StreamPoolSize": - setIntValCtx(tsv.SetStreamPoolSize) - case "TxPoolSize": - setIntValCtx(tsv.SetTxPoolSize) - case "MaxResultSize": - setIntVal(tsv.SetMaxResultSize) - case "WarnResultSize": - setIntVal(tsv.SetWarnResultSize) - case "RowStreamerMaxInnoDBTrxHistLen": - setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxInnoDBTrxHistLen = val }) - case "RowStreamerMaxMySQLReplLagSecs": - setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxMySQLReplLagSecs = val }) - case "UnhealthyThreshold": - setDurationVal(func(d time.Duration) { tsv.Config().Healthcheck.UnhealthyThreshold = d }) - setDurationVal(tsv.hs.SetUnhealthyThreshold) - setDurationVal(tsv.sm.SetUnhealthyThreshold) - case "ThrottleMetricThreshold": - setFloat64Val(tsv.SetThrottleMetricThreshold) - case "Consolidator": - tsv.SetConsolidatorMode(value) - msg = fmt.Sprintf("Setting %v to: %v", varname, value) + f(durationVal) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + return nil + } + + setFloat64Val := func(f func(float64)) error { + fval, err := strconv.ParseFloat(value, 64) + if err != nil { + return fmt.Errorf("invalid float64 value for %v: %v", varname, err) } + f(fval) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + return nil + } + + var err error + switch varname { + case "ReadPoolSize": + err = setIntValCtx(tsv.SetPoolSize) + case "StreamPoolSize": + err = setIntValCtx(tsv.SetStreamPoolSize) + case "TransactionPoolSize": + err = setIntValCtx(tsv.SetTxPoolSize) + case "MaxResultSize": + err = setIntVal(tsv.SetMaxResultSize) + case "WarnResultSize": + err = setIntVal(tsv.SetWarnResultSize) + case "RowStreamerMaxInnoDBTrxHistLen": + err = setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxInnoDBTrxHistLen = val }) + case "RowStreamerMaxMySQLReplLagSecs": + err = setInt64Val(func(val int64) { tsv.Config().RowStreamer.MaxMySQLReplLagSecs = val }) + case "UnhealthyThreshold": + err = setDurationVal(func(d time.Duration) { tsv.Config().Healthcheck.UnhealthyThreshold = d }) + case "ThrottleMetricThreshold": + err = setFloat64Val(tsv.SetThrottleMetricThreshold) + case "Consolidator": + tsv.SetConsolidatorMode(value) + msg = fmt.Sprintf("Setting %v to: %v", varname, value) + } + + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return } + vars := getVars(tsv) + sendResponse(r, w, vars, msg) +} + +func handleGet(tsv *TabletServer, w http.ResponseWriter, r *http.Request) { + vars := getVars(tsv) + sendResponse(r, w, vars, "") +} + +func sendResponse(r *http.Request, w http.ResponseWriter, vars []envValue, msg string) { + format := r.FormValue("format") + if format == "json" { + respondWithJSON(w, vars, msg) + return + } + respondWithHTML(w, vars, msg) +} + +func getVars(tsv *TabletServer) []envValue { var vars []envValue - vars = addVar(vars, "PoolSize", tsv.PoolSize) + vars = addVar(vars, "ReadPoolSize", tsv.PoolSize) vars = addVar(vars, "StreamPoolSize", tsv.StreamPoolSize) - vars = addVar(vars, "TxPoolSize", tsv.TxPoolSize) - vars = addVar(vars, "QueryCacheCapacity", tsv.QueryPlanCacheCap) // QueryCacheCapacity is deprecated in v21, it is replaced by QueryEnginePlanCacheCapacity - vars = addVar(vars, "QueryEnginePlanCacheCapacity", tsv.QueryPlanCacheCap) + vars = addVar(vars, "TransactionPoolSize", tsv.TxPoolSize) vars = addVar(vars, "MaxResultSize", tsv.MaxResultSize) vars = addVar(vars, "WarnResultSize", 
tsv.WarnResultSize) vars = addVar(vars, "RowStreamerMaxInnoDBTrxHistLen", func() int64 { return tsv.Config().RowStreamer.MaxInnoDBTrxHistLen }) @@ -165,18 +207,22 @@ func debugEnvHandler(tsv *TabletServer, w http.ResponseWriter, r *http.Request) Value: tsv.ConsolidatorMode(), }) - format := r.FormValue("format") - if format == "json" { - mvars := make(map[string]string) - for _, v := range vars { - mvars[v.Name] = v.Value - } - w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(mvars) - return + return vars +} + +func respondWithJSON(w http.ResponseWriter, vars []envValue, msg string) { + mvars := make(map[string]string) + for _, v := range vars { + mvars[v.Name] = v.Value } + if msg != "" { + mvars["ResponseMessage"] = msg + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(mvars) +} - // gridTable is reused from twopcz.go. +func respondWithHTML(w http.ResponseWriter, vars []envValue, msg string) { w.Write(gridTable) w.Write([]byte("
<h3>Internal Variables</h3>
\n")) if msg != "" { diff --git a/go/vt/vttablet/tabletserver/querylogz.go b/go/vt/vttablet/tabletserver/querylogz.go index 33341d1641b..09f375aa329 100644 --- a/go/vt/vttablet/tabletserver/querylogz.go +++ b/go/vt/vttablet/tabletserver/querylogz.go @@ -20,9 +20,10 @@ import ( "net/http" "strconv" "strings" - "text/template" "time" + "github.com/google/safehtml/template" + "vitess.io/vitess/go/acl" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logz" diff --git a/go/vt/vttablet/tabletserver/querylogz_test.go b/go/vt/vttablet/tabletserver/querylogz_test.go index 25f03c762c7..ee26437f330 100644 --- a/go/vt/vttablet/tabletserver/querylogz_test.go +++ b/go/vt/vttablet/tabletserver/querylogz_test.go @@ -37,7 +37,7 @@ func TestQuerylogzHandler(t *testing.T) { req, _ := http.NewRequest("GET", "/querylogz?timeout=10&limit=1", nil) logStats := tabletenv.NewLogStats(context.Background(), "Execute") logStats.PlanType = planbuilder.PlanSelect.String() - logStats.OriginalSQL = "select name from test_table limit 1000" + logStats.OriginalSQL = "select name, 'inject <script>alert();</script>' from test_table limit 1000" logStats.RowsAffected = 1000 logStats.NumberOfQueries = 1 logStats.StartTime, _ = time.Parse("Jan 2 15:04:05", "Nov 29 13:33:09") @@ -64,7 +64,7 @@ func TestQuerylogzHandler(t *testing.T) { `0.001`, `1e-08`, `Select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `none`, `1000`, @@ -95,7 +95,7 @@ func TestQuerylogzHandler(t *testing.T) { `0.001`, `1e-08`, `Select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `none`, `1000`, @@ -126,7 +126,7 @@ func TestQuerylogzHandler(t *testing.T) { `0.001`, `1e-08`, `Select`, - `select name from test_table limit 1000`, + regexp.QuoteMeta(`select name,​ 'inject <script>alert()​;</script>' from test_table limit 1000`), `1`, `none`, `1000`, diff --git a/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go b/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go index 9bbc98ca2bd..e5115afe6d3 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go +++ b/go/vt/vttablet/tabletserver/vstreamer/planbuilder.go @@ -89,6 +89,8 @@ const ( NotEqual // IsNotNull is used to filter a column if it is NULL IsNotNull + // In is used to filter a comparable column if it equals any of the values from a specific tuple + In ) // Filter contains opcodes for filtering. @@ -97,6 +99,9 @@ type Filter struct { ColNum int Value sqltypes.Value + // Values will be used to store tuple/list values. + Values []sqltypes.Value + // Parameters for VindexMatch. // Vindex, VindexColumns and KeyRange, if set, will be used // to filter the row. @@ -166,6 +171,8 @@ func getOpcode(comparison *sqlparser.ComparisonExpr) (Opcode, error) { opcode = GreaterThanEqual case sqlparser.NotEqualOp: opcode = NotEqual + case sqlparser.InOp: + opcode = In default: return -1, fmt.Errorf("comparison operator %s not supported", comparison.Operator.ToString()) } @@ -238,6 +245,24 @@ func (plan *Plan) filter(values, result []sqltypes.Value, charsets []collations.
if values[filter.ColNum].IsNull() { return false, nil } + case In: + if filter.Values == nil { + return false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected empty filter values when performing IN operator") + } + found := false + for _, filterValue := range filter.Values { + match, err := compare(Equal, values[filter.ColNum], filterValue, plan.env.CollationEnv(), charsets[filter.ColNum]) + if err != nil { + return false, err + } + if match { + found = true + break + } + } + if !found { + return false, nil + } default: match, err := compare(filter.Opcode, values[filter.ColNum], filter.Value, plan.env.CollationEnv(), charsets[filter.ColNum]) if err != nil { @@ -514,6 +539,27 @@ func (plan *Plan) getColumnFuncExpr(columnName string) *sqlparser.FuncExpr { return nil } +func (plan *Plan) appendTupleFilter(values sqlparser.ValTuple, opcode Opcode, colnum int) error { + pv, err := evalengine.Translate(values, &evalengine.Config{ + Collation: plan.env.CollationEnv().DefaultConnectionCharset(), + Environment: plan.env, + }) + if err != nil { + return err + } + env := evalengine.EmptyExpressionEnv(plan.env) + resolved, err := env.Evaluate(pv) + if err != nil { + return err + } + plan.Filters = append(plan.Filters, Filter{ + Opcode: opcode, + ColNum: colnum, + Values: resolved.TupleValues(), + }) + return nil +} + func (plan *Plan) analyzeWhere(vschema *localVSchema, where *sqlparser.Where) error { if where == nil { return nil @@ -537,6 +583,20 @@ func (plan *Plan) analyzeWhere(vschema *localVSchema, where *sqlparser.Where) er if err != nil { return err } + // The Right Expr is typically expected to be a Literal value, + // except for the IN operator, where a Tuple value is expected. + // Handle the IN operator case first. + if opcode == In { + values, ok := expr.Right.(sqlparser.ValTuple) + if !ok { + return fmt.Errorf("unexpected: %v", sqlparser.String(expr)) + } + err := plan.appendTupleFilter(values, opcode, colnum) + if err != nil { + return err + } + continue + } val, ok := expr.Right.(*sqlparser.Literal) if !ok { return fmt.Errorf("unexpected: %v", sqlparser.String(expr)) diff --git a/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go b/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go index ba345b2a00b..aba74368802 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/planbuilder_test.go @@ -710,9 +710,15 @@ func TestPlanBuilderFilterComparison(t *testing.T) { outFilters: []Filter{{Opcode: LessThan, ColNum: 0, Value: sqltypes.NewInt64(2)}, {Opcode: LessThanEqual, ColNum: 1, Value: sqltypes.NewVarChar("xyz")}, }, + }, { + name: "in-operator", + inFilter: "select * from t1 where id in (1, 2)", + outFilters: []Filter{ + {Opcode: In, ColNum: 0, Values: []sqltypes.Value{sqltypes.NewInt64(1), sqltypes.NewInt64(2)}}, + }, }, { name: "vindex-and-operators", - inFilter: "select * from t1 where in_keyrange(id, 'hash', '-80') and id = 2 and val <> 'xyz'", + inFilter: "select * from t1 where in_keyrange(id, 'hash', '-80') and id = 2 and val <> 'xyz' and id in (100, 30)", outFilters: []Filter{ { Opcode: VindexMatch, @@ -727,6 +733,7 @@ func TestPlanBuilderFilterComparison(t *testing.T) { }, {Opcode: Equal, ColNum: 0, Value: sqltypes.NewInt64(2)}, {Opcode: NotEqual, ColNum: 1, Value: sqltypes.NewVarChar("xyz")}, + {Opcode: In, ColNum: 0, Values: []sqltypes.Value{sqltypes.NewInt64(100), sqltypes.NewInt64(30)}}, }, }} diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go 
b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go index 846d62202e7..5282b5f372d 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go @@ -1966,7 +1966,7 @@ func TestFilteredMultipleWhere(t *testing.T) { filter: &binlogdatapb.Filter{ Rules: []*binlogdatapb.Rule{{ Match: "t1", - Filter: "select id1, val from t1 where in_keyrange('-80') and id2 = 200 and id3 = 1000 and val = 'newton'", + Filter: "select id1, val from t1 where in_keyrange('-80') and id2 = 200 and id3 = 1000 and val = 'newton' and id1 in (1, 2, 129)", }}, }, customFieldEvents: true, @@ -1988,9 +1988,7 @@ func TestFilteredMultipleWhere(t *testing.T) { {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"2", "newton"}}}}}, }}, {"insert into t1 values (3, 100, 2000, 'kepler')", noEvents}, - {"insert into t1 values (128, 200, 1000, 'newton')", []TestRowEvent{ - {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"128", "newton"}}}}}, - }}, + {"insert into t1 values (128, 200, 1000, 'newton')", noEvents}, {"insert into t1 values (5, 200, 2000, 'kepler')", noEvents}, {"insert into t1 values (129, 200, 1000, 'kepler')", noEvents}, {"commit", nil}, @@ -2080,3 +2078,33 @@ func TestGeneratedInvisiblePrimaryKey(t *testing.T) { }} ts.Run() } + +func TestFilteredInOperator(t *testing.T) { + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id1 int, id2 int, val varbinary(128), primary key(id1))", + }, + options: &TestSpecOptions{ + filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "t1", + Filter: "select id1, val from t1 where val in ('eee', 'bbb', 'ddd') and id1 in (4, 5)", + }}, + }, + }, + } + defer ts.Close() + ts.Init() + ts.fieldEvents["t1"].cols[1].skip = true + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 100, 'aaa')", noEvents}, + {"insert into t1 values (2, 200, 'bbb')", noEvents}, + {"insert into t1 values (3, 100, 'ccc')", noEvents}, + {"insert into t1 values (4, 200, 'ddd')", nil}, + {"insert into t1 values (5, 200, 'eee')", nil}, + {"commit", nil}, + }} + ts.Run() +} diff --git a/proto/binlogdata.proto b/proto/binlogdata.proto index 595760dcd52..e1df792776b 100644 --- a/proto/binlogdata.proto +++ b/proto/binlogdata.proto @@ -353,6 +353,10 @@ message FieldEvent { repeated query.Field fields = 2; string keyspace = 3; string shard = 4; + + // Field numbers in the gap between shard (4) and enum_set_string_values + // (25) are NOT reserved and can be used. + // Are ENUM and SET field values already mapped to strings in the ROW // events? This allows us to transition VTGate VStream consumers from // the pre v20 behavior of having to do this mapping themselves to the @@ -362,6 +366,9 @@ // vstreams managed by the vstreamManager. bool enum_set_string_values = 25; bool is_internal_table = 26; // set for sidecardb tables + + // Add new members in the field number gap between shard (4) and + // enum_set_string_values (25). } // ShardGtid contains the GTID position for one shard. diff --git a/proto/vtadmin.proto b/proto/vtadmin.proto index 78f086ec345..963d1fa5779 100644 --- a/proto/vtadmin.proto +++ b/proto/vtadmin.proto @@ -388,7 +388,11 @@ message WorkflowSwitchTrafficRequest { message ApplySchemaRequest { string cluster_id = 1; - vtctldata.ApplySchemaRequest request = 2; + // Request.Sql will be overridden by this Sql field. + string sql = 2; + // Request.CallerId will be overridden by this CallerId field.
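+ // These top-level fields let API clients (for example, VTAdmin web's applySchema) pass sql and caller_id directly rather than populating the embedded request.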
+ string caller_id = 3; + vtctldata.ApplySchemaRequest request = 4; } message CancelSchemaMigrationRequest { diff --git a/test/config.json b/test/config.json index c911232ce74..1e278546c7a 100644 --- a/test/config.json +++ b/test/config.json @@ -1238,6 +1238,17 @@ "RetryMax": 1, "Tags": [] }, + "vtop_example": { + "File": "", + "Args": [], + "Command": [ + "test/vtop_example.sh" + ], + "Manual": false, + "Shard": "", + "RetryMax": 1, + "Tags": [] + }, "vtorc_primary_failure": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/vtorc/primaryfailure"], diff --git a/test/vtop_example.sh b/test/vtop_example.sh index 5ff90a2be7e..c537c0f844c 100755 --- a/test/vtop_example.sh +++ b/test/vtop_example.sh @@ -482,11 +482,12 @@ EOF waitForKeyspaceToBeServing customer 80- 1 } +kind delete cluster --name kind || true # Build the docker image for vitess/lite using the local code docker build -f docker/lite/Dockerfile -t vitess/lite:pr . # Build the docker image for vitess/vtadmin using the local code -docker build -f docker/binaries/vtadmin/Dockerfile --build-arg VT_BASE_VER=pr -t vitess/vtadmin:pr . +docker build -f docker/binaries/vtadmin/Dockerfile --build-arg VT_BASE_VER=pr -t vitess/vtadmin:pr ./docker/binaries/vtadmin # Print the docker images available docker image ls diff --git a/tools/get_kubectl_kind.sh b/tools/get_kubectl_kind.sh index 57df414fdd8..169b120aaa0 100755 --- a/tools/get_kubectl_kind.sh +++ b/tools/get_kubectl_kind.sh @@ -12,7 +12,7 @@ source build.env mkdir -p "$VTROOT/bin" cd "$VTROOT/bin" -KUBE_VERSION="${KUBE_VERSION:-v1.21.1}" +KUBE_VERSION="${KUBE_VERSION:-v1.31.0}" KUBERNETES_RELEASE_URL="${KUBERNETES_RELEASE_URL:-https://dl.k8s.io}" # Download kubectl if needed. @@ -28,7 +28,7 @@ ln -sf "kubectl-${KUBE_VERSION}" kubectl if ! command -v kind &> /dev/null then echo "Downloading kind..." - curl -L https://kind.sigs.k8s.io/dl/v0.12.0/kind-linux-amd64 > "kind" + curl -L https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-amd64 > "kind" chmod +x "kind" echo "Installed kind" else diff --git a/tools/map-shard-for-value/Makefile b/tools/map-shard-for-value/Makefile new file mode 100644 index 00000000000..61bc88ac0ed --- /dev/null +++ b/tools/map-shard-for-value/Makefile @@ -0,0 +1,22 @@ +# Copyright 2024 The Vitess Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +build: + go build map-shard-for-value.go + +test: + echo "1\n-1\n99" | go run map-shard-for-value.go --total_shards=4 --vindex=xxhash + +clean: + rm -f map-shard-for-value diff --git a/tools/map-shard-for-value/map-shard-for-value.go b/tools/map-shard-for-value/map-shard-for-value.go new file mode 100755 index 00000000000..18a092d1371 --- /dev/null +++ b/tools/map-shard-for-value/map-shard-for-value.go @@ -0,0 +1,207 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "bufio" + "context" + "encoding/hex" + "fmt" + "log" + "os" + "strconv" + "strings" + + flag "github.com/spf13/pflag" + + "vitess.io/vitess/go/vt/topo" + + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/key" + "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/vtgate/vindexes" +) + +/* + * This tool reads a list of values from stdin and prints the + * corresponding keyspace ID and shard for each value. It uses the given vindex + * and shard ranges to determine the shard. The vindex is expected to be a + * single-column vindex. The shard ranges are specified as a comma-separated + * list of key ranges, example "-80,80-". + * If you have uniformly distributed shards, you can specify the total number + * of shards using the -total_shards flag, and the tool will generate the shard ranges + * using the same logic as the Vitess operator does (using the key.GenerateShardRanges() function). + * + * Example usage: + * echo "1\n2\n3" | go run map-shard-for-value.go -vindex=hash -shards=-80,80- + * + * Currently tested only for integer values and hash/xxhash vindexes. + */ + +func mapShard(allShards []*topodata.ShardReference, ksid key.DestinationKeyspaceID) (string, error) { + foundShard := "" + addShard := func(shard string) error { + foundShard = shard + return nil + } + if err := ksid.Resolve(allShards, addShard); err != nil { + return "", fmt.Errorf("failed to resolve keyspace ID: %v:: %s", ksid.String(), err) + } + + if foundShard == "" { + return "", fmt.Errorf("no shard found for keyspace ID: %v", ksid) + } + return foundShard, nil +} + +func selectShard(vindex vindexes.Vindex, value sqltypes.Value, allShards []*topodata.ShardReference) (string, key.DestinationKeyspaceID, error) { + ctx := context.Background() + + destinations, err := vindexes.Map(ctx, vindex, nil, [][]sqltypes.Value{{value}}) + if err != nil { + return "", nil, fmt.Errorf("failed to map value to keyspace ID: %w", err) + } + + if len(destinations) != 1 { + return "", nil, fmt.Errorf("unexpected number of destinations: %d", len(destinations)) + } + + ksid, ok := destinations[0].(key.DestinationKeyspaceID) + if !ok { + return "", nil, fmt.Errorf("unexpected destination type: %T", destinations[0]) + } + + foundShard, err := mapShard(allShards, ksid) + if err != nil { + return "", nil, fmt.Errorf("failed to map shard, original value %v, keyspace id %s: %w", value, ksid, err) + } + return foundShard, ksid, nil +} + +func getValue(valueStr, valueType string) (sqltypes.Value, error) { + var value sqltypes.Value + + switch valueType { + case "int": + valueInt, err := strconv.ParseInt(valueStr, 10, 64) + if err != nil { + return value, fmt.Errorf("failed to parse int value: %w", err) + } + value = sqltypes.NewInt64(valueInt) + case "uint": + valueUint, err := strconv.ParseUint(valueStr, 10, 64) + if err != nil { + return value, fmt.Errorf("failed to parse uint value: %w", err) + } + value = sqltypes.NewUint64(valueUint) + case "string": + value = sqltypes.NewVarChar(valueStr) + default: + return value, fmt.Errorf("unsupported value type: %s", valueType) + } + + return value, nil +} + +func
getShardMap(shardsCSV *string) []*topodata.ShardReference { + var allShards []*topodata.ShardReference + + for _, shard := range strings.Split(*shardsCSV, ",") { + _, keyRange, err := topo.ValidateShardName(shard) + if err != nil { + log.Fatalf("invalid shard range: %s", shard) + } + allShards = append(allShards, &topodata.ShardReference{ + Name: shard, + KeyRange: keyRange, + }) + } + return allShards +} + +type output struct { + Value string + KeyspaceID string + Shard string +} + +func processValues(scanner *bufio.Scanner, shardsCSV *string, vindexName string, valueType string) ([]output, error) { + allShards := getShardMap(shardsCSV) + + vindex, err := vindexes.CreateVindex(vindexName, vindexName, nil) + if err != nil { + return nil, fmt.Errorf("failed to create vindex: %v", err) + } + var outputs []output + for scanner.Scan() { + valueStr := scanner.Text() + if valueStr == "" { + continue + } + value, err := getValue(valueStr, valueType) + if err != nil { + return nil, fmt.Errorf("failed to get value for: %v, value_type %s:: %v", valueStr, valueType, err) + } + shard, ksid, err := selectShard(vindex, value, allShards) + if err != nil { + // ignore errors so that we can go ahead with the computation for other values + continue + } + outputs = append(outputs, output{Value: valueStr, KeyspaceID: hex.EncodeToString(ksid), Shard: shard}) + } + return outputs, nil +} + +func printOutput(outputs []output) { + fmt.Println("value,keyspaceID,shard") + for _, output := range outputs { + fmt.Printf("%s,%s,%s\n", output.Value, output.KeyspaceID, output.Shard) + } +} + +func main() { + // Explicitly configuring the logger since it was flaky in displaying logs locally without this. + log.SetOutput(os.Stderr) + log.SetFlags(log.LstdFlags) + log.SetPrefix("LOG: ") + + vindexName := flag.String("vindex", "xxhash", "name of the vindex") + shardsCSV := flag.String("shards", "", "comma-separated list of shard ranges") + totalShards := flag.Int("total_shards", 0, "total number of uniformly distributed shards") + valueType := flag.String("value_type", "int", "type of the value (int, uint, or string)") + flag.Parse() + + if *totalShards > 0 { + if *shardsCSV != "" { + log.Fatalf("cannot specify both total_shards and shards") + } + shardArr, err := key.GenerateShardRanges(*totalShards) + if err != nil { + log.Fatalf("failed to generate shard ranges: %v", err) + } + *shardsCSV = strings.Join(shardArr, ",") + } + if *shardsCSV == "" { + log.Fatal("shards or total_shards must be specified") + } + scanner := bufio.NewScanner(os.Stdin) + outputs, err := processValues(scanner, shardsCSV, *vindexName, *valueType) + if err != nil { + log.Fatalf("failed to process values: %v", err) + } + printOutput(outputs) +} diff --git a/tools/map-shard-for-value/map-shard-for-value.md b/tools/map-shard-for-value/map-shard-for-value.md new file mode 100644 index 00000000000..17daf7f5fe5 --- /dev/null +++ b/tools/map-shard-for-value/map-shard-for-value.md @@ -0,0 +1,47 @@ +## Map Shard for Value Tool + +### Overview + +The `map-shard-for-value` tool maps a given value to a specific shard. This tool helps in determining +which shard a particular value belongs to, based on the vindex algorithm and shard ranges. + +### Features + +- Allows specifying the vindex type (e.g., `hash`, `xxhash`). +- Allows specifying the shard list or (for uniformly distributed shard ranges) the total number of shards to generate.
+- Designed as a _filter_: Reads input values from `stdin` and outputs the corresponding shard information, so it can be + used to map values from a file or another program. + +### Usage + +```sh +make build +``` + +```sh +echo "1\n-1\n99" | ./map-shard-for-value --total_shards=4 --vindex=xxhash +value,keyspaceID,shard +1,d46405367612b4b7,c0- +-1,d8e2a6a7c8c7623d,c0- +99,200533312244abca,-40 + +echo "1\n-1\n99" | ./map-shard-for-value --vindex=hash --shards="-80,80-" +value,keyspaceID,shard +1,166b40b44aba4bd6,-80 +-1,355550b2150e2451,-80 +99,2c40ad56f4593c47,-80 +``` + +#### Flags + +- `--vindex`: Specifies the name of the vindex to use (e.g., `hash`, `xxhash`) (default `xxhash`) + +One (and only one) of these is required: + +- `--shards`: Comma-separated list of shard ranges +- `--total_shards`: Total number of shards, only if shards are uniformly distributed + +Optional: +- `--value_type`: Type of the value to map, one of int, uint, string (default `int`) + diff --git a/tools/map-shard-for-value/map-shard-for-value_test.go b/tools/map-shard-for-value/map-shard-for-value_test.go new file mode 100644 index 00000000000..ca014818bb9 --- /dev/null +++ b/tools/map-shard-for-value/map-shard-for-value_test.go @@ -0,0 +1,90 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "bufio" + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestProcess(t *testing.T) { + type testCase struct { + name string + shardsCSV string + vindexType string + values []int + valueType string + expected []output + } + testCases := []testCase{ + { + name: "hash,2 shards", + shardsCSV: "-80,80-", + vindexType: "hash", + values: []int{1, 99}, + valueType: "int", + expected: []output{ + { + Value: "1", + KeyspaceID: "166b40b44aba4bd6", + Shard: "-80", + }, + { + Value: "99", + KeyspaceID: "2c40ad56f4593c47", + Shard: "-80", + }, + }, + }, + { + name: "xxhash,4 shards", + shardsCSV: "-40,40-80,80-c0,c0-", + vindexType: "xxhash", + values: []int{1, 99}, + valueType: "int", + expected: []output{ + { + Value: "1", + KeyspaceID: "d46405367612b4b7", + Shard: "c0-", + }, + { + Value: "99", + KeyspaceID: "200533312244abca", + Shard: "-40", + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var input strings.Builder + for _, num := range tc.values { + fmt.Fprintf(&input, "%d\n", num) + } + reader := strings.NewReader(input.String()) + scanner := bufio.NewScanner(reader) + got, err := processValues(scanner, &tc.shardsCSV, tc.vindexType, tc.valueType) + require.NoError(t, err) + require.EqualValues(t, tc.expected, got) + }) + } +} diff --git a/web/vtadmin/src/api/http.ts b/web/vtadmin/src/api/http.ts index 3f75330d240..674df961ef0 100644 --- a/web/vtadmin/src/api/http.ts +++ b/web/vtadmin/src/api/http.ts @@ -1068,3 +1068,41 @@ export const showVDiff = async ({ clusterID, request }: ShowVDiffParams) => { return vtadmin.VDiffShowResponse.create(result); }; + +export const fetchSchemaMigrations = async (request: vtadmin.IGetSchemaMigrationsRequest) => { + const { result } = await vtfetch(`/api/migrations/`, { + body: JSON.stringify(request), + method: 'post', + }); + + const err = vtadmin.GetSchemaMigrationsResponse.verify(result); + if (err) throw Error(err); + + return vtadmin.GetSchemaMigrationsResponse.create(result); +}; + +export interface ApplySchemaParams { + clusterID: string; + keyspace: string; + callerID: string; + sql: string; + request: vtctldata.IApplySchemaRequest; +} + +export const applySchema = async ({ clusterID, keyspace, callerID, sql, request }: ApplySchemaParams) => { + const body = { + sql, + caller_id: callerID, + request, + }; + + const { result } = await vtfetch(`/api/migration/${clusterID}/${keyspace}`, { + body: JSON.stringify(body), + method: 'post', + }); + + const err = vtctldata.ApplySchemaResponse.verify(result); + if (err) throw Error(err); + + return vtctldata.ApplySchemaResponse.create(result); +}; diff --git a/web/vtadmin/src/components/App.tsx b/web/vtadmin/src/components/App.tsx index ef27a35dc95..3bb41ea35f0 100644 --- a/web/vtadmin/src/components/App.tsx +++ b/web/vtadmin/src/components/App.tsx @@ -45,6 +45,8 @@ import { Transactions } from './routes/Transactions'; import { Transaction } from './routes/transaction/Transaction'; import { CreateReshard } from './routes/createWorkflow/CreateReshard'; import { CreateMaterialize } from './routes/createWorkflow/CreateMaterialize'; +import { SchemaMigrations } from './routes/SchemaMigrations'; +import { CreateSchemaMigration } from './routes/createSchemaMigration/CreateSchemaMigration'; export const App = () => { return ( @@ -140,6 +142,16 @@ export const App = () => { + <Route path="/migrations"> + <SchemaMigrations /> + </Route> + + {!isReadOnlyMode() && ( + <Route path="/migration/:clusterID/:keyspace"> + <CreateSchemaMigration /> + </Route> + )} + diff --git a/web/vtadmin/src/components/NavRail.tsx b/web/vtadmin/src/components/NavRail.tsx
index 9f9e1bf1681..b30cd165684 100644 --- a/web/vtadmin/src/components/NavRail.tsx +++ b/web/vtadmin/src/components/NavRail.tsx @@ -65,6 +65,9 @@ export const NavRail = () => {