Skip to content

Commit

Permalink
Merge branch 'main' into duckdb-arrow-integration
Browse files Browse the repository at this point in the history
  • Loading branch information
fanyang01 authored Jan 6, 2025
2 parents 4f76af4 + 18113cf commit e4c52d9
Show file tree
Hide file tree
Showing 43 changed files with 5,819 additions and 183 deletions.
51 changes: 44 additions & 7 deletions .github/workflows/backup-restore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:
# Verify replication of new data
psql -h 127.0.0.1 -p 5432 -U postgres -c "SELECT 1 FROM test_table WHERE id = 3 AND name = 'new data 3';" | grep -q 1
- name: Backup MyDuck and Insert more data into source PG
run: |
psql "postgres://postgres:@127.0.0.1:5432" <<-EOSQL
Expand Down Expand Up @@ -157,14 +157,16 @@ jobs:
--restore-secret-access-key=minioadmin &
sleep 10
- name: Test Replication
run: |
# Verify replication catches up
psql -h 127.0.0.1 -p 5432 -U postgres -c "SELECT 1 FROM test_table WHERE id = 4 AND name = 'offline data 4';" | grep -q 1
# Kill MyDuck
pkill myduckserver
- name: Cleanup
if: always()
run: |
pkill myduckserver || true
rm -f ./myduck.db
- name: Restore MyDuck at Runtime
Expand All @@ -186,8 +188,43 @@ jobs:
# Verify replication catches up
psql -h 127.0.0.1 -p 5432 -U postgres -d testdb2 -c "SELECT 1 FROM test_table WHERE id = 4 AND name = 'offline data 4';" | grep -q 1
# Kill MyDuck
pkill myduckserver
- name: Test Multiple Databases
run: |
psql "postgres://postgres:@127.0.0.1:5432" <<-EOSQL
CREATE DATABASE testdb3;
CREATE SCHEMA testdb3.sch3;
USE testdb3.sch3;
CREATE TABLE test_table3 (id int primary key, name text);
INSERT INTO test_table3 VALUES (3, 'initial data 3'), (33, 'initial data 33');
CREATE DATABASE testdb4;
CREATE SCHEMA testdb4.sch4;
USE testdb4.sch4;
CREATE TABLE test_table4 (id int primary key, name text);
INSERT INTO test_table4 VALUES (4, 'initial data 4'), (44, 'initial data 44');
BACKUP DATABASE testdb3 TO 's3c://myduck-backup/myduck/myduck3.bak'
ENDPOINT = '127.0.0.1:9001'
ACCESS_KEY_ID = 'minioadmin'
SECRET_ACCESS_KEY = 'minioadmin';
BACKUP DATABASE testdb4 TO 's3c://myduck-backup/myduck/myduck4.bak'
ENDPOINT = '127.0.0.1:9001'
ACCESS_KEY_ID = 'minioadmin'
SECRET_ACCESS_KEY = 'minioadmin';
RESTORE DATABASE testdb5 FROM 's3c://myduck-backup/myduck/myduck3.bak'
ENDPOINT = '127.0.0.1:9001'
ACCESS_KEY_ID = 'minioadmin'
SECRET_ACCESS_KEY = 'minioadmin';
RESTORE DATABASE testdb6 FROM 's3c://myduck-backup/myduck/myduck4.bak'
ENDPOINT = '127.0.0.1:9001'
ACCESS_KEY_ID = 'minioadmin'
SECRET_ACCESS_KEY = 'minioadmin';
EOSQL
psql -h 127.0.0.1 -p 5432 -U postgres -d testdb5 -c "SELECT 1 FROM sch3.test_table3 WHERE id = 3 AND name = 'initial data 3';" | grep -q 1
psql -h 127.0.0.1 -p 5432 -U postgres -d testdb6 -c "SELECT 1 FROM sch4.test_table4 WHERE id = 44 AND name = 'initial data 44';" | grep -q 1
- name: Cleanup
if: always()
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/bats-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
pip3 install "sqlglot[rs]" pyarrow pandas
curl -LJO https://github.com/duckdb/duckdb/releases/download/v1.1.3/duckdb_cli-linux-amd64.zip
curl -LJO https://github.com/duckdb/duckdb/releases/latest/download/duckdb_cli-linux-amd64.zip
unzip duckdb_cli-linux-amd64.zip
chmod +x duckdb
sudo mv duckdb /usr/local/bin
Expand Down
99 changes: 99 additions & 0 deletions .github/workflows/mysql-copy-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
name: MySQL Copy Instance Test

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
copy-instance-test:
runs-on: ubuntu-latest
services:
source:
image: mysql:lts
env:
MYSQL_ROOT_PASSWORD: root
ports:
- 13306:3306
options: >-
--health-cmd="mysqladmin ping"
--health-interval=10s
--health-timeout=5s
--health-retries=3
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.23'

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'

- name: Install dependencies
run: |
go get .
pip3 install "sqlglot[rs]"
curl -LJO https://dev.mysql.com/get/Downloads/MySQL-Shell/mysql-shell_9.1.0-1debian12_amd64.deb
sudo apt-get install -y ./mysql-shell_9.1.0-1debian12_amd64.deb
- name: Setup test data in source MySQL
run: |
mysqlsh -hlocalhost -P13306 -uroot -proot --sql -e "
CREATE DATABASE testdb;
USE testdb;
CREATE TABLE users (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100)
);
INSERT INTO users (name) VALUES ('test1'), ('test2'), ('test3');
-- Make a gap in the id sequence
INSERT INTO users VALUES (100, 'test100');
INSERT INTO users (name) VALUES ('test101');
-- A table with non-default starting auto_increment value
CREATE TABLE items (
id INT AUTO_INCREMENT PRIMARY KEY,
v BIGINT check (v > 0),
name VARCHAR(100)
) AUTO_INCREMENT=1000;
INSERT INTO items (v, name) VALUES (1, 'item1'), (2, 'item2'), (3, 'item3');
"
- name: Build and start MyDuck Server
run: |
go build -v
./myduckserver &
sleep 5
- name: Run copy-instance test
run: |
# Set local_infile to true to allow loading data from files
mysqlsh -uroot --no-password --sql -e "SET GLOBAL local_infile = 1;"
# Copy the data from source MySQL to MyDuck
mysqlsh -hlocalhost -P13306 -uroot -proot \
-- util copy-instance "mysql://root:@127.0.0.1:3306" \
--users false --ignore-version true
# Verify the data was copied
for table in users items; do
mysqlsh -hlocalhost -P13306 -uroot -proot --sql -e "
SELECT * FROM testdb.$table ORDER BY id;
" | tee source_data_$table.tsv
mysqlsh -uroot --no-password --sql -e "
SELECT * FROM testdb.$table ORDER BY id;
" | tee copied_data_$table.tsv
diff source_data_$table.tsv copied_data_$table.tsv
done
39 changes: 35 additions & 4 deletions .github/workflows/replication-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,23 +90,29 @@ jobs:
docker exec source-db dolt sql -q "
CREATE DATABASE test;
CREATE TABLE test.items (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO test.items VALUES (1, 'test1'), (2, 'test2');"
INSERT INTO test.items VALUES (1, 'test1'), (2, 'test2');
CREATE TABLE test.skip (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO test.skip VALUES (1, 'abc'), (2, 'def');"
elif [ "${{ matrix.source }}" = "mariadb" ]; then
docker exec source-db mariadb -uroot -proot test -e "
CREATE TABLE items (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO items VALUES (1, 'test1'), (2, 'test2');"
INSERT INTO items VALUES (1, 'test1'), (2, 'test2');
CREATE TABLE skip (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO skip VALUES (1, 'abc'), (2, 'def');"
else
docker exec source-db mysql -uroot -proot test -e "
CREATE TABLE items (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO items VALUES (1, 'test1'), (2, 'test2');"
INSERT INTO items VALUES (1, 'test1'), (2, 'test2');
CREATE TABLE skip (id INT PRIMARY KEY, name VARCHAR(50));
INSERT INTO skip VALUES (1, 'abc'), (2, 'def');"
fi
- name: Start MyDuck Server in replica mode
run: |
if [ "${{ matrix.source }}" = "postgres" ]; then
SOURCE_DSN="postgres://postgres:[email protected]:5432/test"
else
SOURCE_DSN="mysql://root:[email protected]:3306"
SOURCE_DSN="mysql://root:[email protected]:3306/test?skip-tables=test.skip"
fi
docker run -d --name myduck \
Expand Down Expand Up @@ -203,6 +209,31 @@ jobs:
exit 1
fi
# Print the logs
docker logs myduck
- name: Verify skip tables
run: |
# Verify skipped table is empty (for MySQL-compatible databases only)
if [ "${{ matrix.source }}" != "postgres" ]; then
# Check if skip table exists and has any rows
TABLE_EXISTS=$(docker exec myduck psql -t -U postgres -h 127.0.0.1 -c \
"SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '${SCHEMA}' AND table_name = 'skip';" | tr -d ' ')
if [ "$TABLE_EXISTS" -ne "0" ]; then
COUNT=$(docker exec myduck psql -t -U postgres -h 127.0.0.1 -c \
"SELECT COUNT(*) FROM ${SCHEMA}.skip;" | tr -d ' ')
if [ "$COUNT" -eq "0" ]; then
echo "Successfully verified that skipped table exists but is empty"
else
echo "Error: Skipped table 'skip' contains $COUNT rows when it should be empty"
exit 1
fi
else
echo "Successfully verified that skipped table does not exist in destination"
fi
fi
- name: Cleanup
if: always()
run: |
Expand Down
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,13 @@ Looking to load Parquet files into MyDuck Server and start querying? Follow our

Already have a DuckDB file? You can seamlessly bootstrap MyDuck Server with it. See our [DuckDB file bootstrapping guide](docs/tutorial/bootstrap.md) for more details.

### Managing Multiple Databases

Easily manage multiple databases in MyDuck Server, just as you would in Postgres. For step-by-step instructions and detailed guidance, check out our [Database Management Guide](docs/tutorial/manage-multiple-databases.md).

### Backup and Restore with Object Storage

To back up and restore your MyDuck Server database using object storage, refer to our [backup and restore guide](docs/tutorial/backup-restore.md) for detailed instructions.
To back up and restore your databases inside MyDuck Server using object storage, refer to our [backup and restore guide](docs/tutorial/backup-restore.md) for detailed instructions.

### LLM Integration

Expand All @@ -217,8 +221,8 @@ MyDuck Server can be seamlessly accessed from the Python data science ecosystem.

We have big plans for MyDuck Server! Here are some of the features we’re working on:

- [ ] Arrow Flight SQL.
- [ ] Multiple DB.
- [x] Arrow Flight SQL.
- [x] Multiple DB.
- [ ] Authentication.
- [ ] ...and more! We’re always looking for ways to make MyDuck Server better. If you have a feature request, please let us know by [opening an issue](https://github.com/apecloud/myduckserver/issues/new).

Expand Down
10 changes: 10 additions & 0 deletions adapter/adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ type ConnectionHolder interface {
GetCatalogConn(ctx context.Context) (*stdsql.Conn, error)
GetCatalogTxn(ctx context.Context, options *stdsql.TxOptions) (*stdsql.Tx, error)
TryGetTxn() *stdsql.Tx
GetCurrentCatalog() string
GetCurrentSchema() string
CloseTxn()
CloseConn()
}
Expand Down Expand Up @@ -42,6 +44,14 @@ func TryGetTxn(ctx *sql.Context) *stdsql.Tx {
return ctx.Session.(ConnectionHolder).TryGetTxn()
}

// GetCurrentCatalog returns the current catalog name reported by the
// ConnectionHolder backing ctx.Session.
//
// The session is type-asserted without a check, so this panics if the
// session does not implement ConnectionHolder.
func GetCurrentCatalog(ctx *sql.Context) string {
	holder := ctx.Session.(ConnectionHolder)
	return holder.GetCurrentCatalog()
}

// GetCurrentSchema returns the current schema name reported by the
// ConnectionHolder backing ctx.Session.
//
// The session is type-asserted without a check, so this panics if the
// session does not implement ConnectionHolder.
func GetCurrentSchema(ctx *sql.Context) string {
	holder := ctx.Session.(ConnectionHolder)
	return holder.GetCurrentSchema()
}

func CloseTxn(ctx *sql.Context) {
ctx.Session.(ConnectionHolder).CloseTxn()
}
Expand Down
37 changes: 27 additions & 10 deletions backend/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
stdsql "database/sql"
"fmt"

"github.com/apecloud/myduckserver/adapter"
"github.com/apecloud/myduckserver/catalog"
"github.com/apecloud/myduckserver/transpiler"
"github.com/dolthub/go-mysql-server/sql"
Expand Down Expand Up @@ -63,7 +64,7 @@ func (b *DuckBuilder) Build(ctx *sql.Context, root sql.Node, r sql.Row) (sql.Row
ctx.GetLogger().WithFields(logrus.Fields{
"Query": ctx.Query(),
"NodeType": fmt.Sprintf("%T", n),
}).Trace("Building node:", n)
}).Traceln("Building node:", n)

// TODO: find a better way to fall back to the base builder
switch n.(type) {
Expand All @@ -80,13 +81,12 @@ func (b *DuckBuilder) Build(ctx *sql.Context, root sql.Node, r sql.Row) (sql.Row
case *plan.InsertInto:
insert := n.(*plan.InsertInto)

// For AUTO_INCREMENT column, we fallback to the framework if the column is specified.
if dst, err := plan.GetInsertable(insert.Destination); err == nil && dst.Schema().HasAutoIncrement() {
if len(insert.ColumnNames) == 0 || len(insert.ColumnNames) == len(dst.Schema()) {
return b.base.Build(ctx, root, r)
}
}

// The handling of auto_increment reset and check constraints is not supported by DuckDB.
// We need to fall back to the framework for these cases.
// But we want to rewrite LOAD DATA to be handled by DuckDB,
// as it is a common way to import data into the database.
// Therefore, we ignore auto_increment and check constraints for LOAD DATA,
// and the rewrite of LOAD DATA is done eagerly here.
src := insert.Source
if proj, ok := src.(*plan.Project); ok {
src = proj.Child
Expand All @@ -97,6 +97,20 @@ func (b *DuckBuilder) Build(ctx *sql.Context, root sql.Node, r sql.Row) (sql.Row
}
return b.base.Build(ctx, root, r)
}

if dst, err := plan.GetInsertable(insert.Destination); err == nil {
// For tables with an AUTO_INCREMENT column, we fall back to the framework.
// if dst.Schema().HasAutoIncrement() && (0 == len(insert.ColumnNames) || len(insert.ColumnNames) == len(dst.Schema())) {
if dst.Schema().HasAutoIncrement() {
return b.base.Build(ctx, root, r)
}
// For tables with check constraints, we fall back to the framework.
if ct, ok := dst.(sql.CheckTable); ok {
if checks, err := ct.GetChecks(ctx); err == nil && len(checks) > 0 {
return b.base.Build(ctx, root, r)
}
}
}
}

// Fallback to the base builder if the plan contains system/user variables or is not a pure data query.
Expand All @@ -111,7 +125,7 @@ func (b *DuckBuilder) Build(ctx *sql.Context, root sql.Node, r sql.Row) (sql.Row

switch node := n.(type) {
case *plan.Use:
useStmt := "USE " + catalog.FullSchemaName(b.provider.CatalogName(), node.Database().Name())
useStmt := "USE " + catalog.FullSchemaName(adapter.GetCurrentCatalog(ctx), node.Database().Name())
if _, err := conn.ExecContext(ctx.Context, useStmt); err != nil {
if catalog.IsDuckDBSetSchemaNotFoundError(err) {
return nil, sql.ErrDatabaseNotFound.New(node.Database().Name())
Expand Down Expand Up @@ -156,9 +170,12 @@ func (b *DuckBuilder) executeQuery(ctx *sql.Context, n sql.Node, conn *stdsql.Co
)

// Translate the MySQL query to a DuckDB query
switch n.(type) {
switch n := n.(type) {
case *plan.ShowTables:
duckSQL = ctx.Query()
case *plan.ResolvedTable:
// SQLGlot cannot translate MySQL's `TABLE t` into DuckDB's `FROM t` - it produces `"table" AS t` instead.
duckSQL = `FROM ` + catalog.ConnectIdentifiersANSI(n.Database().Name(), n.Name())
default:
duckSQL, err = transpiler.TranslateWithSQLGlot(ctx.Query())
}
Expand Down
Loading

0 comments on commit e4c52d9

Please sign in to comment.