Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: data lost when BecomeLeader failed (#621)
### Motivation The status already became `LEADER` if `addFollower` failed. that will make public server accept requests. ``` {"level":"debug","time":"2025-02-25T03:15:00.886431474Z","component":"db","key":"2f52b44b5408436894ff635087ff0f37-000000000000000013 97-00000000000000002794-00000000000000004191","namespace":"default","shard":11,"version":{"version_id":2932110,"created_timestamp":1 740453300879,"modified_timestamp":1740453300879},"time":"2025-02-25T03:15:00.886435947Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:00.886448265Z","component":"db","key":"__oxia/commit-offset","namespace":"default","shard" :11,"version":{"version_id":-1,"modifications_count":2577974,"created_timestamp":1740396891493,"modified_timestamp":1740453300879}," time":"2025-02-25T03:15:00.886453932Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:00.886460251Z","component":"db","key":"__oxia/last-version-id","namespace":"default","shar d":11,"version":{"version_id":-1,"modifications_count":2577974,"created_timestamp":1740396891493,"modified_timestamp":1740453300879} ,"time":"2025-02-25T03:15:00.886464265Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:00.97985226Z","req":{"shard":11,"puts":[{"key":"2f52b44b5408436894ff635087ff0f37","value": "AAAAAAAAKTg=","partition_key":"2f52b44b5408436894ff635087ff0f37","sequence_key_delta":[1,2,3]}]},"time":"2025-02-25T03:15:00.979863 138Z","message":"Got request in stream"} {"level":"debug","time":"2025-02-25T03:15:00.9798711Z","component":"leader-controller","namespace":"default","req":{"shard":11,"puts ":[{"key":"2f52b44b5408436894ff635087ff0f37","value":"AAAAAAAAKTg=","partition_key":"2f52b44b5408436894ff635087ff0f37","sequence_key _delta":[1,2,3]}]},"shard":11,"term":344,"time":"2025-02-25T03:15:00.979878047Z","message":"Append operation"} {"level":"debug","time":"2025-02-25T03:15:00.98082737Z","component":"follower-cursor","follower":"data-lost-oxia-0.data-lost-oxia-he adless:6649","namespace":"default","offset":2585821,"shard":11,"term":344,"time":"2025-02-25T03:15:00.980834674Z","message":"Sending entries to follower"} {"level":"debug","time":"2025-02-25T03:15:06.3798797Z","component":"public-rpc-server","namespace":"default","peer":"10.60.20.44:598 72","shard":11,"time":"2025-02-25T03:15:06.37988677Z","message":"Write Stream request"} {"level":"debug","time":"2025-02-25T03:15:06.379890628Z","component":"shards-director","shard":11,"time":"2025-02-25T03:15:06.379892 379Z","message":"This node is not hosting shard"} {"level":"info","time":"2025-02-25T03:15:49.803805685Z","component":"internal-rpc-server","peer":"10.60.31.210:38304","req":{"namesp ace":"default","shard":11,"term":345,"options":{}},"time":"2025-02-25T03:15:49.803814676Z","message":"Received NewTerm request"} {"level":"debug","time":"2025-02-25T03:15:49.80382062Z","component":"shards-director","shard":11,"time":"2025-02-25T03:15:49.8038236 71Z","message":"This node is not hosting shard"} {"level":"debug","time":"2025-02-25T03:15:49.803828329Z","component":"internal-rpc-server","error":{"error":"rpc error: code = Code( 107) desc = node is not follower for shard 11","kind":"*status.Error","stack":null},"peer":"10.60.31.210:38304","req":{"namespace":" default","shard":11,"term":345,"options":{}},"time":"2025-02-25T03:15:49.803840725Z","message":"Node is not follower, getting leader "} {"level":"info","time":"2025-02-25T03:15:49.812867614Z","component":"pebble","shard":11,"time":"2025-02-25T03:15:49.812873089Z","mes sage":"[JOB 1] WAL file /data/db/default/shard-11/001221.log with log number 001221 stopped reading at offset: 0; replayed 0 keys in 0 batches"} {"level":"info","time":"2025-02-25T03:15:49.825914038Z","component":"leader-controller","namespace":"default","shard":11,"term":344, "time":"2025-02-25T03:15:49.825919153Z","message":"Created leader controller"} {"level":"debug","time":"2025-02-25T03:15:49.82678302Z","component":"db","key":"__oxia/term","namespace":"default","shard":11,"versi on":{"version_id":-1,"modifications_count":319,"created_timestamp":1740396117249,"modified_timestamp":1740453349826},"time":"2025-02 -25T03:15:49.826790546Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:49.826799889Z","component":"db","key":"__oxia/term-options","namespace":"default","shard": 11,"version":{"version_id":-1,"modifications_count":319,"created_timestamp":1740396117249,"modified_timestamp":1740453349826},"time" :"2025-02-25T03:15:49.826805124Z","message":"Applied put operation"} {"level":"info","time":"2025-02-25T03:15:49.835051901Z","component":"leader-controller","last-entry":{"term":344,"offset":2585821}," namespace":"default","shard":11,"term":345,"time":"2025-02-25T03:15:49.835067743Z","message":"Leader successfully initialized in new term"} {"level":"info","time":"2025-02-25T03:15:49.836224479Z","component":"internal-rpc-server","leaderTerm":345,"peer":"10.60.31.210:3830 4","req":{"namespace":"default","shard":11,"term":345,"options":{}},"response":{"head_entry_id":{"term":344,"offset":2585821}},"time ":"2025-02-25T03:15:49.836238087Z","message":"New term processing completed"} {"level":"info","time":"2025-02-25T03:15:49.837994448Z","component":"internal-rpc-server","peer":"10.60.31.210:38304","request":{"na mespace":"default","shard":11,"term":345,"replication_factor":3,"follower_maps":{"data-lost-oxia-0.data-lost-oxia-headless:6649":{"t erm":344,"offset":2585821},"data-lost-oxia-2.data-lost-oxia-headless:6649":{"term":156,"offset":2564735}}},"time":"2025-02-25T03:15: 49.838007987Z","message":"Received BecomeLeader request"} {"level":"debug","time":"2025-02-25T03:15:49.838202183Z","component":"leader-controller","follower":"data-lost-oxia-0.data-lost-oxia -headless:6649","follower-head-entry":{"term":344,"offset":2585821},"leader-head-entry":{"term":344,"offset":2585821},"namespace":"d efault","shard":11,"term":345,"time":"2025-02-25T03:15:49.838214088Z","message":"Needs truncation?"} {"level":"info","time":"2025-02-25T03:15:49.838267883Z","component":"leader-controller","follower":"data-lost-oxia-0.data-lost-oxia- headless:6649","follower-head-entry":{"term":344,"offset":2585821},"head-offset":2585821,"leader-election-head-entry":{"term":344,"o ffset":2585821},"namespace":"default","shard":11,"term":345,"time":"2025-02-25T03:15:49.838276908Z","message":"Added follower"} {"level":"debug","time":"2025-02-25T03:15:49.838284161Z","component":"leader-controller","follower":"data-lost-oxia-2.data-lost-oxia -headless:6649","follower-head-entry":{"term":156,"offset":2564735},"leader-head-entry":{"term":344,"offset":2585821},"namespace":"d efault","shard":11,"term":345,"time":"2025-02-25T03:15:49.838290241Z","message":"Needs truncation?"} {"level":"info","time":"2025-02-25T03:15:49.838404107Z","ack-offset":2585821,"component":"follower-cursor","follower":"data-lost-oxi a-0.data-lost-oxia-headless:6649","namespace":"default","shard":11,"term":345,"time":"2025-02-25T03:15:49.838416072Z","message":"Suc cessfully attached cursor follower"} {"level":"debug","time":"2025-02-25T03:15:49.879930854Z","component":"public-rpc-server","namespace":"default","peer":"10.60.20.44:5 9872","shard":11,"time":"2025-02-25T03:15:49.879936692Z","message":"Write Stream request"} {"level":"debug","time":"2025-02-25T03:15:49.879959526Z","req":{"shard":11,"puts":[{"key":"2f52b44b5408436894ff635087ff0f37","value" :"AAAAAAAAKyE=","partition_key":"2f52b44b5408436894ff635087ff0f37","sequence_key_delta":[1,2,3]}]},"time":"2025-02-25T03:15:49.87997 3537Z","message":"Got request in stream"} {"level":"error","time":"2025-02-25T03:15:49.903652509Z","component":"leader-controller","error":{"error":"rpc error: code = Unknown desc = failed to truncate wal. truncate-offset: 2564844 - wal-last-offset: 2564735: oxia: offset out of bounds","kind":"*status.Err or","stack":null},"follower":"data-lost-oxia-2.data-lost-oxia-headless:6649","follower-head-entry":null,"namespace":"default","shard ":11,"term":345,"time":"2025-02-25T03:15:49.90367001Z","message":"Failed to truncate follower"} {"level":"warn","time":"2025-02-25T03:15:49.903678997Z","component":"internal-rpc-server","error":{"error":"rpc error: code = Unknow n desc = failed to truncate wal. truncate-offset: 2564844 - wal-last-offset: 2564735: oxia: offset out of bounds","kind":"*status.Er ror","stack":null},"peer":"10.60.31.210:38304","request":{"namespace":"default","shard":11,"term":345,"replication_factor":3,"follow er_maps":{"data-lost-oxia-0.data-lost-oxia-headless:6649":{"term":344,"offset":2585821},"data-lost-oxia-2.data-lost-oxia-headless:66 49":{"term":156,"offset":2564735}}},"time":"2025-02-25T03:15:49.903691104Z","message":"BecomeLeader failed"} {"level":"debug","time":"2025-02-25T03:15:49.903714208Z","component":"leader-controller","namespace":"default","req":{"shard":11,"pu ts":[{"key":"2f52b44b5408436894ff635087ff0f37","value":"AAAAAAAAKyE=","partition_key":"2f52b44b5408436894ff635087ff0f37","sequence_k ey_delta":[1,2,3]}]},"shard":11,"term":345,"time":"2025-02-25T03:15:49.903721952Z","message":"Append operation"} {"level":"debug","time":"2025-02-25T03:15:49.904634716Z","component":"follower-cursor","follower":"data-lost-oxia-0.data-lost-oxia-h eadless:6649","namespace":"default","offset":2585822,"shard":11,"term":345,"time":"2025-02-25T03:15:49.904641202Z","message":"Sendin g entries to follower"} {"level":"debug","time":"2025-02-25T03:15:49.906468805Z","component":"follower-cursor","follower":"data-lost-oxia-0.data-lost-oxia-h eadless:6649","namespace":"default","offset":2585822,"shard":11,"term":345,"time":"2025-02-25T03:15:49.906484415Z","message":"Receiv ed ack"} {"level":"debug","time":"2025-02-25T03:15:49.906818814Z","component":"db","key":"2f52b44b5408436894ff635087ff0f37-000000000000000013 05-00000000000000002610-00000000000000003915","namespace":"default","shard":11,"version":{"version_id":2932018,"created_timestamp":1 740453349903,"modified_timestamp":1740453349903},"time":"2025-02-25T03:15:49.906830244Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:49.906881056Z","component":"db","key":"__oxia/commit-offset","namespace":"default","shard" :11,"version":{"version_id":-1,"modifications_count":2577882,"created_timestamp":1740396891493,"modified_timestamp":1740453349903}," time":"2025-02-25T03:15:49.90688618Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:49.906894687Z","component":"db","key":"__oxia/last-version-id","namespace":"default","shar d":11,"version":{"version_id":-1,"modifications_count":2577882,"created_timestamp":1740396891493,"modified_timestamp":1740453349903} ,"time":"2025-02-25T03:15:49.906899077Z","message":"Applied put operation"} {"level":"debug","time":"2025-02-25T03:15:49.979839865Z","req":{"shard":11,"puts":[{"key":"2f52b44b5408436894ff635087ff0f37","value" :"AAAAAAAAKyI=","partition_key":"2f52b44b5408436894ff635087ff0f37","sequence_key_delta":[1,2,3]}]},"time":"2025-02-25T03:15:49.97984 5378Z","message":"Got request in stream"} ``` ### Modification - Change status to `ServingStatus_LEADER` when all the steps done.
- Loading branch information