@@ -2691,14 +2691,25 @@ func (s *Store) reResolve(c *RegionCache) (bool, error) {
2691
2691
if s .addr != addr || ! s .IsSameLabels (store .GetLabels ()) {
2692
2692
newStore := & Store {storeID : s .storeID , addr : addr , peerAddr : store .GetPeerAddress (), saddr : store .GetStatusAddress (), storeType : storeType , labels : store .GetLabels (), state : uint64 (resolved )}
2693
2693
newStore .livenessState = atomic .LoadUint32 (& s .livenessState )
2694
- newStore .unreachableSince = s .unreachableSince
2694
+ if newStore .getLivenessState () != reachable {
2695
+ newStore .unreachableSince = s .unreachableSince
2696
+ go newStore .checkUntilHealth (c , newStore .getLivenessState (), storeReResolveInterval )
2697
+ }
2695
2698
c .storeMu .Lock ()
2696
2699
if s .addr == addr {
2697
2700
newStore .slowScore = s .slowScore
2698
2701
}
2699
2702
c .storeMu .stores [newStore .storeID ] = newStore
2700
2703
c .storeMu .Unlock ()
2701
2704
s .setResolveState (deleted )
2705
+ logutil .BgLogger ().Info ("store address or labels changed, add new store and mark old store deleted" ,
2706
+ zap .Uint64 ("store" , s .storeID ),
2707
+ zap .String ("old-addr" , s .addr ),
2708
+ zap .Any ("old-labels" , s .labels ),
2709
+ zap .String ("old-liveness" , s .getLivenessState ().String ()),
2710
+ zap .String ("new-addr" , newStore .addr ),
2711
+ zap .Any ("new-labels" , newStore .labels ),
2712
+ zap .String ("new-liveness" , newStore .getLivenessState ().String ()))
2702
2713
return false , nil
2703
2714
}
2704
2715
s .changeResolveStateTo (needCheck , resolved )
@@ -2850,6 +2861,8 @@ func (s livenessState) String() string {
2850
2861
}
2851
2862
}
2852
2863
2864
+ var storeReResolveInterval = 30 * time .Second // Default interval between re-resolve attempts in the store health-check loop: passed to checkUntilHealth by reResolve and used as the initial reResolveInterval in startHealthCheckLoopIfNeeded (where the injectReResolveInterval failpoint may override it). NOTE(review): presumably a var rather than a const so tests can shorten it — confirm.
2865
+
2853
2866
func (s * Store ) startHealthCheckLoopIfNeeded (c * RegionCache , liveness livenessState ) {
2854
2867
// This mechanism doesn't support non-TiKV stores currently.
2855
2868
if s .storeType != tikvrpc .TiKV {
@@ -2861,7 +2874,7 @@ func (s *Store) startHealthCheckLoopIfNeeded(c *RegionCache, liveness livenessSt
2861
2874
// It may be already started by another thread.
2862
2875
if atomic .CompareAndSwapUint32 (& s .livenessState , uint32 (reachable ), uint32 (liveness )) {
2863
2876
s .unreachableSince = time .Now ()
2864
- reResolveInterval := 30 * time . Second
2877
+ reResolveInterval := storeReResolveInterval
2865
2878
if val , err := util .EvalFailpoint ("injectReResolveInterval" ); err == nil {
2866
2879
if dur , err := time .ParseDuration (val .(string )); err == nil {
2867
2880
reResolveInterval = dur
@@ -2889,26 +2902,18 @@ func (s *Store) checkUntilHealth(c *RegionCache, liveness livenessState, reResol
2889
2902
case <- c .ctx .Done ():
2890
2903
return
2891
2904
case <- ticker .C :
2905
+ if s .getResolveState () == deleted {
2906
+ logutil .BgLogger ().Info ("[health check] store meta deleted, stop checking" , zap .Uint64 ("storeID" , s .storeID ), zap .String ("addr" , s .addr ), zap .String ("state" , s .getResolveState ().String ()))
2907
+ return
2908
+ }
2892
2909
if time .Since (lastCheckPDTime ) > reResolveInterval {
2893
2910
lastCheckPDTime = time .Now ()
2894
2911
2895
2912
valid , err := s .reResolve (c )
2896
2913
if err != nil {
2897
2914
logutil .BgLogger ().Warn ("[health check] failed to re-resolve unhealthy store" , zap .Error (err ))
2898
2915
} else if ! valid {
2899
- if s .getResolveState () == deleted {
2900
- // if the store is deleted, a new store with same id must be inserted (guaranteed by reResolve).
2901
- c .storeMu .RLock ()
2902
- newStore := c .storeMu .stores [s .storeID ]
2903
- c .storeMu .RUnlock ()
2904
- logutil .BgLogger ().Info ("[health check] store meta changed" ,
2905
- zap .Uint64 ("storeID" , s .storeID ),
2906
- zap .String ("oldAddr" , s .addr ),
2907
- zap .String ("oldLabels" , fmt .Sprintf ("%v" , s .labels )),
2908
- zap .String ("newAddr" , newStore .addr ),
2909
- zap .String ("newLabels" , fmt .Sprintf ("%v" , newStore .labels )))
2910
- go newStore .checkUntilHealth (c , liveness , reResolveInterval )
2911
- }
2916
+ logutil .BgLogger ().Info ("[health check] store meta deleted, stop checking" , zap .Uint64 ("storeID" , s .storeID ), zap .String ("addr" , s .addr ), zap .String ("state" , s .getResolveState ().String ()))
2912
2917
return
2913
2918
}
2914
2919
}
0 commit comments