Skip to content

Commit

Permalink
[ICD]Retrigger subscription immediately when OnActiveModeNotification…
Browse files Browse the repository at this point in the history
… is invoked when state is FailedICDSubscription
  • Loading branch information
yunhanw-google committed Feb 19, 2025
1 parent 78092a6 commit 265f19d
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 49 deletions.
8 changes: 4 additions & 4 deletions src/app/InteractionModelEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1051,17 +1051,17 @@ void InteractionModelEngine::OnActiveModeNotification(ScopedNodeId aPeer)
}
}

void InteractionModelEngine::OnPeerTypeChange(ScopedNodeId aPeer, ReadClient::PeerType aType)
void InteractionModelEngine::OnPeerOperatingModeChange(ScopedNodeId aPeer, ReadClient::PeerOperatingMode aMode)
{
// TODO: Follow up to use a iterator function to avoid copy/paste here.
for (ReadClient * pListItem = mpActiveReadClientList; pListItem != nullptr;)
{
// It is possible that pListItem is destroyed by the app in OnPeerTypeChange.
// Get the next item before invoking `OnPeerTypeChange`.
// It is possible that pListItem is destroyed by the app in OnPeerOperatingModeChange.
// Get the next item before invoking `OnPeerOperatingModeChange`.
auto pNextItem = pListItem->GetNextClient();
if (ScopedNodeId(pListItem->GetPeerNodeId(), pListItem->GetFabricIndex()) == aPeer)
{
pListItem->OnPeerTypeChange(aType);
pListItem->OnPeerOperatingModeChange(aMode);
}
pListItem = pNextItem;
}
Expand Down
4 changes: 2 additions & 2 deletions src/app/InteractionModelEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,12 @@ class InteractionModelEngine : public Messaging::UnsolicitedMessageHandler,
void OnActiveModeNotification(ScopedNodeId aPeer);

/**
* Used to notify when a peer becomes LIT ICD or vice versa.
* Used to notify when a peer operating mode becomes LIT ICD or vice versa.
*
* ReadClient will call this function when it finds any updates of the OperatingMode attribute from ICD management
* cluster. The application doesn't need to call this function, usually.
*/
void OnPeerTypeChange(ScopedNodeId aPeer, ReadClient::PeerType aType);
void OnPeerOperatingModeChange(ScopedNodeId aPeer, ReadClient::PeerOperatingMode aMode);

/**
* Add a read client to the internally tracked list of weak references. This list is used to
Expand Down
90 changes: 57 additions & 33 deletions src/app/ReadClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,18 @@ void ReadClient::Close(CHIP_ERROR aError, bool allowResubscription)
if (aError == CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT)
{
VerifyOrDie(originalReason == CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT);
ChipLogProgress(DataManagement, "ICD device is inactive mark subscription as InactiveICDSubscription");
ChipLogProgress(DataManagement, "ICD device is inactive, mark subscription as InactiveICDSubscription");
MoveToState(ClientState::InactiveICDSubscription);
return;
}
if (aError == CHIP_ERROR_LIT_CASE_SUBSCRIBE_PRIMING_TIMEOUT)
{
ChipLogProgress(
DataManagement,
"ICD device is unreachable, mark subscription as FailedICDSubscription, retrying is scheduled");
MoveToState(ClientState::FailedICDSubscription);
return;
}
}

//
Expand Down Expand Up @@ -250,6 +258,8 @@ const char * ReadClient::GetStateStr() const
return "SubscriptionActive";
case ClientState::InactiveICDSubscription:
return "InactiveICDSubscription";
case ClientState::FailedICDSubscription:
return "FailedICDSubscription";
}
#endif // CHIP_DETAIL_LOGGING
return "N/A";
Expand Down Expand Up @@ -484,26 +494,27 @@ CHIP_ERROR ReadClient::GenerateDataVersionFilterList(DataVersionFilterIBs::Build

void ReadClient::OnActiveModeNotification()
{
// This function just tries to complete the deferred resubscription logic in `OnLivenessTimeoutCallback`.
VerifyOrDie(mpImEngine->InActiveReadClientList(this));
// If we are not in InactiveICDSubscription state, that means the liveness timeout has not been reached. Simply do nothing.
VerifyOrReturn(IsInactiveICDSubscription());
// If the current state is InactiveICDSubscription (LIT ICD unreachable) or FailedICDSubscription (session/subscription creation failed),
// the previous close call should have executed ClearActiveSubscriptionState.
// Upon receiving a check-in, cancel pending resubscription if any and initiate a new one immediately.
// Case sessions need to be invalidated.
VerifyOrReturn(IsInactiveICDSubscription() && IsFailedICDSubscription());

// When we reach here, the subscription definitely exceeded the liveness timeout. Just continue the unfinished resubscription
// logic in `OnLivenessTimeoutCallback`.
TriggerResubscriptionForLivenessTimeout(CHIP_ERROR_TIMEOUT);
CloseSession();
CancelResubscribeTimer();
MoveToState(ClientState::Idle);
OnResubscribeTimerCallback(nullptr, this);
}

void ReadClient::OnPeerTypeChange(PeerType aType)
void ReadClient::OnPeerOperatingModeChange(PeerOperatingMode aMode)
{
VerifyOrDie(mpImEngine->InActiveReadClientList(this));
ChipLogProgress(DataManagement, "Peer operation mode is now %s LIT ICD.",
mPeerOperatingMode == PeerOperatingMode::kLITICD ? "a" : "not a");

mIsPeerLIT = (aType == PeerType::kLITICD);

ChipLogProgress(DataManagement, "Peer is now %s LIT ICD.", mIsPeerLIT ? "a" : "not a");

// If the peer is no longer LIT, try to wake up the subscription and do resubscribe when necessary.
if (!mIsPeerLIT)
// If the peer operating mode is no longer LIT, try to wake up the subscription and do resubscribe when necessary.
if (mPeerOperatingMode == PeerOperatingMode::kNormal)
{
OnActiveModeNotification();
}
Expand Down Expand Up @@ -720,10 +731,18 @@ void ReadClient::OnResponseTimeout(Messaging::ExchangeContext * apExchangeContex
ChipLogError(DataManagement, "Time out! failed to receive report data from Exchange: " ChipLogFormatExchange,
ChipLogValueExchange(apExchangeContext));

Close(CHIP_ERROR_TIMEOUT);
if (IsPeerLIT() && (IsIdle() || IsAwaitingInitialReport() || IsAwaitingSubscribeResponse()))
{
ChipLogError(DataManagement, "LIT ICD device is unreachable during Subscription establishment procedure with state %s", GetStateStr());
Close(CHIP_ERROR_LIT_CASE_SUBSCRIBE_PRIMING_TIMEOUT);
}
else
{
Close(CHIP_ERROR_TIMEOUT);
}
}

CHIP_ERROR ReadClient::ReadICDOperatingModeFromAttributeDataIB(TLV::TLVReader && aReader, PeerType & aType)
CHIP_ERROR ReadClient::ReadICDOperatingModeFromAttributeDataIB(TLV::TLVReader && aReader, PeerOperatingMode & aMode)
{
Clusters::IcdManagement::Attributes::OperatingMode::TypeInfo::DecodableType operatingMode;

Expand All @@ -733,10 +752,12 @@ CHIP_ERROR ReadClient::ReadICDOperatingModeFromAttributeDataIB(TLV::TLVReader &&
switch (operatingMode)
{
case Clusters::IcdManagement::OperatingModeEnum::kSit:
aType = PeerType::kNormal;
aMode = PeerOperatingMode::kNormal;
break;
case Clusters::IcdManagement::OperatingModeEnum::kLit:
aType = PeerType::kLITICD;
aMode = PeerOperatingMode::kLITICD;
// If operating mode is LIT, this device should be LIT ICD.
mIsPeerLIT = true;
break;
default:
err = CHIP_ERROR_INVALID_ARGUMENT;
Expand Down Expand Up @@ -843,14 +864,14 @@ CHIP_ERROR ReadClient::ProcessAttributeReportIBs(TLV::TLVReader & aAttributeRepo
if (attributePath.MatchesConcreteAttributePath(ConcreteAttributePath(
kRootEndpointId, Clusters::IcdManagement::Id, Clusters::IcdManagement::Attributes::OperatingMode::Id)))
{
PeerType peerType;
PeerOperatingMode peerMode;
TLV::TLVReader operatingModeTlvReader;
operatingModeTlvReader.Init(dataReader);
if (CHIP_NO_ERROR == ReadICDOperatingModeFromAttributeDataIB(std::move(operatingModeTlvReader), peerType))
if (CHIP_NO_ERROR == ReadICDOperatingModeFromAttributeDataIB(std::move(operatingModeTlvReader), peerMode))
{
// It is safe to call `OnPeerTypeChange` since we are in the middle of parsing the attribute data, And
// It is safe to call `OnPeerOperatingModeChange` since we are in the middle of parsing the attribute data, And
// the subscription should be active so `OnActiveModeNotification` is a no-op in this case.
InteractionModelEngine::GetInstance()->OnPeerTypeChange(mPeer, peerType);
InteractionModelEngine::GetInstance()->OnPeerOperatingModeChange(mPeer, peerMode);
}
else
{
Expand Down Expand Up @@ -1029,15 +1050,16 @@ void ReadClient::OnLivenessTimeoutCallback(System::Layer * apSystemLayer, void *
"Subscription Liveness timeout with SubscriptionID = 0x%08" PRIx32 ", Peer = %02x:" ChipLogFormatX64,
_this->mSubscriptionId, _this->GetFabricIndex(), ChipLogValueX64(_this->GetPeerNodeId()));

if (_this->mIsPeerLIT)
if (_this->IsPeerLIT())
{
subscriptionTerminationCause = CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT;
}

_this->TriggerResubscriptionForLivenessTimeout(subscriptionTerminationCause);
_this->CloseSession();
_this->Close(subscriptionTerminationCause);
}

void ReadClient::TriggerResubscriptionForLivenessTimeout(CHIP_ERROR aReason)
void ReadClient::CloseSession()
{
// We didn't get a message from the server on time; it's possible that it no
// longer has a useful CASE session to us. Mark defunct all sessions that
Expand All @@ -1060,8 +1082,6 @@ void ReadClient::TriggerResubscriptionForLivenessTimeout(CHIP_ERROR aReason)
session->MarkAsDefunct();
});
}

Close(aReason);
}

CHIP_ERROR ReadClient::ProcessSubscribeResponse(System::PacketBufferHandle && aPayload)
Expand Down Expand Up @@ -1270,10 +1290,10 @@ CHIP_ERROR ReadClient::DefaultResubscribePolicy(CHIP_ERROR aTerminationCause)
ChipLogProgress(DataManagement, "ICD device is inactive, skipping scheduling resubscribe within DefaultResubscribePolicy");
return CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT;
}

VerifyOrReturnError(IsIdle(), CHIP_ERROR_INCORRECT_STATE);

auto timeTillNextResubscription = ComputeTimeTillNextSubscription();

ChipLogProgress(DataManagement,
"Will try to resubscribe to %02x:" ChipLogFormatX64 " at retry index %" PRIu32 " after %" PRIu32
"ms due to error %" CHIP_ERROR_FORMAT,
Expand Down Expand Up @@ -1305,9 +1325,8 @@ void ReadClient::HandleDeviceConnectionFailure(void * context, const Operational
{
ReadClient * const _this = static_cast<ReadClient *>(context);
VerifyOrDie(_this != nullptr);

ChipLogError(DataManagement, "Failed to establish CASE for re-subscription with error '%" CHIP_ERROR_FORMAT "'",
failureInfo.error.Format());
CHIP_ERROR err = failureInfo.error;
ChipLogError(DataManagement, "Failed to establish CASE for re-subscription with error '%" CHIP_ERROR_FORMAT "'", err.Format());

#if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP
#if CHIP_DETAIL_LOGGING
Expand All @@ -1322,7 +1341,13 @@ void ReadClient::HandleDeviceConnectionFailure(void * context, const Operational
_this->mMinimalResubscribeDelay = System::Clock::kZero;
#endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP

_this->Close(failureInfo.error);
if (_this->IsPeerLIT() && err == CHIP_ERROR_TIMEOUT && _this->IsIdle())
{
ChipLogError(DataManagement, "LIT ICD device is unreachable during CASE establishment procedure");
err = CHIP_ERROR_LIT_CASE_SUBSCRIBE_PRIMING_TIMEOUT;
}

_this->Close(err);
}

void ReadClient::OnResubscribeTimerCallback(System::Layer * /* If this starts being used, fix callers that pass nullptr */,
Expand All @@ -1334,7 +1359,6 @@ void ReadClient::OnResubscribeTimerCallback(System::Layer * /* If this starts be
_this->mIsResubscriptionScheduled = false;

CHIP_ERROR err;

ChipLogProgress(DataManagement, "OnResubscribeTimerCallback: ForceCASE = %d", _this->mForceCaseOnNextResub);
_this->mNumRetries++;

Expand Down
25 changes: 16 additions & 9 deletions src/app/ReadClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ class ReadClient : public Messaging::ExchangeDelegate
Subscribe,
};

enum class PeerType : uint8_t
enum class PeerOperatingMode : uint8_t
{
kNormal,
kLITICD,
Expand Down Expand Up @@ -351,11 +351,11 @@ class ReadClient : public Messaging::ExchangeDelegate
* Re-activate an inactive subscription.
*
* When subscribing to LIT-ICD and liveness timeout reached and OnResubscriptionNeeded returns
* CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT, the read client will move to the InactiveICDSubscription state and
* resubscription can be triggered via OnActiveModeNotification().
* CHIP_ERROR_LIT_SUBSCRIBE_INACTIVE_TIMEOUT, the read client will move to the InactiveICDSubscription state and the subscription is put on hold.
* resubscription can be triggered via OnActiveModeNotification().
* When CASE connection fails or client fails in subscription priming stage, client will move to FailedICDSubscription state
* and subscription is still retrying with back-off algorithm, the pending subscription can be triggered via OnActiveModeNotification().
*
* If the subscription is not in the `InactiveICDSubscription` state, this function will do nothing. So it is always safe to
* call this function when a check-in message is received.
*/
void OnActiveModeNotification();

Expand Down Expand Up @@ -503,6 +503,9 @@ class ReadClient : public Messaging::ExchangeDelegate
*/
Optional<System::Clock::Timeout> GetSubscriptionTimeout();

bool IsPeerLIT() { return mIsPeerLIT; }
PeerOperatingMode GetLITOperatingMode() { return mPeerOperatingMode; }

private:
friend class TestReadInteraction;
friend class InteractionModelEngine;
Expand All @@ -514,6 +517,7 @@ class ReadClient : public Messaging::ExchangeDelegate
AwaitingSubscribeResponse, ///< The client is waiting for subscribe response
SubscriptionActive, ///< The client is maintaining subscription
InactiveICDSubscription, ///< The client is waiting to resubscribe for LIT device
FailedICDSubscription, ///< The client is failing during case establishment or subscription priming stage for LIT device
};

enum class ReportType
Expand All @@ -534,20 +538,21 @@ class ReadClient : public Messaging::ExchangeDelegate
void OnResponseTimeout(Messaging::ExchangeContext * apExchangeContext) override;

/**
* Updates the type (LIT ICD or not) of the peer.
* Updates the operating mode (LIT ICD or not) of the peer.
*
* When the subscription is active, this function will just set the flag. When the subscription is an InactiveICDSubscription,
* setting the peer type to SIT or normal devices will also trigger a resubscription attempt.
*
*/
void OnPeerTypeChange(PeerType aType);
void OnPeerOperatingModeChange(PeerOperatingMode aMode);

/**
* Check if current read client is being used
*
*/
bool IsIdle() const { return mState == ClientState::Idle; }
bool IsInactiveICDSubscription() const { return mState == ClientState::InactiveICDSubscription; }
bool IsFailedICDSubscription() const { return mState == ClientState::FailedICDSubscription; }
bool IsSubscriptionActive() const { return mState == ClientState::SubscriptionActive; }
bool IsAwaitingInitialReport() const { return mState == ClientState::AwaitingInitialReport; }
bool IsAwaitingSubscribeResponse() const { return mState == ClientState::AwaitingSubscribeResponse; }
Expand All @@ -562,7 +567,7 @@ class ReadClient : public Messaging::ExchangeDelegate
CHIP_ERROR BuildDataVersionFilterList(DataVersionFilterIBs::Builder & aDataVersionFilterIBsBuilder,
const Span<AttributePathParams> & aAttributePaths,
const Span<DataVersionFilter> & aDataVersionFilters, bool & aEncodedDataVersionList);
CHIP_ERROR ReadICDOperatingModeFromAttributeDataIB(TLV::TLVReader && aReader, PeerType & aType);
CHIP_ERROR ReadICDOperatingModeFromAttributeDataIB(TLV::TLVReader && aReader, PeerOperatingMode & aMode);
CHIP_ERROR ProcessAttributeReportIBs(TLV::TLVReader & aAttributeDataIBsReader);
CHIP_ERROR ProcessEventReportIBs(TLV::TLVReader & aEventReportIBsReader);

Expand All @@ -572,7 +577,7 @@ class ReadClient : public Messaging::ExchangeDelegate
CHIP_ERROR ComputeLivenessCheckTimerTimeout(System::Clock::Timeout * aTimeout);
void CancelLivenessCheckTimer();
void CancelResubscribeTimer();
void TriggerResubscriptionForLivenessTimeout(CHIP_ERROR aReason);
void CloseSession();
void MoveToState(const ClientState aTargetState);
CHIP_ERROR ProcessAttributePath(AttributePathIB::Parser & aAttributePath, ConcreteDataAttributePath & aClusterInfo);
CHIP_ERROR ProcessReportData(System::PacketBufferHandle && aPayload, ReportType aReportType);
Expand Down Expand Up @@ -667,6 +672,8 @@ class ReadClient : public Messaging::ExchangeDelegate

bool mIsPeerLIT = false;

PeerOperatingMode mPeerOperatingMode = PeerOperatingMode::kNormal;

// End Of Container (0x18) uses one byte.
static constexpr uint16_t kReservedSizeForEndOfContainer = 1;
// Reserved size for the uint8_t InteractionModelRevision flag, which takes up 1 byte for the control tag and 1 byte for the
Expand Down
10 changes: 9 additions & 1 deletion src/lib/core/CHIPError.h
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,15 @@ using CHIP_ERROR = ::chip::ChipError;
*/
#define CHIP_ERROR_TLV_CONTAINER_OPEN CHIP_CORE_ERROR(0x27)

// AVAILABLE: 0x28
/**
* @def CHIP_ERROR_LIT_CASE_SUBSCRIBE_PRIMING_TIMEOUT
*
* @brief
* CASE fails to be established or subscription fails to be established
*
*/
#define CHIP_ERROR_LIT_CASE_SUBSCRIBE_PRIMING_TIMEOUT CHIP_CORE_ERROR(0x28)

// AVAILABLE: 0x29

/**
Expand Down

0 comments on commit 265f19d

Please sign in to comment.