/* * * Copyright (c) 2020-2021 Project CHIP Authors * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file * This file contains implementation of Device class. The objects of this * class will be used by Controller applications to interact with CHIP * devices. The class provides mechanism to construct, send and receive * messages to and from the corresponding CHIP devices. */ #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace chip::Callback; using chip::AddressResolve::NodeLookupRequest; using chip::AddressResolve::Resolver; using chip::AddressResolve::ResolveResult; using namespace chip::Tracing; namespace chip { void OperationalSessionSetup::MoveToState(State aTargetState) { if (mState != aTargetState) { ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState), to_underlying(aTargetState)); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mState == State::WaitingForRetry) { CancelSessionSetupReattempt(); } #endif mState = aTargetState; if (aTargetState != State::Connecting) { CleanupCASEClient(); } } } bool OperationalSessionSetup::AttachToExistingSecureSession() { VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress || mState == State::WaitingForRetry, false); auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode( mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE), mTransportPayloadCapability); if (!sessionHandle.HasValue()) return false; ChipLogProgress(Discovery, "Found an existing secure session to [%u:" ChipLogFormatX64 "]!", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId())); mDeviceAddress = sessionHandle.Value()->AsSecureSession()->GetPeerAddress(); if (!mSecureSession.Grab(sessionHandle.Value())) return false; return true; } void OperationalSessionSetup::Connect(Callback::Callback * onConnection, Callback::Callback * onFailure, Callback::Callback * onSetupFailure, TransportPayloadCapability transportPayloadCapability) { CHIP_ERROR err = CHIP_NO_ERROR; bool isConnected = false; mTransportPayloadCapability = transportPayloadCapability; // // Always enqueue our user provided callbacks into our callback list. // If anything goes wrong below, we'll trigger failures (including any queued from // a previous iteration which in theory shouldn't happen, but this is written to be more defensive) // EnqueueConnectionCallbacks(onConnection, onFailure, onSetupFailure); switch (mState) { case State::Uninitialized: err = CHIP_ERROR_INCORRECT_STATE; break; case State::NeedsAddress: isConnected = AttachToExistingSecureSession(); if (!isConnected) { // LookupPeerAddress could perhaps call back with a result // synchronously, so do our state update first. MoveToState(State::ResolvingAddress); err = LookupPeerAddress(); if (err != CHIP_NO_ERROR) { // Roll back the state change, since we are presumably not in // the middle of a lookup. MoveToState(State::NeedsAddress); } } break; case State::ResolvingAddress: case State::WaitingForRetry: isConnected = AttachToExistingSecureSession(); break; case State::HasAddress: isConnected = AttachToExistingSecureSession(); if (!isConnected) { // We should not actually every be in be in State::HasAddress. This // is because in the same call that we moved to State::HasAddress // we either move to State::Connecting or call // DequeueConnectionCallbacks with an error thus releasing // ourselves before any call would reach this section of code. err = CHIP_ERROR_INCORRECT_STATE; } break; case State::Connecting: break; case State::SecureConnected: isConnected = true; break; default: err = CHIP_ERROR_INCORRECT_STATE; } if (isConnected) { MoveToState(State::SecureConnected); } // // Dequeue all our callbacks on either encountering an error // or if we successfully connected. Both should not be set // simultaneously. // if (err != CHIP_NO_ERROR || isConnected) { DequeueConnectionCallbacks(err); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. // While it is odd to have an explicit return here at the end of the function, we do so // as a precaution in case someone later on adds something to the end of this function. return; } } void OperationalSessionSetup::Connect(Callback::Callback * onConnection, Callback::Callback * onFailure, TransportPayloadCapability transportPayloadCapability) { Connect(onConnection, onFailure, nullptr, transportPayloadCapability); } void OperationalSessionSetup::Connect(Callback::Callback * onConnection, Callback::Callback * onSetupFailure, TransportPayloadCapability transportPayloadCapability) { Connect(onConnection, nullptr, onSetupFailure, transportPayloadCapability); } void OperationalSessionSetup::UpdateDeviceData(const ResolveResult & result) { auto & config = result.mrpRemoteConfig; auto addr = result.address; #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Make sure to clear out our reason for trying the next result first thing, // so it does not stick around in various error cases. bool tryingNextResultDueToSessionEstablishmentError = mTryingNextResultDueToSessionEstablishmentError; mTryingNextResultDueToSessionEstablishmentError = false; #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mState == State::Uninitialized) { return; } #if CHIP_DETAIL_LOGGING char peerAddrBuff[Transport::PeerAddress::kMaxToStringSize]; addr.ToString(peerAddrBuff); ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Updating device address to %s while in state %d", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), peerAddrBuff, static_cast(mState)); #endif mDeviceAddress = addr; // Initialize CASE session state with any MRP parameters that DNS-SD has provided. // It can be overridden by CASE session protocol messages that include MRP parameters. if (mCASEClient) { mCASEClient->SetRemoteMRPIntervals(config); } if (mState != State::ResolvingAddress) { ChipLogError(Discovery, "Received UpdateDeviceData in incorrect state"); DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE); // Do not touch `this` instance anymore; it has been destroyed in // DequeueConnectionCallbacks. return; } MoveToState(State::HasAddress); mInitParams.sessionManager->UpdateAllSessionsPeerAddress(mPeerId, addr); if (mPerformingAddressUpdate) { // Nothing else to do here. DequeueConnectionCallbacks(CHIP_NO_ERROR); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. return; } CHIP_ERROR err = EstablishConnection(result); LogErrorOnFailure(err); if (err == CHIP_NO_ERROR) { // We expect to get a callback via OnSessionEstablished or OnSessionEstablishmentError to continue // the state machine forward. #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (tryingNextResultDueToSessionEstablishmentError) { // Our retry has already been kicked off, so claim 0 delay until it // starts. We only reach this from OnSessionEstablishmentError when // the error is CHIP_ERROR_TIMEOUT. NotifyRetryHandlers(CHIP_ERROR_TIMEOUT, config, System::Clock::kZero); } #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES return; } // Move to the ResolvingAddress state, in case we have more results, // since we expect to receive results in that state. Pretend like we moved // on directly to this address from whatever triggered us to try this result // (so restore mTryingNextResultDueToSessionEstablishmentError to the value // it had at the start of this function). MoveToState(State::ResolvingAddress); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES mTryingNextResultDueToSessionEstablishmentError = tryingNextResultDueToSessionEstablishmentError; #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) { // No need to NotifyRetryHandlers, since we never actually spent any // time trying the previous result. Whatever work we need to do has // been handled by our recursive OnNodeAddressResolved callback. Make // sure not to touch `this` under here, because it might have been // deleted by OnNodeAddressResolved. return; } // No need to reset mTryingNextResultDueToSessionEstablishmentError here, // because we're about to delete ourselves. DequeueConnectionCallbacks(err); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ResolveResult & result) { auto & config = result.mrpRemoteConfig; #if INET_CONFIG_ENABLE_TCP_ENDPOINT if (mTransportPayloadCapability == TransportPayloadCapability::kLargePayload) { if (result.supportsTcpServer) { // Set the transport type for carrying large payloads mDeviceAddress.SetTransportType(chip::Transport::Type::kTcp); } else { // we should not set the large payload while the TCP support is not enabled ChipLogError( Discovery, "LargePayload session requested but peer does not support TCP server, PeerNodeId=" ChipLogFormatScopedNodeId, ChipLogValueScopedNodeId(mPeerId)); return CHIP_ERROR_INTERNAL; } } #endif mCASEClient = mClientPool->Allocate(); VerifyOrReturnError(mCASEClient != nullptr, CHIP_ERROR_NO_MEMORY); MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession); CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this); if (err != CHIP_NO_ERROR) { MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err); CleanupCASEClient(); return err; } MoveToState(State::Connecting); return CHIP_NO_ERROR; } void OperationalSessionSetup::EnqueueConnectionCallbacks(Callback::Callback * onConnection, Callback::Callback * onFailure, Callback::Callback * onSetupFailure) { mCallbacks.Enqueue(onConnection, onFailure, onSetupFailure); } void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error, SessionEstablishmentStage stage, ReleaseBehavior releaseBehavior) { // We expect that we only have callbacks if we are not performing just address update. VerifyOrDie(!mPerformingAddressUpdate || mCallbacks.IsEmpty()); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Clear out mConnectionRetry, so that those cancelables are not holding // pointers to us, since we're about to go away. while (auto * cb = mConnectionRetry.First()) { cb->Cancel(); } #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Gather up state we will need for our notifications. SuccessFailureCallbackList readyCallbacks; readyCallbacks.EnqueueTakeAll(mCallbacks); auto * exchangeMgr = mInitParams.exchangeMgr; Optional optionalSessionHandle = mSecureSession.Get(); ScopedNodeId peerId = mPeerId; System::Clock::Milliseconds16 requestedBusyDelay = #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP mRequestedBusyDelay; #else System::Clock::kZero; #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP if (releaseBehavior == ReleaseBehavior::Release) { VerifyOrDie(mReleaseDelegate != nullptr); mReleaseDelegate->ReleaseSession(this); } // DO NOT touch any members of this object after this point. It's dead. NotifyConnectionCallbacks(readyCallbacks, error, stage, peerId, exchangeMgr, optionalSessionHandle, requestedBusyDelay); } void OperationalSessionSetup::NotifyConnectionCallbacks(SuccessFailureCallbackList & ready, CHIP_ERROR error, SessionEstablishmentStage stage, const ScopedNodeId & peerId, Messaging::ExchangeManager * exchangeMgr, const Optional & optionalSessionHandle, System::Clock::Milliseconds16 requestedBusyDelay) { Callback::Callback * onConnected; Callback::Callback * onConnectionFailure; Callback::Callback * onSetupFailure; while (ready.Take(onConnected, onConnectionFailure, onSetupFailure)) { if (error == CHIP_NO_ERROR) { VerifyOrDie(exchangeMgr); VerifyOrDie(optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession()); if (onConnected != nullptr) { onConnected->mCall(onConnected->mContext, *exchangeMgr, optionalSessionHandle.Value()); // That sucessful call might have made the session inactive. If it did, then we should // not call any more success callbacks, since we do not in fact have an active session // for them, and if they try to put the session in a holder that will fail, and then // trying to use the holder as if it has a session will crash. if (!optionalSessionHandle.Value()->AsSecureSession()->IsActiveSession()) { ChipLogError(Discovery, "Success callback for connection to " ChipLogFormatScopedNodeId " tore down session", ChipLogValueScopedNodeId(peerId)); error = CHIP_ERROR_CONNECTION_ABORTED; } } } else // error { if (onConnectionFailure != nullptr) { onConnectionFailure->mCall(onConnectionFailure->mContext, peerId, error); } if (onSetupFailure != nullptr) { ConnectionFailureInfo failureInfo(peerId, error, stage); #if CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP if (error == CHIP_ERROR_BUSY) { failureInfo.requestedBusyDelay.Emplace(requestedBusyDelay); } #endif // CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP onSetupFailure->mCall(onSetupFailure->mContext, failureInfo); } } } } void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage) { VerifyOrReturn(mState == State::Connecting, ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting")); // If this condition ever changes, we may need to store the error in a // member instead of having a boolean // mTryingNextResultDueToSessionEstablishmentError, so we can recover the // error in UpdateDeviceData. if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error) { #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Make a copy of the ReliableMessageProtocolConfig, since our // mCaseClient is about to go away once we change state. ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals(); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Move to the ResolvingAddress state, in case we have more results, // since we expect to receive results in that state. MoveToState(State::ResolvingAddress); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES mTryingNextResultDueToSessionEstablishmentError = true; #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) { // Whatever work we needed to do has been handled by our // OnNodeAddressResolved callback. Make sure not to touch `this` // under here, because it might have been deleted by // OnNodeAddressResolved. return; } #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES mTryingNextResultDueToSessionEstablishmentError = false; #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Moving back to the Connecting state would be a bit of a lie, since we // don't have an mCASEClient. Just go back to NeedsAddress, since // that's really where we are now. MoveToState(State::NeedsAddress); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mRemainingAttempts > 0) { System::Clock::Seconds16 reattemptDelay; CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay); if (err == CHIP_NO_ERROR) { MoveToState(State::WaitingForRetry); NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay); return; } } #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES } // Session failed to be established. This is when discovery is also stopped MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error); MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error); DequeueConnectionCallbacks(error, stage); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) { #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES || CHIP_CONFIG_ENABLE_BUSY_HANDLING_FOR_OPERATIONAL_SESSION_SETUP // Store the requested delay, so that we can use it for scheduling our // retry or communicate it to our API consumer. mRequestedBusyDelay = requestedDelay; #endif } void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session) { VerifyOrReturn(mState == State::Connecting, ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting")); // Session has been established. This is when discovery is also stopped MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR); MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR); if (!mSecureSession.Grab(session)) { // Got an invalid session, just dispatch an error. We have to do this // so we don't leak. DequeueConnectionCallbacks(CHIP_ERROR_INCORRECT_STATE); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. return; } MoveToState(State::SecureConnected); DequeueConnectionCallbacks(CHIP_NO_ERROR); } void OperationalSessionSetup::CleanupCASEClient() { if (mCASEClient) { mClientPool->Release(mCASEClient); mCASEClient = nullptr; } } OperationalSessionSetup::~OperationalSessionSetup() { if (mAddressLookupHandle.IsActive()) { ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Cancelling incomplete address resolution as device is being deleted.", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId())); // Skip cancel callback since the destructor is being called, so we assume that this object is // obviously not used anymore CHIP_ERROR err = Resolver::Instance().CancelLookup(mAddressLookupHandle, Resolver::FailureCallback::Skip); if (err != CHIP_NO_ERROR) { ChipLogError(Discovery, "Lookup cancel failed: %" CHIP_ERROR_FORMAT, err.Format()); } } if (mCASEClient) { // Make sure we don't leak it. mClientPool->Release(mCASEClient); } #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES CancelSessionSetupReattempt(); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES DequeueConnectionCallbacks(CHIP_ERROR_CANCELLED, ReleaseBehavior::DoNotRelease); } CHIP_ERROR OperationalSessionSetup::LookupPeerAddress() { #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mRemainingAttempts > 0) { --mRemainingAttempts; } if (mAttemptsDone < UINT8_MAX) { ++mAttemptsDone; } if (mResolveAttemptsAllowed > 0) { --mResolveAttemptsAllowed; } MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // NOTE: This is public API that can be used to update our stored peer // address even when we are in State::Connected, so we do not make any // MoveToState calls in this method. if (mAddressLookupHandle.IsActive()) { ChipLogProgress(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: Operational node lookup already in progress. Will NOT start a new one.", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId())); return CHIP_NO_ERROR; } // This code can be reached multiple times, if we discover multiple addresses or do retries. // The metric backend can handle this and always picks the earliest occurrence as the start of the event. MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery); auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex()); VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX); PeerId peerId(fabricInfo->GetCompressedFabricId(), mPeerId.GetNodeId()); NodeLookupRequest request(peerId); return Resolver::Instance().LookupNode(request, mAddressLookupHandle); } void OperationalSessionSetup::PerformAddressUpdate() { if (mPerformingAddressUpdate) { // We are already in the middle of a lookup from a previous call to // PerformAddressUpdate. In that case we will just exit right away as // we are already looking to update the results from the previous lookup. return; } // We must be newly-allocated to handle this address lookup, so must be in the NeedsAddress state. VerifyOrDie(mState == State::NeedsAddress); // We are doing an address lookup whether we have an active session for this peer or not. mPerformingAddressUpdate = true; MoveToState(State::ResolvingAddress); CHIP_ERROR err = LookupPeerAddress(); if (err != CHIP_NO_ERROR) { ChipLogError(Discovery, "Failed to look up peer address: %" CHIP_ERROR_FORMAT, err.Format()); DequeueConnectionCallbacks(err); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. return; } } void OperationalSessionSetup::OnNodeAddressResolved(const PeerId & peerId, const ResolveResult & result) { UpdateDeviceData(result); } void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerId, CHIP_ERROR reason) { ChipLogError(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: operational discovery failed: %" CHIP_ERROR_FORMAT, mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), reason.Format()); #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // If we're in a mode where we would generally retry CASE, retry operational // discovery if we're allowed to. That allows us to more-gracefully handle broken networks // where multicast DNS does not actually work and hence only the initial // unicast DNS-SD queries get a response. // // We check for State::ResolvingAddress just in case in the meantime // something weird happened and we are no longer trying to resolve an // address. if (mState == State::ResolvingAddress && mResolveAttemptsAllowed > 0) { ChipLogProgress(Discovery, "Retrying operational DNS-SD discovery. Attempts remaining: %u", mResolveAttemptsAllowed); // Pretend like our previous attempt (i.e. call to LookupPeerAddress) // has not happened for purposes of the generic attempt counters, so we // don't mess up the counters for our actual CASE retry logic. if (mRemainingAttempts < UINT8_MAX) { ++mRemainingAttempts; } if (mAttemptsDone > 0) { --mAttemptsDone; } MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone); CHIP_ERROR err = LookupPeerAddress(); if (err == CHIP_NO_ERROR) { // We need to notify our consumer that the resolve will take more // time, but we don't actually know how much time it will take, // because the resolver does not expose that information. Just use // one minute to be safe. using namespace chip::System::Clock::Literals; NotifyRetryHandlers(reason, 60_s16); return; } } #endif MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason); // No need to modify any variables in `this` since call below releases `this`. DequeueConnectionCallbacks(reason); // Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount) { if (attemptCount == 0) { // Nothing to do. return; } if (mState != State::NeedsAddress) { // We're in the middle of an attempt already, so decrement attemptCount // by 1 to account for that. --attemptCount; } if (attemptCount > mRemainingAttempts) { mRemainingAttempts = attemptCount; } if (attemptCount > mResolveAttemptsAllowed) { mResolveAttemptsAllowed = attemptCount; } } CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay) { VerifyOrDie(mRemainingAttempts > 0); // Try again, but not if things are in shutdown such that we can't get // to a system layer, and not if we've run out of attempts. if (!mInitParams.exchangeMgr->GetSessionManager() || !mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()) { return CHIP_ERROR_INCORRECT_STATE; } MoveToState(State::NeedsAddress); // Stop exponential backoff before our delays get too large. // // Note that mAttemptsDone is always > 0 here, because we have // just finished one attempt. VerifyOrDie(mAttemptsDone > 0); static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >= (1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF), "Our backoff calculation will overflow."); System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16( static_cast(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS << std::min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF))); const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero; if (responseWasBusy) { if (mRequestedBusyDelay > actualTimerDelay) { actualTimerDelay = mRequestedBusyDelay; } // Reset mRequestedBusyDelay now that we have consumed it, so it does // not affect future reattempts not triggered by a busy response. mRequestedBusyDelay = System::Clock::kZero; } if (mAttemptsDone % 2 == 0) { // It's possible that the other side received one of our Sigma1 messages // and then failed to get its Sigma2 back to us. If that's the case, it // will be waiting for that Sigma2 to time out before it starts // listening for Sigma1 messages again. // // To handle that, on every other retry, add the amount of time it would // take the other side to time out. It would be nice if we could rely // on the delay reported in a BUSY response to just tell us that value, // but in practice for old devices BUSY often sends some hardcoded value // that tells us nothing about when the other side will decide it has // timed out. // // Unfortunately, we do not have the MRP config for the other side here, // but in practice if the other side is using its local config to // compute Sigma2 response timeouts, then it's also returning useful // values with BUSY, so we will wait long enough. auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig())); actualTimerDelay += additionalTimeout; } timerDelay = std::chrono::duration_cast(actualTimerDelay); CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this); // TODO: If responseWasBusy, should we increment, mRemainingAttempts and // mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to // tell what consumers expect out of a capped retry count here. // The cast on count() is needed because the type count() returns might not // actually be uint16_t; on some platforms it's int. ChipLogProgress(Discovery, "OperationalSessionSetup:attempts done: %u, attempts left: %u, retry delay %us, status %" CHIP_ERROR_FORMAT, mAttemptsDone, mRemainingAttempts, static_cast(timerDelay.count()), err.Format()); return err; } void OperationalSessionSetup::CancelSessionSetupReattempt() { // If we can't get a system layer, there is no way for us to cancel things // at this point, but hopefully that's because everything is torn down // anyway and hence the timer will not fire. auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager(); VerifyOrReturn(sessionManager != nullptr); auto * systemLayer = sessionManager->SystemLayer(); VerifyOrReturn(systemLayer != nullptr); systemLayer->CancelTimer(TrySetupAgain, this); } void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state) { auto * self = static_cast(state); self->MoveToState(State::ResolvingAddress); CHIP_ERROR err = self->LookupPeerAddress(); if (err == CHIP_NO_ERROR) { return; } // Give up; we could not start a lookup. self->DequeueConnectionCallbacks(err); // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } void OperationalSessionSetup::AddRetryHandler(Callback::Callback * onRetry) { mConnectionRetry.Enqueue(onRetry->Cancel()); } void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig, System::Clock::Seconds16 retryDelay) { // Compute the time we are likely to need to detect that the retry has // failed. System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig); auto timeoutSecs = std::chrono::duration_cast(messageTimeout); // Add 1 second in case we had fractional milliseconds in messageTimeout. using namespace chip::System::Clock::Literals; NotifyRetryHandlers(error, timeoutSecs + 1_s16 + retryDelay); } void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, System::Clock::Seconds16 timeoutEstimate) { // We have to be very careful here: Calling into these handlers might in // theory destroy the Callback objects involved, but unlike the // succcess/failure cases we don't want to just clear the handlers from our // list when we are calling them, because we might need to call a given // handler more than once. // // To handle this we: // // 1) Snapshot the list of handlers up front, so if any of the handlers // triggers an AddRetryHandler with some other handler that does not // affect the list we plan to notify here. // // 2) When planning to notify a handler move it to a new list that contains // just that handler. This way if it gets canceled as part of the // notification we can tell it has been canceled. // // 3) If notifying the handler does not cancel it, add it back to our list // of handlers so we will notify it on future retries. Cancelable retryHandlerListSnapshot; mConnectionRetry.DequeueAll(retryHandlerListSnapshot); while (retryHandlerListSnapshot.mNext != &retryHandlerListSnapshot) { auto * cb = Callback::Callback::FromCancelable(retryHandlerListSnapshot.mNext); Callback::CallbackDeque currentCallbackHolder; currentCallbackHolder.Enqueue(cb->Cancel()); cb->mCall(cb->mContext, mPeerId, error, timeoutEstimate); if (currentCallbackHolder.mNext != ¤tCallbackHolder) { // Callback has not been canceled as part of the call, so is still // supposed to be registered with us. AddRetryHandler(cb); } } } #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES } // namespace chip