/* * * Copyright 2016-2018 The nlfaultinjection Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ /** * @file * Implementation of the fault-injection utilities. */ #ifndef __STDC_LIMIT_MACROS #define __STDC_LIMIT_MACROS #endif #include #include #include #include #include namespace nl { namespace FaultInjection { static void Die() __attribute__((noreturn)); static GlobalContext * sGlobalContext = nullptr; /** * The callback function that implements the deterministic * injection feature (see FailAtFault). */ static bool DeterministicCbFn(Identifier aId, Record * aRecord, void * aContext) { bool retval = false; (void) aId; (void) aContext; if (aRecord->mNumCallsToSkip) { aRecord->mNumCallsToSkip--; } else if (aRecord->mNumCallsToFail) { aRecord->mNumCallsToFail--; retval = true; } return retval; } /** * Callback list node for DeterministicCbFn. * This node terminates all callback lists. */ static Callback sDeterministicCb = { DeterministicCbFn, nullptr, nullptr }; /** * The callback function that implements the random * injection feature (see FailRandomlyAtFault). */ static bool RandomCbFn(Identifier aId, Record * aRecord, void * aContext) { bool retval = false; (void) aId; (void) aContext; if (aRecord->mPercentage > 0) { int randValue = (rand() % 100) + 1; if (randValue <= aRecord->mPercentage) { retval = true; } } return retval; } /** * Callback list node for RandomCbFn. * Note that this is initialized to point to sDeterministicCb. * All Record instances are initialized to point to * this callback node. */ static Callback sRandomCb = { RandomCbFn, nullptr, &sDeterministicCb }; /** * Alias for the address of the first default callback. */ static const Callback * sEndOfCustomCallbacks = &sRandomCb; /** * Initialize the Manager instance. * * @param[in] inNumFaults The size of inFaultArray, equal to the number of fault IDs. * @param[in] inFaultArray A pointer to an array of Record, in which this object * will store the configuration of each fault. * @param[in] inManagerName A pointer to a C string containing the name of the Manager. * @param[in] inFaultNames A pointer to an array of inNumFaults C strings that describe * each fault ID. * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::Init(size_t inNumFaults, Record * inFaultArray, Name inManagerName, const Name * inFaultNames) { int32_t err = 0; Identifier i; nlEXPECT_ACTION((inNumFaults > 0 && inFaultArray && inManagerName && inFaultNames), exit, err = -EINVAL); mName = inManagerName; mNumFaults = inNumFaults; mFaultRecords = inFaultArray; mFaultNames = inFaultNames; mLock = nullptr; mUnlock = nullptr; mLockContext = nullptr; // Link all callback lists to the two default callbacks. for (i = 0; i < mNumFaults; i++) { mFaultRecords[i].mCallbackList = &sRandomCb; } exit: return err; } /** * Configure a fault to be triggered randomly, with a given probability defined as a percentage * This is meant to be used on live systems to generate a build that will encounter random failures. * * @param[in] inId The fault ID * @param[in] inPercentage An integer between 0 and 100. 100 means "always". 0 means "never". * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::FailRandomlyAtFault(Identifier inId, uint8_t inPercentage) { int32_t err = 0; nlEXPECT_ACTION((inId < mNumFaults && inPercentage <= 100), exit, err = -EINVAL); Lock(); mFaultRecords[inId].mNumCallsToSkip = 0; mFaultRecords[inId].mNumCallsToFail = 0; mFaultRecords[inId].mPercentage = inPercentage; Unlock(); exit: return err; } /** * Configure a fault to be triggered deterministically. * * @param[in] inId The fault ID * @param[in] inNumCallsToSkip The number of times this fault is to be skipped before it * starts to fail. * @param[in] inNumCallsToFail The number of times the fault should be triggered. * @param[in] inTakeMutex By default this method takes the Manager's mutex. * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken. * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::FailAtFault(Identifier inId, uint32_t inNumCallsToSkip, uint32_t inNumCallsToFail, bool inTakeMutex) { int32_t err = 0; nlEXPECT_ACTION(inId < mNumFaults && inNumCallsToSkip <= UINT16_MAX && inNumCallsToFail <= UINT16_MAX, exit, err = -EINVAL); if (inTakeMutex) { Lock(); } mFaultRecords[inId].mNumCallsToSkip = static_cast(inNumCallsToSkip); mFaultRecords[inId].mNumCallsToFail = static_cast(inNumCallsToFail); mFaultRecords[inId].mPercentage = 0; if (inTakeMutex) { Unlock(); } exit: return err; } /** * @overload int32_t FailAtFault(Identifier inId, uint32_t inNumCallsToSkip, uint32_t inNumCallsToFail, bool inTakeMutex) */ int32_t Manager::FailAtFault(Identifier inId, uint32_t inNumCallsToSkip, uint32_t inNumCallsToFail) { return FailAtFault(inId, inNumCallsToSkip, inNumCallsToFail, kMutexTake); } /** * Configure a fault to reboot the system when triggered. * If the application has installed a RebootCallbackFn, it will * be invoked when fault inId is triggered. * If the application has not installed the callback, the system * will crash. * * @param[in] inId The fault ID * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::RebootAtFault(Identifier inId) { int32_t err = 0; nlEXPECT_ACTION(inId < mNumFaults, exit, err = -EINVAL); Lock(); mFaultRecords[inId].mReboot = true; Unlock(); exit: return err; } /** * Store a set of arguments for a given fault ID. * The array of arguments is made available to the code injected with * the nlFAULT_INJECT macro. * For this to work for a given fault ID, the Manager must allocate memory to * store the arguments and configure the Record's mLengthOfArguments and * mArguments members accordingly. * * @param[in] inId The fault ID * @param[in] inNumArgs The number of arguments in the array pointed to by inArgs. * @param[in] inArgs The pointer to the array of integers to be stored in the fault * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::StoreArgsAtFault(Identifier inId, uint16_t inNumArgs, int32_t * inArgs) { int32_t err = 0; size_t i; nlEXPECT_ACTION(inId < mNumFaults && mFaultRecords[inId].mArguments != nullptr && mFaultRecords[inId].mLengthOfArguments >= inNumArgs && inNumArgs <= UINT8_MAX, exit, err = -EINVAL); Lock(); for (i = 0; i < inNumArgs; i++) { mFaultRecords[inId].mArguments[i] = inArgs[i]; } mFaultRecords[inId].mNumArguments = static_cast(inNumArgs); Unlock(); exit: return err; } /** * Attach a callback to a fault ID. * Calling this twice does not attach the callback twice. * * @param[in] inId The fault ID * @param[in] inCallback The callback node to be attached to the fault * * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::InsertCallbackAtFault(Identifier inId, Callback * inCallBack) { int32_t err = 0; // Make sure it's not already there err = RemoveCallbackAtFault(inId, inCallBack); nlEXPECT_SUCCESS(err, exit); Lock(); // Insert the callback at the beginning of the list. // Remember that all lists end into the two default (deterministic // and random) callbacks! inCallBack->mNext = mFaultRecords[inId].mCallbackList; mFaultRecords[inId].mCallbackList = inCallBack; Unlock(); exit: return err; } /** * Detaches a callback from a fault. * * @param[in] inId The fault * @param[in] inCallback The callback node to be removed. * @param[in] inTakeMutex By default this method takes the Manager's mutex. * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken. * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::RemoveCallbackAtFault(Identifier inId, Callback * inCallBack, bool inTakeMutex) { int32_t err = 0; Callback ** cb = nullptr; nlEXPECT_ACTION((inId < mNumFaults) && (inCallBack != nullptr), exit, err = -EINVAL); if (inTakeMutex) { Lock(); } cb = &mFaultRecords[inId].mCallbackList; while (*cb != nullptr) { if (*cb == inCallBack) { *cb = (*cb)->mNext; break; } cb = &((*cb)->mNext); } if (inTakeMutex) { Unlock(); } exit: return err; } /** * @overload int32_t Manager::RemoveCallbackAtFault(Identifier inId, Callback *inCallBack, bool inTakeMutex) */ int32_t Manager::RemoveCallbackAtFault(Identifier inId, Callback * inCallBack) { return RemoveCallbackAtFault(inId, inCallBack, kMutexTake); } /** * When the program traverses the location at which a fault should be injected, this method is invoked * on the manager to query the configuration of the fault ID. * * A fault can be triggered randomly, deterministically or on a call-by-call basis by a callback. * All three types of trigger can be installed at the same time, and they all get a chance of * injecting the fault. * * @param[in] inId The fault ID * @param[in] inTakeMutex By default this method takes the Manager's mutex. * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken. * * @return true if the fault should be injected; false otherwise. */ bool Manager::CheckFault(Identifier inId, bool inTakeMutex) { bool retval = false; Callback * cb = nullptr; Callback * next = nullptr; bool reboot = false; nlEXPECT(inId < mNumFaults, exit); if (inTakeMutex) { Lock(); } cb = mFaultRecords[inId].mCallbackList; while (cb != nullptr) { // Save mNext now, in case the callback removes itself // calling RemoveCallbackAtFault next = cb->mNext; if (cb->mCallBackFn(inId, &mFaultRecords[inId], cb->mContext)) { retval = true; } cb = next; } reboot = mFaultRecords[inId].mReboot; if (retval && sGlobalContext && sGlobalContext->mCbTable.mPostInjectionCb) { sGlobalContext->mCbTable.mPostInjectionCb(this, inId, &mFaultRecords[inId]); } if (retval && reboot) { // If the application has not setup a context and/or reboot callback, the system will crash if (sGlobalContext && sGlobalContext->mCbTable.mRebootCb) { sGlobalContext->mCbTable.mRebootCb(); } else { Die(); } } mFaultRecords[inId].mNumTimesChecked++; if (inTakeMutex) { Unlock(); } exit: return retval; } /** * @overload bool CheckFault(Identifier inId, bool inTakeMutex) */ bool Manager::CheckFault(Identifier inId) { return CheckFault(inId, kMutexTake); } /** * When the program traverses the location at which a fault should be injected, this method is invoked * on the manager to query the configuration of the fault ID. * * This version of the method retrieves the arguments stored in the Record. * * A fault can be triggered randomly, deterministically or on a call-by-call basis by a callback. * All three types of trigger can be installed at the same time, and they all get a chance of * injecting the fault. * * @param[in] inId The fault ID * @param[in] outNumArgs The length of the array pointed to by outArgs * @param[in] outArgs The array of arguments configured for the faultId * @param[in] inTakeMutex By default this method takes the Manager's mutex. * If inTakeMutex is set to kMutexDoNotTake, the mutex is not taken. * * @return true if the fault should be injected; false otherwise. */ bool Manager::CheckFault(Identifier inId, uint16_t & outNumArgs, int32_t *& outArgs, bool inTakeMutex) { bool retval = false; if (inTakeMutex) { Lock(); } retval = CheckFault(inId, kMutexDoNotTake); if (retval) { outNumArgs = mFaultRecords[inId].mNumArguments; outArgs = mFaultRecords[inId].mArguments; } if (inTakeMutex) { Unlock(); } return retval; } /** * @overload bool CheckFault(Identifier inId, uint16_t &outNumArgs, int32_t *&outArgs, bool inTakeMutex) */ bool Manager::CheckFault(Identifier inId, uint16_t & outNumArgs, int32_t *& outArgs) { return CheckFault(inId, outNumArgs, outArgs, kMutexTake); } /** * Reset the counters in the fault Records * Note that calling this method does not impact the current configuration * in any way (including the number of times a fault is to be skipped * before it should fail). */ void Manager::ResetFaultCounters() { Identifier id = 0; Lock(); for (id = 0; id < mNumFaults; id++) { mFaultRecords[id].mNumTimesChecked = 0; } Unlock(); } /** * Reset the configuration of a fault Record * * @param[in] inId The fault ID * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::ResetFaultConfigurations(Identifier inId) { Callback * cb; int32_t err = 0; nlEXPECT_ACTION((inId < mNumFaults), exit, err = -EINVAL); Lock(); mFaultRecords[inId].mNumCallsToSkip = 0; mFaultRecords[inId].mNumCallsToFail = 0; mFaultRecords[inId].mPercentage = 0; mFaultRecords[inId].mReboot = 0; mFaultRecords[inId].mNumArguments = 0; cb = mFaultRecords[inId].mCallbackList; // All callback handling code in this module is based on the assumption // that custom callbacks are inserted at the beginning of the list while (cb != sEndOfCustomCallbacks && cb != nullptr) { (void) RemoveCallbackAtFault(inId, cb, kMutexDoNotTake); cb = mFaultRecords[inId].mCallbackList; } Unlock(); exit: return err; } /** * Reset the configuration of all fault Records * * @return -EINVAL if the inputs are not valid. * 0 otherwise. */ int32_t Manager::ResetFaultConfigurations() { int32_t err = 0; Identifier id = 0; for (id = 0; id < mNumFaults; id++) { err = ResetFaultConfigurations(id); nlEXPECT(err == 0, exit); } exit: return err; } /** * Take the Manager's mutex. */ void Manager::Lock() { if (mLock) { mLock(mLockContext); } } /** * Release the Manager's mutex. */ void Manager::Unlock() { if (mUnlock) { mUnlock(mLockContext); } } /** * Configure the instance of GlobalContext to use. * On systems in which faults are configured and injected from different threads, * this function should be called before threads are started. * * @param[in] inGlobalContext Pointer to the GlobalContext provided by the application */ void SetGlobalContext(GlobalContext * inGlobalContext) { sGlobalContext = inGlobalContext; } /** * Parse an integer * * This implementation does not check for ERANGE, as it assumes a very simple * underlying implementation of strtol. * * @param[in] str Pointer to a string representing an integer * * @param[out] num Pointer to the integer result * * @return true in case of success; false if the string does not * contain an integer. */ static bool ParseInt(const char * str, int32_t * num) { char * endptr = nullptr; long tmp; bool retval = true; tmp = strtol(str, &endptr, 10); if (!endptr || *endptr != '\0') { retval = false; } else { *num = static_cast(tmp); } return retval; } /** * Parse an unsigned integer * * @param[in] str Pointer to a string representing an insigned int * * @param[out] num Pointer to the unsigned integer result * * @return true in case of success; false if the string does not * contain an unsigned integer. */ static bool ParseUInt(const char * str, uint32_t * num) { bool retval = true; int32_t tmpint = 0; retval = ParseInt(str, &tmpint); if (retval) { if (tmpint < 0) { retval = false; } else { *num = static_cast(tmpint); } } return retval; } /** * Parse a fault-injection configuration string and apply the configuration. * * @param[in] aFaultInjectionStr The configuration string. An example of a valid string that * enables two faults is "system_buffer_f5_s1:inet_send_p33" * An example of a configuration string that * also passes three integer arguments to the fault point is * "system_buffer_f5_s1_a10_a7_a-4" * The format is * "__{f[_s],p}[_a]..." * * @param[in] inArray An array of GetManagerFn callbacks * to be used to parse the string. * * @param[in] inArraySize Num of elements in inArray * * @return true if the string can be parsed completely; false otherwise */ bool ParseFaultInjectionStr(char * aFaultInjectionStr, const GetManagerFn * inArray, size_t inArraySize) { ManagerTable table = { inArray, inArraySize }; size_t numTables = 1; return ParseFaultInjectionStr(aFaultInjectionStr, &table, numTables); } /** * Parse a fault-injection configuration string and apply the configuration. * * @param[in] aFaultInjectionStr The configuration string. An example of a valid string that * enables two faults is "system_buffer_f5_s1:inet_send_p33" * An example of a configuration string that * also passes three integer arguments to the fault point is * "system_buffer_f5_s1_a10_a7_a-4" * The format is * "__{f[_s],p}[_a]..." * * @param[in] inTables An array of ManagerTable structures * to be used to parse the string. * * @param[in] inNumTables Size of inTables * * @return true if the string can be parsed completely; false otherwise */ bool ParseFaultInjectionStr(char * aFaultInjectionStr, const ManagerTable * inTables, size_t inNumTables) { char * tok1 = nullptr; char * savePtr1 = nullptr; char * tok2 = nullptr; char * savePtr2 = nullptr; char * outerString = aFaultInjectionStr; size_t i = 0; nl::FaultInjection::Identifier j = 0; int err = 0; bool retval = false; int32_t args[kMaxFaultArgs]; uint16_t numArgs = 0; nl::FaultInjection::Manager * mgr = nullptr; nl::FaultInjection::Identifier faultId = 0; memset(args, 0, sizeof(args)); while ((tok1 = strtok_r(outerString, ":", &savePtr1))) { uint32_t numTimesToFail = 0; uint32_t numTimesToSkip = 0; uint32_t percentage = 0; bool gotPercentage = false; bool gotReboot = false; bool gotArguments = false; const Name * faultNames = nullptr; outerString = nullptr; tok2 = strtok_r(tok1, "_", &savePtr2); nlEXPECT(tok2 != nullptr, exit); // this is the module for (i = 0; i < inNumTables; i++) { for (j = 0; j < inTables[i].mNumItems; j++) { nl::FaultInjection::Manager & tmpMgr = inTables[i].mArray[j](); if (!strcmp(tok2, tmpMgr.GetName())) { mgr = &tmpMgr; break; } } } nlEXPECT(mgr != nullptr, exit); tok2 = strtok_r(nullptr, "_", &savePtr2); nlEXPECT(tok2 != nullptr, exit); // this is the fault name faultNames = mgr->GetFaultNames(); for (j = 0; j < mgr->GetNumFaults(); j++) { if (!strcmp(tok2, faultNames[j])) { faultId = j; break; } } nlEXPECT(j != mgr->GetNumFaults(), exit); while ((tok2 = strtok_r(nullptr, "_", &savePtr2))) { switch (tok2[0]) { case 'a': { int32_t tmp = 0; nlEXPECT(numArgs < kMaxFaultArgs, exit); gotArguments = true; nlEXPECT(ParseInt(&(tok2[1]), &tmp), exit); args[numArgs++] = tmp; } break; case 'f': nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToFail), exit); break; case 's': nlEXPECT(ParseUInt(&(tok2[1]), &numTimesToSkip), exit); break; case 'p': gotPercentage = true; nlEXPECT(ParseUInt(&(tok2[1]), &percentage), exit); nlEXPECT(percentage <= 100, exit); break; case 'r': gotReboot = true; break; default: goto exit; break; } } if (gotArguments) { err = mgr->StoreArgsAtFault(faultId, numArgs, args); nlEXPECT_SUCCESS(err, exit); } if (gotPercentage) { err = mgr->FailRandomlyAtFault(faultId, static_cast(percentage)); nlEXPECT_SUCCESS(err, exit); } else { err = mgr->FailAtFault(faultId, numTimesToSkip, numTimesToFail); nlEXPECT_SUCCESS(err, exit); } if (gotReboot) { err = mgr->RebootAtFault(faultId); nlEXPECT_SUCCESS(err, exit); } } retval = true; exit: return retval; } /** * Internal function to kill the process if a * fault is supposed to reboot the process but the application * has not installed a callback */ static void Die() { #if defined(__GNUC__) && (__GNUC__ >= 12) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" #endif while (true) *((volatile long *) 1) = 0; #if defined(__GNUC__) && (__GNUC__ >= 12) #pragma GCC diagnostic pop #endif } } // namespace FaultInjection } // namespace nl