/*********************************************************************** * * Copyright (c) 2008 Broadcom Corporation * All Rights Reserved * <:label-BRCM:2012:DUAL/GPL:standard Unless you and Broadcom execute a separate written software license agreement governing use of this software, this software is licensed to you under the terms of the GNU General Public License version 2 (the "GPL"), available at http://www.broadcom.com/licenses/GPLv2.php, with the following added to such license: As a special exception, the copyright holders of this software give you permission to link this software with independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from this software. The special exception does not apply to any modifications of the software. Not withstanding the above, under no circumstances may you combine this software in any way with any other Broadcom software provided under a license other than the GPL, without Broadcom's express prior written consent. :> * ************************************************************************/ #include #include #include #include "cms.h" #include "cms_mem.h" #include "cms_log.h" // Check whether the string contains XML Unicode character references of the form // &#xhhhh; or &#dddd;, where h is any hex character [0-9,a-f] // and d is any decimal digit [0-9] // such as &#x4A; or &#67; // This function will look for any number of digits, but only single-byte values // (at most 127) are supported right now.
UBOOL8 cmsUnicode_isUnescapeNeeded (const char *string) { char *pStart = NULL, *pChar = NULL; UBOOL8 found = FALSE; if (string == NULL) return found; pStart = (char *)string; while (found == FALSE) { if ((pChar = strstr(pStart, "&#")) != NULL) { for (pChar += 2; found == FALSE && *pChar != '&' && *pChar != '\0'; pChar++) { if (*pChar == ';') found = TRUE; } pStart = pChar; } else { break; } } return found; } static CmsRet consumeUnicode (const char *string, unsigned char *pConvertedChar, UINT32 *offset) { char *tmpStr = NULL; char *pSemi = NULL, *pAmp = NULL; UINT16 num = 0; UINT32 i = 0; CmsRet ret = CMSRET_SUCCESS; if (string == NULL) { return CMSRET_INVALID_PARAM_VALUE; } // See valid format strings above tmpStr = cmsMem_strdup(string); *pConvertedChar = tmpStr[i]; *offset = 1; if (tmpStr[i] == '&' && tmpStr[i+1] == '#') { // tmpStr = '&#x;' ==> invalid number to convert if (tmpStr[i+2] == 'x' && tmpStr[i+3] == ';') { ret = CMSRET_INVALID_PARAM_VALUE; cmsLog_error("invalid format, skipping data"); *offset = 4; } // tmpStr = '&#;' ==> invalid number to convert else if (tmpStr[i+2] == ';') { ret = CMSRET_INVALID_PARAM_VALUE; cmsLog_error("invalid format, skipping data"); *offset = 3; } else { pSemi = strstr(&tmpStr[i+2], ";"); if (pSemi != NULL) { pAmp = strstr(&tmpStr[i+2], "&"); if (pAmp == NULL || pAmp > pSemi) { *pSemi = '\0'; if (tmpStr[i+2] == 'x') num = strtoul(&tmpStr[i+3], (char **)NULL, 16); else num = strtoul(&tmpStr[i+2], (char **)NULL, 10); // TO-DO: need to take care unicode 16 (2 bytes) // right now only take care unicode 8 (1 byte) if (num > 127) { cmsLog_error("multi-byte unicode not supported. 
Return CMSRET_INVALID_PARAM_VALUE " "expected num <= 127, got num %d", num); ret = CMSRET_INVALID_PARAM_VALUE; } *pConvertedChar = (unsigned char) btowc(num); *offset = (pSemi - tmpStr) + 1; } } } } CMSMEM_FREE_BUF_AND_NULL_PTR(tmpStr); return ret; } CmsRet cmsUnicode_unescapeString (const char *string, char **unicodedString) { char *tmpStr = NULL; unsigned char convertedChar; UINT32 len = 0, i = 0, j = 0, offset = 0; CmsRet ret = CMSRET_SUCCESS; if (string == NULL) { return ret; } len = strlen(string); if ((tmpStr = cmsMem_alloc(len, ALLOC_ZEROIZE)) == NULL) { cmsLog_error("failed to allocate %d bytes", len); return CMSRET_RESOURCE_EXCEEDED; } while (i < len) { ret = consumeUnicode(&string[i], &convertedChar, &offset); // note, only 8 bit unicode is suported right now, so we can only // get at most 1 byte convertedChar if (ret == CMSRET_SUCCESS) { tmpStr[j++] = convertedChar; } else { /* when there is error, just stop, no need to continue */ break; } i += offset; } *unicodedString = tmpStr; return ret; }