/* ***** BEGIN LICENSE BLOCK ***** * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * ***** END LICENSE BLOCK ***** */ /* * This code is based on nssb64e.c from Mozilla.org, which allows * the code to be licensed under MPL/GPL/or LGPL. We will license it * under LGPL --mwang 8/22/07 */ #include #include #include #include #include "genutil_base64.h" /* * The following implementation of base64 decoding was based on code * found in libmime (specifically, in mimeenc.c). It has been adapted to * use PR types and naming as well as to provide other necessary semantics * (like buffer-in/buffer-out in addition to "streaming" without undue * performance hit of extra copying if you made the buffer versions * use the output_fn). It also incorporates some aspects of the current * NSPR base64 decoding code. As such, you may find similarities to * both of those implementations. I tried to use names that reflected * the original code when possible. For this reason you may find some * inconsistencies -- libmime used lots of "in" and "out" whereas the * NSPR version uses "src" and "dest"; sometimes I changed one to the other * and sometimes I left them when I thought the subroutines were at least * self-consistent. */ /* * Opaque object used by the decoder to store state. */ typedef struct PLBase64DecoderStr { /* Current token (or portion, if token_size < 4) being decoded. */ unsigned char token[4]; SINT32 token_size; /* * Where the decoded output goes -- this will * be the entire buffered result for users of the buffer version. */ unsigned char *output_buffer; UINT32 output_buflen; /* the total length of allocated buffer */ UINT32 output_length; /* the length that is currently populated */ } PLBase64Decoder; /* * Table to convert an ascii "code" to its corresponding binary value. * For ease of use, the binary values in the table are the actual values * PLUS ONE. This is so that the special value of zero can denote an * invalid mapping; that was much easier than trying to fill in the other * values with some value other than zero, and to check for it. * Just remember to SUBTRACT ONE when using the value retrieved. */ static unsigned char base64_codetovaluep1[256] = { /* 0: */ 0, 0, 0, 0, 0, 0, 0, 0, /* 8: */ 0, 0, 0, 0, 0, 0, 0, 0, /* 16: */ 0, 0, 0, 0, 0, 0, 0, 0, /* 24: */ 0, 0, 0, 0, 0, 0, 0, 0, /* 32: */ 0, 0, 0, 0, 0, 0, 0, 0, /* 40: */ 0, 0, 0, 63, 0, 0, 0, 64, /* 48: */ 53, 54, 55, 56, 57, 58, 59, 60, /* 56: */ 61, 62, 0, 0, 0, 0, 0, 0, /* 64: */ 0, 1, 2, 3, 4, 5, 6, 7, /* 72: */ 8, 9, 10, 11, 12, 13, 14, 15, /* 80: */ 16, 17, 18, 19, 20, 21, 22, 23, /* 88: */ 24, 25, 26, 0, 0, 0, 0, 0, /* 96: */ 0, 27, 28, 29, 30, 31, 32, 33, /* 104: */ 34, 35, 36, 37, 38, 39, 40, 41, /* 112: */ 42, 43, 44, 45, 46, 47, 48, 49, /* 120: */ 50, 51, 52, 0, 0, 0, 0, 0, /* 128: */ 0, 0, 0, 0, 0, 0, 0, 0 /* and rest are all zero as well */ }; #define B64_PAD '=' /* * Reads 4; writes 3 (known, or expected, to have no trailing padding). * Returns bytes written; -1 on error (unexpected character). */ static int pl_base64_decode_4to3 (const unsigned char *in, unsigned char *out) { int j; SINT32 num = 0; unsigned char bits; for (j = 0; j < 4; j++) { bits = base64_codetovaluep1[in[j]]; if (bits == 0) return -1; num = (num << 6) | (bits - 1); } out[0] = (unsigned char) (num >> 16); out[1] = (unsigned char) ((num >> 8) & 0xFF); out[2] = (unsigned char) (num & 0xFF); return 3; } /* * Reads 3; writes 2 (caller already confirmed EOF or trailing padding). * Returns bytes written; -1 on error (unexpected character). */ static int pl_base64_decode_3to2 (const unsigned char *in, unsigned char *out) { SINT32 num = 0; unsigned char bits1, bits2, bits3; bits1 = base64_codetovaluep1[in[0]]; bits2 = base64_codetovaluep1[in[1]]; bits3 = base64_codetovaluep1[in[2]]; if ((bits1 == 0) || (bits2 == 0) || (bits3 == 0)) return -1; num = ((UINT32)(bits1 - 1)) << 10; num |= ((UINT32)(bits2 - 1)) << 4; num |= ((UINT32)(bits3 - 1)) >> 2; out[0] = (unsigned char) (num >> 8); out[1] = (unsigned char) (num & 0xFF); return 2; } /* * Reads 2; writes 1 (caller already confirmed EOF or trailing padding). * Returns bytes written; -1 on error (unexpected character). */ static int pl_base64_decode_2to1 (const unsigned char *in, unsigned char *out) { SINT32 num = 0; unsigned char bits1, bits2; bits1 = base64_codetovaluep1[in[0]]; bits2 = base64_codetovaluep1[in[1]]; if ((bits1 == 0) || (bits2 == 0)) return -1; num = ((UINT32)(bits1 - 1)) << 2; num |= ((UINT32)(bits2 - 1)) >> 4; out[0] = (unsigned char) num; return 1; } /* * Reads 4; writes 0-3. Returns bytes written or -1 on error. * (Writes less than 3 only at (presumed) EOF.) */ static int pl_base64_decode_token (const unsigned char *in, unsigned char *out) { if (in[3] != B64_PAD) return pl_base64_decode_4to3 (in, out); if (in[2] == B64_PAD) return pl_base64_decode_2to1 (in, out); return pl_base64_decode_3to2 (in, out); } static SINT32 pl_base64_decode_buffer (PLBase64Decoder *data, const unsigned char *in, UINT32 length) { unsigned char *out = data->output_buffer; unsigned char *token = data->token; int i, n = 0; i = data->token_size; data->token_size = 0; while (length > 0) { while (i < 4 && length > 0) { /* * XXX Note that the following simply ignores any unexpected * characters. This is exactly what the original code in * libmime did, and I am leaving it. We certainly want to skip * over whitespace (we must); this does much more than that. * I am not confident changing it, and I don't want to slow * the processing down doing more complicated checking, but * someone else might have different ideas in the future. */ if (base64_codetovaluep1[*in] > 0 || *in == B64_PAD) { token[i++] = *in; } in++; length--; } if (i < 4) { /* Didn't get enough for a complete token. */ data->token_size = i; break; } i = 0; assert((out - data->output_buffer + 3) <= (SINT32) data->output_buflen); /* * Assume we are not at the end; the following function only works * for an internal token (no trailing padding characters) but is * faster that way. If it hits an invalid character (padding) it * will return an error; we break out of the loop and try again * calling the routine that will handle a final token. * Note that we intentionally do it this way rather than explicitly * add a check for padding here (because that would just slow down * the normal case) nor do we rely on checking whether we have more * input to process (because that would also slow it down but also * because we want to allow trailing garbage, especially white space * and cannot tell that without read-ahead, also a slow proposition). * Whew. Understand? */ n = pl_base64_decode_4to3 (token, out); if (n < 0) { break; } /* Advance "out" by the number of bytes just written to it. */ out += n; n = 0; } /* * See big comment above, before call to pl_base64_decode_4to3. * Here we check if we error'd out of loop, and allow for the case * that we are processing the last interesting token. If the routine * which should handle padding characters also fails, then we just * have bad input and give up. */ if (n < 0) { n = pl_base64_decode_token (token, out); if (n < 0) { return B64RET_INTERNAL_ERROR; } out += n; } /* * As explained above, we can get here with more input remaining, but * it should be all characters we do not care about (i.e. would be * ignored when transferring from "in" to "token" in loop above, * except here we choose to ignore extraneous pad characters, too). * Swallow it, performing that check. If we find more characters that * we would expect to decode, something is wrong. */ while (length > 0) { if (base64_codetovaluep1[*in] > 0) { return B64RET_INTERNAL_ERROR; } in++; length--; } /* Record the length of decoded data we have left in output_buffer. */ data->output_length = (UINT32) (out - data->output_buffer); return B64RET_SUCCESS; } /* * Flush any remaining buffered characters. Given well-formed input, * this will have nothing to do. If the input was missing the padding * characters at the end, though, there could be 1-3 characters left * behind -- we will tolerate that by adding the padding for them. */ static SINT32 pl_base64_decode_flush (PLBase64Decoder *data) { int count; /* * If no remaining characters, or all are padding (also not well-formed * input, but again, be tolerant), then nothing more to do. (And, that * is considered successful.) */ if (data->token_size == 0 || data->token[0] == B64_PAD) { return B64RET_SUCCESS; } /* * Assume we have all the interesting input except for some expected * padding characters. Add them and decode the resulting token. */ while (data->token_size < 4) { data->token[data->token_size++] = B64_PAD; } data->token_size = 0; /* so a subsequent flush call is a no-op */ count = pl_base64_decode_token (data->token, data->output_buffer + data->output_length); if (count < 0) { return B64RET_INTERNAL_ERROR; } data->output_length += count; return B64RET_SUCCESS; } /* * The maximum space needed to hold the output of the decoder given * input data of length "size". */ UINT32 genUtl_b64DecodedBufferLength(UINT32 size) { return ((size * 3) / 4); } /* * A distinct internal creation function for the buffer version to use. * (It does not want to specify an output_fn, and we want the normal * Create function to require that.) If more common initialization * of the decoding context needs to be done, it should be done *here*. */ static PLBase64Decoder * pl_base64_create_decoder (void) { return ((PLBase64Decoder *) calloc(1, sizeof(PLBase64Decoder))); } /* * When you're done decoding, call this to free the data. */ static void PL_DestroyBase64Decoder (PLBase64Decoder *data) { /* don't free output_buffer. That is the user's buffer. * just free the context buffer. */ free(data); return; } SINT32 genUtl_b64DecodeMalloc(const char *b64Str, UINT8 **binaryBuf, UINT32 *binaryBufLen) { UINT32 b64StrLen; SINT32 ret; if (b64Str == NULL || b64Str[0] == '\0' || binaryBuf == NULL || binaryBufLen == NULL) { return B64RET_INVALID_ARGUMENTS; } b64StrLen = strlen(b64Str); *binaryBufLen = genUtl_b64DecodedBufferLength(b64StrLen); *binaryBuf = calloc(1, *binaryBufLen); if (*binaryBuf == NULL) { return B64RET_RESOURCE_EXCEEDED; } ret = genUtl_b64Decode(b64Str, *binaryBuf, binaryBufLen); if (ret != B64RET_SUCCESS) { free(*binaryBuf); *binaryBuf = NULL; *binaryBufLen = 0; } return ret; } SINT32 genUtl_b64Decode(const char *b64Str, UINT8 *binaryBuf, UINT32 *binaryBufLen) { PLBase64Decoder *data = NULL; UINT32 b64StrLen; SINT32 ret; if (b64Str == NULL || b64Str[0] == '\0') { return B64RET_INVALID_ARGUMENTS; } b64StrLen = strlen(b64Str); /* * Allocate the decoding structure. */ if ((data = pl_base64_create_decoder()) == NULL) { return B64RET_INTERNAL_ERROR; } data->output_buflen = *binaryBufLen; data->output_buffer = binaryBuf; ret = pl_base64_decode_buffer(data, (const unsigned char *) b64Str, b64StrLen); /* * We do not wait for Destroy to flush, because Destroy will also * get rid of our decoder context, which we need to look at first! */ if (ret == B64RET_SUCCESS) { ret = pl_base64_decode_flush(data); } if (ret == B64RET_SUCCESS) { assert(data->output_length <= *binaryBufLen); *binaryBufLen = data->output_length; } PL_DestroyBase64Decoder(data); return ret; }