/*
 * Copyright (c) 2010
 * by AVM GmbH Berlin, Germany
 *
 * Licence: Free, use with no restriction.
 */

#include <sys/types.h>
#include <stdlib.h>
#include <string.h>

#include "charencoding.h"


/*
 * Encode one Unicode code point as UTF-8.
 *
 * codepoint          - value to encode.
 * buf                - destination; receives 1..4 bytes, no NUL terminator.
 * max_buf            - number of bytes available at buf.
 * fallback_codepoint - encoded instead of codepoint when codepoint is
 *                      greater than 0x10ffff.
 *
 * Returns the number of bytes written (1..4), or 0 if nothing was written.
 * 0 is returned when max_buf is too small for the encoding, and also when
 * neither codepoint nor fallback_codepoint is <= 0x10ffff. The second case
 * is the only way to get 0 with max_buf >= 4.
 *
 * Note: values in the surrogate range 0xd800..0xdfff are not rejected; they
 * are encoded as ordinary three byte sequences.
 */
size_t ConvertSingleUnicodeCodepointTo_UTF8_WithFallback(unsigned long codepoint, unsigned char *buf, size_t max_buf, unsigned long fallback_codepoint)
{
	if (codepoint > 0x10ffff) {
		/* Substitute exactly once. Retrying in a loop would never
		 * terminate when the fallback itself is out of range. */
		codepoint = fallback_codepoint;
		if (codepoint > 0x10ffff) return 0;
	}

	if (codepoint <= 0x7f) {
		if (max_buf < 1) return 0;
		buf[0] = (unsigned char)codepoint;
		return 1;
	}
	if (codepoint <= 0x7ff) {
		if (max_buf < 2) return 0;
		buf[0] = (unsigned char)(0xc0 | ((codepoint >> 6) & 0x1f));
		buf[1] = (unsigned char)(0x80 | (codepoint & 0x3f));
		return 2;
	}
	if (codepoint <= 0xffff) {
		if (max_buf < 3) return 0;
		buf[0] = (unsigned char)(0xe0 | ((codepoint >> 12) & 0x0f));
		buf[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
		buf[2] = (unsigned char)(0x80 | (codepoint & 0x3f));
		return 3;
	}

	/* 0x10000 .. 0x10ffff */
	if (max_buf < 4) return 0;
	buf[0] = (unsigned char)(0xf0 | ((codepoint >> 18) & 0x07));
	buf[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3f));
	buf[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
	buf[3] = (unsigned char)(0x80 | (codepoint & 0x3f));
	return 4;
}

/*
 * Decode one UTF-8 sequence from the start of buf.
 *
 * buf             - input bytes.
 * max_buf         - number of readable bytes at buf.
 * pBytesConverted - out: length of the decoded sequence (1..4), or 0 on
 *                   failure. Always written.
 *
 * Returns the decoded code point. On failure returns 0 with
 * *pBytesConverted == 0; callers must test *pBytesConverted, because 0 is
 * also the legitimate result of decoding a NUL byte (then it is 1).
 *
 * Fails when: max_buf is 0, the lead byte is not a valid 1..4 byte lead,
 * the sequence is longer than max_buf, a continuation byte is not of the
 * form 10xxxxxx, the sequence is an overlong encoding (e.g. C0 80), or the
 * value is greater than 0x10ffff.
 *
 * Values in the surrogate range 0xd800..0xdfff are still accepted, which
 * keeps the decoder symmetric with the encoder in this file.
 */
unsigned long ConvertSingle_UTF8_ToUnicodeCodepoint(const unsigned char *buf, size_t max_buf, /*out */size_t *pBytesConverted)
{
	unsigned long codepoint;
	unsigned long min_codepoint; /* smallest value that needs this length */
	size_t seq_len;
	size_t i;
	unsigned char lead;

	if (0 == max_buf) {
		goto err;
	}

	lead = buf[0];

	if (!(lead & 0x80)) {
		*pBytesConverted = 1;
		return (unsigned long)lead;
	}

	if (0xc0 == (lead & 0xe0)) {
		seq_len = 2;
		codepoint = lead & 0x1f;
		min_codepoint = 0x80;
	} else if (0xe0 == (lead & 0xf0)) {
		seq_len = 3;
		codepoint = lead & 0x0f;
		min_codepoint = 0x800;
	} else if (0xf0 == (lead & 0xf8)) {
		seq_len = 4;
		codepoint = lead & 0x07;
		min_codepoint = 0x10000;
	} else {
		/* stray continuation byte or invalid lead (0xf8..0xff) */
		goto err;
	}

	if (max_buf < seq_len) goto err;

	for (i = 1; i < seq_len; i++) {
		unsigned char c = buf[i];
		if (0x80 != (c & 0xc0)) goto err;
		codepoint = (codepoint << 6) | (unsigned long)(c & 0x3f);
	}

	/* Reject overlong forms: they would let bytes such as NUL or '/' be
	 * smuggled past checks done on the raw byte string. */
	if (codepoint < min_codepoint) goto err;
	if (codepoint > 0x10ffff) goto err;

	*pBytesConverted = seq_len;
	return codepoint;

err:
	*pBytesConverted = 0;
	return 0;
}

// -----------------------------------------------------------------------------------------

/*
 * Convert a NUL-terminated ISO-8859-1 string to a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * iso - input string; NULL is treated like the empty string.
 *
 * Returns a heap buffer that the caller must release with free(). If the
 * output buffer cannot be allocated, an allocated empty string is returned
 * instead; NULL is returned only when even that one byte allocation fails.
 *
 * Every ISO-8859-1 byte equals its Unicode code point, so bytes below 0x80
 * are copied unchanged and all others become a two byte UTF-8 sequence.
 * The output size is therefore computed exactly up front and the buffer is
 * allocated once, with no incremental growth.
 */
char *ConvertStringFromISO8859_1ToUTF8_WithAlloc(const char *iso)
{
	const unsigned char *in = (const unsigned char *)iso;
	const unsigned char *pin;
	unsigned char *out_buf;
	unsigned char *pout;
	size_t needed = 1; /* room for the terminating NUL */

	if (in) {
		/* first pass: exact output size */
		for (pin = in; *pin; pin++) {
			size_t step = (*pin < 0x80) ? 1 : 2;
			if (needed > (size_t)-1 - step) {
				return (char *)calloc(1, 1); // size overflow: failure
			}
			needed += step;
		}
	}

	out_buf = (unsigned char *)calloc(1, needed);
	if (!out_buf) return (char *)calloc(1, 1); // failure: empty string

	pout = out_buf;
	if (in) {
		/* second pass: encode */
		for (pin = in; *pin; pin++) {
			unsigned char c = *pin;
			if (c < 0x80) {
				*pout++ = c;
			} else {
				*pout++ = (unsigned char)(0xc0 | (c >> 6));
				*pout++ = (unsigned char)(0x80 | (c & 0x3f));
			}
		}
	}
	*pout = '\0';
	return (char *)out_buf;
}


/*
 * Convert a NUL-terminated UTF-8 string to ISO-8859-1 in place.
 *
 * utf8              - buffer holding the UTF-8 input; overwritten with the
 *                     ISO-8859-1 result.
 * fallback_iso_char - written for every code point above 0xff. If it is 0,
 *                     such a code point aborts the conversion instead.
 *
 * Returns  0 on success (the result is NUL-terminated),
 *         -1 if a UTF-8 sequence could not be decoded,
 *         -2 if a code point above 0xff was found and no fallback was given.
 *
 * On -1 and -2 the function returns immediately: bytes converted so far
 * have already been written to the front of the buffer and no new
 * terminator is stored.
 */
int ConvertStringFromUTF8ToISO8859_1_With_Fallback(char *utf8, char fallback_iso_char)
{
	/* Reader and writer share the buffer: each step consumes at least one
	 * input byte and emits exactly one, so the writer never overtakes the
	 * reader. */
	unsigned char *src = (unsigned char *)utf8;
	unsigned char *dst = (unsigned char *)utf8;
	size_t remaining;

	for (remaining = strlen(utf8); remaining != 0; ) {
		size_t consumed;
		unsigned long cp = ConvertSingle_UTF8_ToUnicodeCodepoint(src, remaining, &consumed);

		if (0 == consumed) return -1; // undecodable input

		if (cp > 0xff) {
			if (0 == fallback_iso_char) return -2; // not representable in ISO8859-1
			cp = (unsigned long)fallback_iso_char;
		}

		*dst++ = (unsigned char)(cp & 0xff);
		src += consumed;
		remaining -= consumed;
	}

	*dst = '\0';
	return 0; // ok
}