diff -ur ../ntfs-3g_ntfsprogs-2015.3.14.orig/libntfs-3g/unistr.c ./libntfs-3g/unistr.c
--- ../ntfs-3g_ntfsprogs-2015.3.14.orig/libntfs-3g/unistr.c	2015-03-14 15:10:12.000000000 +0100
+++ ./libntfs-3g/unistr.c	2015-07-28 14:37:18.282144523 +0200
@@ -59,6 +59,54 @@
 #include "logging.h"
 #include "misc.h"
 
+#ifdef AVM_CHARENCODING_AVMMIX
+
+#undef HAVE_MBSINIT /* AVM */
+
+#if 0 // DEBUG: append every Log() message to /var/tmp/ntfs.log
+#define Log(x) _fLog x
+
+static void _fLog(char *fmt, ...)
+{
+	FILE *fp = fopen("/var/tmp/ntfs.log", "a");
+	if (fp) {
+		va_list ap;
+		va_start(ap, fmt);
+		vfprintf(fp, fmt, ap);
+		va_end(ap);
+		fprintf(fp, "\n");
+		fclose(fp);
+	}
+
+}
+#else
+#define Log(x) do { } while (0) // logging disabled: Log((...)); expands to a no-op statement
+#endif
+
+// ------------------------------------------------------------------------------------------
+
+
+// Because the Fritz!Box and Fritz!Media linux systems (and mediasrv for example)
+// are not UTF-8 enabled, we can't
+// use UTF-8 on our interface. Instead we use our own char encoding ("avm mix") that is
+// very much compatible with ISO-8859-1 (for German umlauts) and can also represent all
+// unicode chars.
+
+// avm_mix Char Encoding
+// - be as compatible with ISO-8859-1 as possible
+// - can represent all unicode chars
+// - no 0 byte in encoding (except for the terminating 0 byte)
+// Unicode codepoint   Mix Encoding
+// 00-7f               00-7f
+// 80-99               80-99
+// 9a                  9a + UTF-8 of 9a
+// 9b-ff               9b-ff
+// 100-...             9a + UTF-8 of unicode
+
+#define AVM_MIX_ESCAPE 0x9a
+
+
+#endif // AVM_CHARENCODING_AVMMIX
 #define NOREVBOM 0 /* JPA rejecting U+FFFE and U+FFFF, open to debate */
 
 /*
@@ -497,6 +545,26 @@
 	goto out;
 }
 
+#ifdef AVM_CHARENCODING_AVMMIX
+static int ucs2le_to_avm_mix_size(const ntfschar *ins, const int ins_len, int outs_len)
+{ /* Returns the avm_mix byte count (without NUL) of ins, or -1 with errno ENAMETOOLONG if it exceeds outs_len. */
+	int i;
+	int count = 0;
+
+	for (i = 0; i < ins_len && ins[i]; i++) { // stops at ins_len units or at the first 0 unit
+		unsigned short c = le16_to_cpu(ins[i]);
+		if (c <= 0xff && c != AVM_MIX_ESCAPE) count++; // output will be ISO-8859-1
+		else if (c <= 0x7ff) count += 3; // escape byte + 2-byte UTF-8 sequence
+		else count += 4; // escape byte + 3-byte UTF-8 sequence
+	}
+	if (count > outs_len) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+	return count;
+}
+#endif
+
 /*
  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
  * @ins:	input utf16 string buffer
@@ -609,6 +677,56 @@
 	goto out;
 }
 
+#ifdef AVM_CHARENCODING_AVMMIX
+static int ntfs_ucs2le_to_avm_mix(const ntfschar *ins, const int ins_len,
+				  char **outs, int outs_len)
+{ /* Encodes ins as avm_mix into *outs (allocated here when *outs is NULL); returns bytes written without NUL, or -1. */
+	char *t;
+	int i, size, ret = -1;
+
+	if (!*outs)
+		outs_len = PATH_MAX; // no caller buffer: only the PATH_MAX limit applies
+
+	size = ucs2le_to_avm_mix_size(ins, ins_len, outs_len);
+
+	if (size < 0)
+		goto out;
+
+	if (!*outs) {
+		outs_len = size + 1; // room for the terminating 0 byte
+		*outs = ntfs_malloc(outs_len);
+		if (!*outs)
+			goto out;
+	}
+
+	t = *outs;
+
+	for (i = 0; i < ins_len && ins[i]; i++) {
+		unsigned short c = le16_to_cpu(ins[i]);
+		if (c <= 0xff && c != AVM_MIX_ESCAPE) {
+			// output ISO-8859-1 encoding
+			*t++ = c & 0xff;
+		} else {
+			// output an escape char
+			*t++ = AVM_MIX_ESCAPE;
+			// now output UTF-8 encoding
+			if (c <= 0x7ff) {
+				*t++ = 0xc0 | ((c >> 6) & 0x1f);
+				*t++ = 0x80 | (c & 0x3f);
+			} else {
+				*t++ = 0xe0 | ((c >> 12) & 0x0f);
+				*t++ = 0x80 | ((c >> 6) & 0x3f);
+				*t++ = 0x80 | (c & 0x3f);
+			}
+		}
+	}
+	*t = '\0';
+	ret = t - *outs;
+out:
+	return ret;
+}
+#endif
+
 /*
  * Return the amount of 16-bit elements in UTF-16LE needed
  * (without the terminating null) to store given UTF-8 string.
@@ -618,6 +736,7 @@
  * Note: This does not check whether the input sequence is a valid utf8 string,
  *	 and should be used only in context where such check is made!
  */
+#ifndef AVM_CHARENCODING_AVMMIX
 static int utf8_to_utf16_size(const char *s)
 {
 	int ret = -1;
@@ -656,6 +775,54 @@
 	errno = ENAMETOOLONG;
 	goto out;
 }
+#endif /* !AVM_CHARENCODING_AVMMIX */
+
+#ifdef AVM_CHARENCODING_AVMMIX
+static int avm_mix_to_ucs2le_size(const char *s)
+{ /* Returns the number of 16-bit units (without terminator) needed for avm_mix string s, or -1 with errno set. */
+	int ret = -1;
+	unsigned int byte;
+	size_t count = 0;
+
+Log(("avm_mix_to_ucs2le_size s=%s", s));
+
+	while ((byte = *((const unsigned char *)s++))) {
+		if (++count >= PATH_MAX)
+			goto fail;
+		if (byte != AVM_MIX_ESCAPE) continue; // plain byte: exactly one 16-bit unit
+
+		byte = *((const unsigned char *)s++); // lead byte of the escaped UTF-8 sequence
+		if (byte >= 0xF5) {
+			errno = EILSEQ;
+			goto out;
+		}
+		if (!byte || !*s) // escape was the last byte: s is already past the NUL, do not read it
+			break;
+		if (byte >= 0xC0)
+			s++;
+		if (!*s)
+			break;
+		if (byte >= 0xE0)
+			s++;
+		if (!*s)
+			break;
+		if (byte >= 0xF0) {
+			s++;
+			if (++count >= PATH_MAX) // 4-byte sequence needs a surrogate pair: second unit
+				goto fail;
+		}
+	}
+	ret = count;
+out:
+Log(("avm_mix_to_ucs2le_size ret=%d", ret));
+	return ret;
+fail:
+Log(("avm_mix_to_ucs2le_size fail"));
+	errno = ENAMETOOLONG;
+	goto out;
+}
+#endif
+
 /*
  * This converts one UTF-8 sequence to cpu-endian Unicode value
  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
@@ -729,6 +896,7 @@
  *
  * Return -1 with errno set.
  */
+#ifndef AVM_CHARENCODING_AVMMIX
 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
 {
 #if defined(__APPLE__) || defined(__DARWIN__)
@@ -798,6 +966,57 @@
 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
 	return ret;
 }
+#endif /* !AVM_CHARENCODING_AVMMIX */
+
+#ifdef AVM_CHARENCODING_AVMMIX
+static int ntfs_avm_mix_to_ucs2le(const char *ins, ntfschar **outs)
+{ /* Decodes avm_mix string ins into *outs (allocated here when *outs is NULL); returns units written without terminator, or -1. */
+	const char *t = ins;
+	u32 wc;
+	ntfschar *outpos;
+	int shorts, ret = -1;
+
+	shorts = avm_mix_to_ucs2le_size(ins);
+	if (shorts < 0)
+		goto fail;
+
+	if (!*outs) {
+		*outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar)); // +1 for the terminating 0 unit
+		if (!*outs)
+			goto fail;
+	}
+
+	outpos = *outs;
+
+	while(1) {
+		unsigned char c = *t++;
+		if (c != AVM_MIX_ESCAPE) {
+			*outpos++ = cpu_to_le16(c); // plain byte maps to the same code point; also stores the terminator
+			if (c == 0) break;
+		} else {
+			int m = utf8_to_unicode(&wc, t); // decode the UTF-8 sequence following the escape byte
+			if (m < 0)
+				goto fail;
+			if (wc < 0x10000)
+				*outpos++ = cpu_to_le16(wc);
+			else {
+				// should not happen; emitted as a UTF-16 surrogate pair
+				wc -= 0x10000;
+				*outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
+				*outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
+			}
+			if (m == 0) // m == 0 ends the loop; the unit stored just above is treated as the terminator
+				break;
+			t += m;
+		}
+	}
+
+	ret = --outpos - *outs; // step back over the terminator so it is not counted
+fail:
+Log(("ntfs_avm_mix_to_ucs2le ret=%d", ret));
+	return ret;
+}
+#endif
 
 /**
  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
@@ -848,6 +1067,9 @@
 		errno = ENAMETOOLONG;
 		return -1;
 	}
+#ifdef AVM_CHARENCODING_AVMMIX
+	return ntfs_ucs2le_to_avm_mix(ins, ins_len, outs, outs_len);
+#else
 	if (use_utf8)
 		return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
 #ifdef MB_CUR_MAX
@@ -920,6 +1142,7 @@
 	errno = EILSEQ;
 #endif /* MB_CUR_MAX */
 	return -1;
+#endif /* AVM_CHARENCODING_AVMMIX */
 }
 
 /**
@@ -961,7 +1184,10 @@
 		errno = EINVAL;
 		return -1;
 	}
-	
+
+#ifdef AVM_CHARENCODING_AVMMIX
+	return ntfs_avm_mix_to_ucs2le(ins, outs);
+#else
 	if (use_utf8)
 		return ntfs_utf8_to_utf16(ins, outs);
 
@@ -1059,6 +1285,7 @@
 	errno = EILSEQ;
 #endif /* MB_CUR_MAX */
 	return -1;
+#endif /* AVM_CHARENCODING_AVMMIX */
 }
 
 /*
@@ -1513,6 +1740,7 @@
  *	Use UTF-8 unless specified otherwise.
  */
 
+#ifndef AVM_CHARENCODING_AVMMIX
 int ntfs_set_char_encoding(const char *locale)
 {
 	use_utf8 = 0;
@@ -1528,6 +1756,7 @@
 	}
 	return 0; /* always successful */
 }
+#endif /* !AVM_CHARENCODING_AVMMIX */
 
 #if defined(__APPLE__) || defined(__DARWIN__)
 
diff -ur ../ntfs-3g_ntfsprogs-2015.3.14.orig/src/ntfs-3g_common.c ./src/ntfs-3g_common.c
--- ../ntfs-3g_ntfsprogs-2015.3.14.orig/src/ntfs-3g_common.c	2015-03-14 15:10:12.000000000 +0100
+++ ./src/ntfs-3g_common.c	2015-07-28 14:38:22.034143381 +0200
@@ -380,7 +380,9 @@
 		break;
 #endif
 	case OPT_LOCALE :
+#ifndef AVM_CHARENCODING_AVMMIX
 		ntfs_set_char_encoding(val);
+#endif
 		break;
 #if defined(__APPLE__) || defined(__DARWIN__)
 #ifdef ENABLE_NFCONV