/* Java format strings.
   Copyright (C) 2001-2004, 2006-2007, 2009, 2015-2016 Free Software
   Foundation, Inc.
   Written by Bruno Haible, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdbool.h>
#include <stdlib.h>
#include <alloca.h>
#include <string.h>

#include "format.h"
#include "c-ctype.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "xvasprintf.h"
#include "format-invalid.h"
#include "gettext.h"

#define _(str) gettext (str)

/* Java format strings are described in java/text/MessageFormat.html.
   See also the ICU documentation class_MessageFormat.html.

   messageFormatPattern := string ( "{" messageFormatElement "}" string )*

   messageFormatElement := argument { "," elementFormat }

   elementFormat := "time" { "," datetimeStyle }
                  | "date" { "," datetimeStyle }
                  | "number" { "," numberStyle }
                  | "choice" { "," choiceStyle }

   datetimeStyle := "short" | "medium" | "long" | "full" | dateFormatPattern

   numberStyle := "currency" | "percent" | "integer" | numberFormatPattern

   choiceStyle := choiceFormatPattern

   dateFormatPattern see SimpleDateFormat.applyPattern

   numberFormatPattern see DecimalFormat.applyPattern

   choiceFormatPattern see ChoiceFormat constructor

   In strings, literal curly braces can be used if quoted between single
   quotes.  A real single quote is represented by ''.

   If a pattern is used, then unquoted braces in the pattern, if any, must
   match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and
   "ab } de" are not.

The argument is a number from 0 to 9, which corresponds to the
   arguments presented in an array to be formatted.  It is ok to have unused
   arguments in the array.  (The parser in this file accepts any sequence of
   decimal digits as the argument number, not just a single digit.)

   Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
   to an elementFormat is equivalent to creating a SimpleDateFormat /
   DecimalFormat / ChoiceFormat and use of setFormat.  For example,

     MessageFormat form =
       new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");

   is equivalent to

     MessageFormat form =
       new MessageFormat("The disk \"{1}\" contains {0}.");
     form.setFormat(1, // Number of {} occurrence in the string!
                    new ChoiceFormat(new double[] { 0, 1, 2 },
                                     new String[] { "no files", "one file",
                                                    "{0,number} files" }));

   Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
   Example 1: "abc{1,choice,0#{1,number,00';'000}}def"
     JDK 1.1.x: exception
     JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
   Example 2: "abc{1,choice,0#{1,number,00';'}}def"
     JDK 1.1.x: interprets the semicolon as number suffix
     JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
 */

/* The type an argument must have, as implied by the directives that
   reference it.  */
enum format_arg_type
{
  FAT_NONE,     /* no valid type; format_parse stores it when two directives
                   for the same argument have incompatible types */
  FAT_OBJECT,   /* java.lang.Object */
  FAT_NUMBER,   /* java.lang.Number */
  FAT_DATE      /* java.util.Date */
};

/* One argument reference found in a format string.  */
struct numbered_arg
{
  unsigned int number;          /* argument index written inside the braces */
  enum format_arg_type type;    /* type implied by the element format */
};

/* The result of parsing a format string.  */
struct spec
{
  /* Number of '{' directives seen so far, including directives nested
     inside a choice pattern.  Also used to number error messages.  */
  unsigned int directives;
  /* Number of entries of 'numbered' that are in use.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which 'numbered' has room.  */
  unsigned int allocated;
  /* Heap-allocated array of argument references, or NULL while empty.
     format_parse sorts it by argument number and merges duplicates.  */
  struct numbered_arg *numbered;
};


/* Forward declaration of local functions.  */
static bool date_format_parse (const char *format);
static bool number_format_parse (const char *format);
static bool choice_format_parse (const char *format, struct spec *spec,
                                 char **invalid_reason);


/* Quote handling:
   - When we see a single-quote, ignore it, but toggle the quoting flag.
   - When we see a double single-quote, ignore the first of the two.
   Assumes local variables format, quoting.
*/ #define HANDLE_QUOTE \ if (*format == '\'' && *++format != '\'') \ quoting = !quoting; /* Note that message_format_parse and choice_format_parse are mutually recursive. This is because MessageFormat can use some ChoiceFormats, and a ChoiceFormat is made up from several MessageFormats. */ /* Return true if a format is a valid messageFormatPattern. Extracts argument type information into spec. */ static bool message_format_parse (const char *format, char *fdi, struct spec *spec, char **invalid_reason) { const char *const format_start = format; bool quoting = false; for (;;) { HANDLE_QUOTE; if (!quoting && *format == '{') { unsigned int depth; const char *element_start; const char *element_end; size_t n; char *element_alloced; char *element; unsigned int number; enum format_arg_type type; FDI_SET (format, FMTDIR_START); spec->directives++; element_start = ++format; depth = 0; for (; *format != '\0'; format++) { if (*format == '{') depth++; else if (*format == '}') { if (depth == 0) break; else depth--; } } if (*format == '\0') { *invalid_reason = xstrdup (_("The string ends in the middle of a directive: found '{' without matching '}'.")); FDI_SET (format - 1, FMTDIR_ERROR); return false; } element_end = format++; n = element_end - element_start; element = element_alloced = (char *) xmalloca (n + 1); memcpy (element, element_start, n); element[n] = '\0'; if (!c_isdigit (*element)) { *invalid_reason = xasprintf (_("In the directive number %u, '{' is not followed by an argument number."), spec->directives); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } number = 0; do { number = 10 * number + (*element - '0'); element++; } while (c_isdigit (*element)); type = FAT_OBJECT; if (*element == '\0') ; else if (strncmp (element, ",time", 5) == 0 || strncmp (element, ",date", 5) == 0) { type = FAT_DATE; element += 5; if (*element == '\0') ; else if (*element == ',') { element++; if (strcmp (element, "short") == 0 || strcmp (element, "medium") == 0 
|| strcmp (element, "long") == 0 || strcmp (element, "full") == 0 || date_format_parse (element)) ; else { *invalid_reason = xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid date/time style."), spec->directives, element); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else { *element = '\0'; element -= 4; *invalid_reason = xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else if (strncmp (element, ",number", 7) == 0) { type = FAT_NUMBER; element += 7; if (*element == '\0') ; else if (*element == ',') { element++; if (strcmp (element, "currency") == 0 || strcmp (element, "percent") == 0 || strcmp (element, "integer") == 0 || number_format_parse (element)) ; else { *invalid_reason = xasprintf (_("In the directive number %u, the substring \"%s\" is not a valid number style."), spec->directives, element); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else { *element = '\0'; element -= 6; *invalid_reason = xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else if (strncmp (element, ",choice", 7) == 0) { type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */ element += 7; if (*element == '\0') ; else if (*element == ',') { element++; if (choice_format_parse (element, spec, invalid_reason)) ; else { FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else { *element = '\0'; element -= 6; *invalid_reason = xasprintf (_("In the directive number %u, \"%s\" is not followed by a comma."), spec->directives, element); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } } else { *invalid_reason = xasprintf (_("In the directive number %u, the argument 
number is not followed by a comma and one of \"%s\", \"%s\", \"%s\", \"%s\"."), spec->directives, "time", "date", "number", "choice"); FDI_SET (format - 1, FMTDIR_ERROR); freea (element_alloced); return false; } freea (element_alloced); if (spec->allocated == spec->numbered_arg_count) { spec->allocated = 2 * spec->allocated + 1; spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct numbered_arg)); } spec->numbered[spec->numbered_arg_count].number = number; spec->numbered[spec->numbered_arg_count].type = type; spec->numbered_arg_count++; FDI_SET (format - 1, FMTDIR_END); } /* The doc says "ab}de" is invalid. Even though JDK accepts it. */ else if (!quoting && *format == '}') { FDI_SET (format, FMTDIR_START); *invalid_reason = xstrdup (_("The string starts in the middle of a directive: found '}' without matching '{'.")); FDI_SET (format, FMTDIR_ERROR); return false; } else if (*format != '\0') format++; else break; } return true; } /* Return true if a format is a valid dateFormatPattern. */ static bool date_format_parse (const char *format) { /* Any string is valid. Single-quote starts a quoted section, to be terminated at the next single-quote or string end. Double single-quote gives a single single-quote. Non-quoted ASCII letters are first grouped into blocks of equal letters. Then each block (e.g. 'yyyy') is interpreted according to some rules. */ return true; } /* Return true if a format is a valid numberFormatPattern. */ static bool number_format_parse (const char *format) { /* Pattern Syntax: pattern := pos_pattern{';' neg_pattern} pos_pattern := {prefix}number{suffix} neg_pattern := {prefix}number{suffix} number := integer{'.' 
fraction}{exponent} prefix := '\u0000'..'\uFFFD' - special_characters suffix := '\u0000'..'\uFFFD' - special_characters integer := min_int | '#' | '#' integer | '#' ',' integer min_int := '0' | '0' min_int | '0' ',' min_int fraction := '0'* '#'* exponent := 'E' '0' '0'* Notation: X* 0 or more instances of X { X } 0 or 1 instances of X X | Y either X or Y X..Y any character from X up to Y, inclusive S - T characters in S, except those in T Single-quote starts a quoted section, to be terminated at the next single-quote or string end. Double single-quote gives a single single-quote. */ bool quoting = false; bool seen_semicolon = false; HANDLE_QUOTE; for (;;) { /* Parse prefix. */ while (*format != '\0' && !(!quoting && (*format == '0' || *format == '#'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; HANDLE_QUOTE; } /* Parse integer. */ if (!(!quoting && (*format == '0' || *format == '#'))) return false; while (!quoting && *format == '#') { format++; HANDLE_QUOTE; if (!quoting && *format == ',') { format++; HANDLE_QUOTE; } } while (!quoting && *format == '0') { format++; HANDLE_QUOTE; if (!quoting && *format == ',') { format++; HANDLE_QUOTE; } } /* Parse fraction. */ if (!quoting && *format == '.') { format++; HANDLE_QUOTE; while (!quoting && *format == '0') { format++; HANDLE_QUOTE; } while (!quoting && *format == '#') { format++; HANDLE_QUOTE; } } /* Parse exponent. */ if (!quoting && *format == 'E') { const char *format_save = format; format++; HANDLE_QUOTE; if (!quoting && *format == '0') { do { format++; HANDLE_QUOTE; } while (!quoting && *format == '0'); } else { /* Back up. */ format = format_save; quoting = false; } } /* Parse suffix. 
*/ while (*format != '\0' && (seen_semicolon || !(!quoting && *format == ';'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; HANDLE_QUOTE; } if (seen_semicolon || !(!quoting && *format == ';')) break; } return (*format == '\0'); } /* Return true if a format is a valid choiceFormatPattern. Extracts argument type information into spec. */ static bool choice_format_parse (const char *format, struct spec *spec, char **invalid_reason) { /* Pattern syntax: pattern := | choice | choice '|' pattern choice := number separator messageformat separator := '<' | '#' | '\u2264' Single-quote starts a quoted section, to be terminated at the next single-quote or string end. Double single-quote gives a single single-quote. */ bool quoting = false; HANDLE_QUOTE; if (*format == '\0') return true; for (;;) { /* Don't bother looking too precisely into the syntax of the number. It can contain various Unicode characters. */ bool number_nonempty; char *msgformat; char *mp; bool msgformat_valid; /* Parse number. */ number_nonempty = false; while (*format != '\0' && !(!quoting && (*format == '<' || *format == '#' || strncmp (format, "\\u2264", 6) == 0 || *format == '|'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; number_nonempty = true; HANDLE_QUOTE; } /* Short clause at end of pattern is valid and is ignored! 
*/ if (*format == '\0') break; if (!number_nonempty) { *invalid_reason = xasprintf (_("In the directive number %u, a choice contains no number."), spec->directives); return false; } if (*format == '<' || *format == '#') format += 1; else if (strncmp (format, "\\u2264", 6) == 0) format += 6; else { *invalid_reason = xasprintf (_("In the directive number %u, a choice contains a number that is not followed by '<', '#' or '%s'."), spec->directives, "\\u2264"); return false; } HANDLE_QUOTE; msgformat = (char *) xmalloca (strlen (format) + 1); mp = msgformat; while (*format != '\0' && !(!quoting && *format == '|')) { *mp++ = *format++; HANDLE_QUOTE; } *mp = '\0'; msgformat_valid = message_format_parse (msgformat, NULL, spec, invalid_reason); freea (msgformat); if (!msgformat_valid) return false; if (*format == '\0') break; format++; HANDLE_QUOTE; } return true; } static int numbered_arg_compare (const void *p1, const void *p2) { unsigned int n1 = ((const struct numbered_arg *) p1)->number; unsigned int n2 = ((const struct numbered_arg *) p2)->number; return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); } static void * format_parse (const char *format, bool translated, char *fdi, char **invalid_reason) { struct spec spec; struct spec *result; spec.directives = 0; spec.numbered_arg_count = 0; spec.allocated = 0; spec.numbered = NULL; if (!message_format_parse (format, fdi, &spec, invalid_reason)) goto bad_format; /* Sort the numbered argument array, and eliminate duplicates. */ if (spec.numbered_arg_count > 1) { unsigned int i, j; bool err; qsort (spec.numbered, spec.numbered_arg_count, sizeof (struct numbered_arg), numbered_arg_compare); /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. 
*/ err = false; for (i = j = 0; i < spec.numbered_arg_count; i++) if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) { enum format_arg_type type1 = spec.numbered[i].type; enum format_arg_type type2 = spec.numbered[j-1].type; enum format_arg_type type_both; if (type1 == type2 || type2 == FAT_OBJECT) type_both = type1; else if (type1 == FAT_OBJECT) type_both = type2; else { /* Incompatible types. */ type_both = FAT_NONE; if (!err) *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number); err = true; } spec.numbered[j-1].type = type_both; } else { if (j < i) { spec.numbered[j].number = spec.numbered[i].number; spec.numbered[j].type = spec.numbered[i].type; } j++; } spec.numbered_arg_count = j; if (err) /* *invalid_reason has already been set above. */ goto bad_format; } result = XMALLOC (struct spec); *result = spec; return result; bad_format: if (spec.numbered != NULL) free (spec.numbered); return NULL; } static void format_free (void *descr) { struct spec *spec = (struct spec *) descr; if (spec->numbered != NULL) free (spec->numbered); free (spec); } static int format_get_number_of_directives (void *descr) { struct spec *spec = (struct spec *) descr; return spec->directives; } static bool format_check (void *msgid_descr, void *msgstr_descr, bool equality, formatstring_error_logger_t error_logger, const char *pretty_msgid, const char *pretty_msgstr) { struct spec *spec1 = (struct spec *) msgid_descr; struct spec *spec2 = (struct spec *) msgstr_descr; bool err = false; if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) { unsigned int i, j; unsigned int n1 = spec1->numbered_arg_count; unsigned int n2 = spec2->numbered_arg_count; /* Check the argument names are the same. Both arrays are sorted. We search for the first difference. */ for (i = 0, j = 0; i < n1 || j < n2; ) { int cmp = (i >= n1 ? 1 : j >= n2 ? -1 : spec1->numbered[i].number > spec2->numbered[j].number ? 
1 : spec1->numbered[i].number < spec2->numbered[j].number ? -1 : 0); if (cmp > 0) { if (error_logger) error_logger (_("a format specification for argument {%u}, as in '%s', doesn't exist in '%s'"), spec2->numbered[j].number, pretty_msgstr, pretty_msgid); err = true; break; } else if (cmp < 0) { if (equality) { if (error_logger) error_logger (_("a format specification for argument {%u} doesn't exist in '%s'"), spec1->numbered[i].number, pretty_msgstr); err = true; break; } else i++; } else j++, i++; } /* Check the argument types are the same. */ if (!err) for (i = 0, j = 0; j < n2; ) { if (spec1->numbered[i].number == spec2->numbered[j].number) { if (spec1->numbered[i].type != spec2->numbered[j].type) { if (error_logger) error_logger (_("format specifications in '%s' and '%s' for argument {%u} are not the same"), pretty_msgid, pretty_msgstr, spec2->numbered[j].number); err = true; break; } j++, i++; } else i++; } } return err; } struct formatstring_parser formatstring_java = { format_parse, format_free, format_get_number_of_directives, NULL, format_check }; #ifdef TEST /* Test program: Print the argument list specification returned by format_parse for strings read from standard input. 
*/ #include static void format_print (void *descr) { struct spec *spec = (struct spec *) descr; unsigned int last; unsigned int i; if (spec == NULL) { printf ("INVALID"); return; } printf ("("); last = 0; for (i = 0; i < spec->numbered_arg_count; i++) { unsigned int number = spec->numbered[i].number; if (i > 0) printf (" "); if (number < last) abort (); for (; last < number; last++) printf ("_ "); switch (spec->numbered[i].type) { case FAT_OBJECT: printf ("*"); break; case FAT_NUMBER: printf ("Number"); break; case FAT_DATE: printf ("Date"); break; default: abort (); } last = number + 1; } printf (")"); } int main () { for (;;) { char *line = NULL; size_t line_size = 0; int line_len; char *invalid_reason; void *descr; line_len = getline (&line, &line_size, stdin); if (line_len < 0) break; if (line_len > 0 && line[line_len - 1] == '\n') line[--line_len] = '\0'; invalid_reason = NULL; descr = format_parse (line, false, NULL, &invalid_reason); format_print (descr); printf ("\n"); if (descr == NULL) printf ("%s\n", invalid_reason); free (invalid_reason); free (line); } return 0; } /* * For Emacs M-x compile * Local Variables: * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../gnulib-lib/libgettextlib.la" * End: */ #endif /* TEST */