JsonWriter: escape strings through lookup tables instead of a per-character switch.

Adds two 128-entry tables, REPLACEMENT_CHARS and HTML_SAFE_REPLACEMENT_CHARS,
filled once in a static initializer, and rewrites string(String) to look up
each char below 128 in the selected table, special-case U+2028 and U+2029,
and write each unescaped run with a single out.write(value, offset, count)
call rather than one char at a time.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Hunk line counts were checked against the
hunk headers, but leading whitespace is inferred from 2-space nesting.
Confirm against the target file, or apply with whitespace tolerance.

diff --git a/gson/src/main/java/com/google/gson/stream/JsonWriter.java b/gson/src/main/java/com/google/gson/stream/JsonWriter.java
index 42f9e899..c0e14540 100644
--- a/gson/src/main/java/com/google/gson/stream/JsonWriter.java
+++ b/gson/src/main/java/com/google/gson/stream/JsonWriter.java
@@ -122,6 +122,38 @@ import java.util.List;
  */
 public class JsonWriter implements Closeable {
 
+  /*
+   * From RFC 4627, "All Unicode characters may be placed within the
+   * quotation marks except for the characters that must be escaped:
+   * quotation mark, reverse solidus, and the control characters
+   * (U+0000 through U+001F)."
+   *
+   * We also escape '\u2028' and '\u2029', which JavaScript interprets as
+   * newline characters. This prevents eval() from failing with a syntax
+   * error. http://code.google.com/p/google-gson/issues/detail?id=341
+   */
+  private static final String[] REPLACEMENT_CHARS;
+  private static final String[] HTML_SAFE_REPLACEMENT_CHARS;
+  static {
+    REPLACEMENT_CHARS = new String[128];
+    for (int i = 0; i <= 0x1f; i++) {
+      REPLACEMENT_CHARS[i] = String.format("\\u%04x", (int) i);
+    }
+    REPLACEMENT_CHARS['"'] = "\\\"";
+    REPLACEMENT_CHARS['\\'] = "\\\\";
+    REPLACEMENT_CHARS['\t'] = "\\t";
+    REPLACEMENT_CHARS['\b'] = "\\b";
+    REPLACEMENT_CHARS['\n'] = "\\n";
+    REPLACEMENT_CHARS['\r'] = "\\r";
+    REPLACEMENT_CHARS['\f'] = "\\f";
+    HTML_SAFE_REPLACEMENT_CHARS = REPLACEMENT_CHARS.clone();
+    HTML_SAFE_REPLACEMENT_CHARS['<'] = "\\u003c";
+    HTML_SAFE_REPLACEMENT_CHARS['>'] = "\\u003e";
+    HTML_SAFE_REPLACEMENT_CHARS['&'] = "\\u0026";
+    HTML_SAFE_REPLACEMENT_CHARS['='] = "\\u003d";
+    HTML_SAFE_REPLACEMENT_CHARS['\''] = "\\u0027";
+  }
+
   /** The output data, containing at most one top-level array or object. */
   private final Writer out;
 
@@ -482,72 +514,33 @@ public class JsonWriter implements Closeable {
   }
 
   private void string(String value) throws IOException {
+    String[] replacements = htmlSafe ? HTML_SAFE_REPLACEMENT_CHARS : REPLACEMENT_CHARS;
     out.write("\"");
-    for (int i = 0, length = value.length(); i < length; i++) {
+    int last = 0;
+    int length = value.length();
+    for (int i = 0; i < length; i++) {
       char c = value.charAt(i);
-
-      /*
-       * From RFC 4627, "All Unicode characters may be placed within the
-       * quotation marks except for the characters that must be escaped:
-       * quotation mark, reverse solidus, and the control characters
-       * (U+0000 through U+001F)."
-       *
-       * We also escape '\u2028' and '\u2029', which JavaScript interprets as
-       * newline characters. This prevents eval() from failing with a syntax
-       * error. http://code.google.com/p/google-gson/issues/detail?id=341
-       */
-      switch (c) {
-      case '"':
-      case '\\':
-        out.write('\\');
-        out.write(c);
-        break;
-
-      case '\t':
-        out.write("\\t");
-        break;
-
-      case '\b':
-        out.write("\\b");
-        break;
-
-      case '\n':
-        out.write("\\n");
-        break;
-
-      case '\r':
-        out.write("\\r");
-        break;
-
-      case '\f':
-        out.write("\\f");
-        break;
-
-      case '<':
-      case '>':
-      case '&':
-      case '=':
-      case '\'':
-        if (htmlSafe) {
-          out.write(String.format("\\u%04x", (int) c));
-        } else {
-          out.write(c);
+      String replacement;
+      if (c < 128) {
+        replacement = replacements[c];
+        if (replacement == null) {
+          continue;
         }
-        break;
-
-      case '\u2028':
-      case '\u2029':
-        out.write(String.format("\\u%04x", (int) c));
-        break;
-
-      default:
-        if (c <= 0x1F) {
-          out.write(String.format("\\u%04x", (int) c));
-        } else {
-          out.write(c);
-        }
-        break;
+      } else if (c == '\u2028') {
+        replacement = "\\u2028";
+      } else if (c == '\u2029') {
+        replacement = "\\u2029";
+      } else {
+        continue;
       }
+      if (last < i) {
+        out.write(value, last, i - last);
+      }
+      out.write(replacement);
+      last = i + 1;
+    }
+    if (last < length) {
+      out.write(value, last, length - last);
     }
     out.write("\"");
   }