From 30f18ecef9b1a60cca6b182386e4dabcce3141f0 Mon Sep 17 00:00:00 2001
From: JFronny
Date: Sat, 20 Apr 2024 14:35:41 +0200
Subject: [PATCH] feat(serialize-xml): initial work on NativeXmlWriter

---
 .../serialize/xml/NativeXmlReader.java        |  36 +-
 .../serialize/xml/NativeXmlWriter.java        | 465 ++++++++++++++++++
 .../commons/serialize/xml/impl/NameCheck.java |  55 +++
 3 files changed, 525 insertions(+), 31 deletions(-)
 create mode 100644 commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlWriter.java
 create mode 100644 commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/NameCheck.java

diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlReader.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlReader.java
index ef7bb79..b654550 100644
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlReader.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlReader.java
@@ -2,6 +2,7 @@ package io.gitlab.jfronny.commons.serialize.xml;
 
 import io.gitlab.jfronny.commons.serialize.MalformedDataException;
 import io.gitlab.jfronny.commons.serialize.StringEscapeUtil;
+import io.gitlab.jfronny.commons.serialize.xml.impl.NameCheck;
 import io.gitlab.jfronny.commons.serialize.xml.impl.XmlScope;
 
 import java.io.Closeable;
@@ -230,7 +231,7 @@ public class NativeXmlReader implements Closeable {
                 // fall through
             } else if (pos < limit || fillBuffer(1)) {
                 char chNext = buffer[pos + 1];
-                var check = isNameStart((char) c, chNext);
+                var check = NameCheck.isNameStart((char) c, chNext);
                 pos--;
                 if (check != NameCheck.NONE) {
                     return peeked = PEEKED_ATTRIBUTE_NAME;
@@ -289,7 +290,7 @@ public class NativeXmlReader implements Closeable {
                     }
                 }
             } else if (pos + 2 <= limit || fillBuffer(2)) {
-                var check = isNameStart(chNext, buffer[pos + 1]);
+                var check = NameCheck.isNameStart(chNext, buffer[pos + 1]);
                 if (check != NameCheck.NONE) {
                     return peeked = PEEKED_BEGIN_TAG;
                 }
@@ -302,33 +303,6 @@ public class NativeXmlReader implements Closeable {
         }
     }
 
-    private enum NameCheck { FIRST, BOTH, NONE }
-    private NameCheck isNameStart(char ch, char chNext) {
-        if ('A' <= ch && ch <= 'Z') return NameCheck.FIRST;
-        if ('a' <= ch && ch <= 'z') return NameCheck.FIRST;
-        return switch (ch) {
-            case ':', '_' -> NameCheck.FIRST;
-            case '\u2070' -> chNext == '\u218F' ? NameCheck.BOTH : NameCheck.NONE;
-            case '\u2C00' -> chNext == '\u2FEF' ? NameCheck.BOTH : NameCheck.NONE;
-            case '\u3001' -> chNext == '\uD7FF' ? NameCheck.BOTH : NameCheck.NONE;
-            case '\uF900' -> chNext == '\uFDCF' ? NameCheck.BOTH : NameCheck.NONE;
-            case '\uFDF0' -> chNext == '\uFFFD' ? NameCheck.BOTH : NameCheck.NONE;
-            default -> NameCheck.NONE;
-        };
-    }
-
-    private NameCheck isName(char ch, char chNext) {
-        var nameStart = isNameStart(ch, chNext);
-        if (nameStart != NameCheck.NONE) return nameStart;
-        if ('0' <= ch && ch <= '9') return NameCheck.FIRST;
-        return switch (ch) {
-            case '-', '.', '\u00B7' -> NameCheck.FIRST;
-            case '\u0300' -> chNext == '\u036F' ? NameCheck.BOTH : NameCheck.NONE;
-            case '\u203F' -> chNext == '\u2040' ? NameCheck.BOTH : NameCheck.NONE;
-            default -> NameCheck.NONE;
-        };
-    }
-
     public String nextAttributeName() throws IOException {
         int p = peeked;
         if (p == PEEKED_NONE) {
@@ -473,7 +447,7 @@ public class NativeXmlReader implements Closeable {
                 }
             }
             case PEEKED_ATTRIBUTE_NAME -> {
-                skipUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE);
+                skipUntil((c, i) -> NameCheck.isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE);
                 if (count == 0) pathNames[stackSize - 1] = "";
                 peeked = PEEKED_NONE;
             }
@@ -494,7 +468,7 @@ public class NativeXmlReader implements Closeable {
     }
 
     private String nextName() throws IOException {
-        return readUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
+        return readUntil((c, i) -> NameCheck.isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
     }
 
     @FunctionalInterface
diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlWriter.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlWriter.java
new file mode 100644
index 0000000..1e124ed
--- /dev/null
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/NativeXmlWriter.java
@@ -0,0 +1,465 @@
+package io.gitlab.jfronny.commons.serialize.xml;
+
+import io.gitlab.jfronny.commons.serialize.xml.impl.NameCheck;
+
+import java.io.Closeable;
+import java.io.Flushable;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.*;
+
+import static io.gitlab.jfronny.commons.serialize.xml.impl.XmlScope.*;
+
+/**
+ * Streaming XML writer. Tags, attributes, text, entity references, CDATA sections and comments
+ * are written straight to the wrapped {@link Writer} while a scope stack validates nesting.
+ */
+public class NativeXmlWriter implements Closeable, Flushable {
+    private final Writer out;
+    private int[] stack = new int[32];
+    private int stackSize = 0;
+
+    {
+        push(EMPTY_DOCUMENT);
+    }
+
+    // pathNames[i] holds the name of the tag that was opened while stack[i] was on top
+    private String[] pathNames = new String[32];
+
+    private String newline;
+    private String indent;
+    private boolean usesEmptyNewlineAndIndent;
+
+    // Comments wait here until the next node or closing tag is written
+    private final List<String> deferredComments = new ArrayList<>();
+    private boolean wasText = false;
+
+    private boolean lenient;
+    private boolean escapeNonAscii;
+
+    /**
+     * Creates a writer producing compact output (no line breaks, no indentation).
+     *
+     * @param out the sink to write to
+     * @throws NullPointerException if {@code out} is null
+     */
+    public NativeXmlWriter(Writer out) {
+        this.out = Objects.requireNonNull(out, "out == null");
+        newline = indent = "";
+        setIndent("");
+        setNewline("");
+    }
+
+    /** Sets lenient mode, in which recoverable misuse is repaired instead of rejected. */
+    public NativeXmlWriter setLenient(boolean lenient) {
+        this.lenient = lenient;
+        return this;
+    }
+
+    public boolean isLenient() {
+        return lenient;
+    }
+
+    /** Sets whether characters above U+007F are written as numeric character references. */
+    public NativeXmlWriter setEscapeNonAscii(boolean escapeNonAscii) {
+        this.escapeNonAscii = escapeNonAscii;
+        return this;
+    }
+
+    public boolean isEscapeNonAscii() {
+        return escapeNonAscii;
+    }
+
+    /**
+     * Sets the string written once per nesting level after every line break.
+     * A non-empty indent also switches the line break to {@code "\n"}.
+     *
+     * @param indent the indent unit; null or empty disables indentation
+     */
+    public NativeXmlWriter setIndent(String indent) {
+        if (indent == null || indent.isEmpty()) {
+            this.indent = "";
+            this.usesEmptyNewlineAndIndent = newline.isEmpty();
+        } else {
+            this.newline = "\n"; // if someone sets an indent, this is probably intended
+            this.indent = indent;
+            this.usesEmptyNewlineAndIndent = false;
+        }
+        return this;
+    }
+
+    public String getIndent() {
+        return indent;
+    }
+
+    /**
+     * Sets the line break written between nodes.
+     *
+     * @param newline the line break; null or empty disables it
+     */
+    public NativeXmlWriter setNewline(String newline) {
+        if (newline == null || newline.isEmpty()) {
+            this.newline = "";
+            this.usesEmptyNewlineAndIndent = indent.isEmpty();
+        } else {
+            this.newline = newline;
+            this.usesEmptyNewlineAndIndent = false;
+        }
+        return this;
+    }
+
+    public String getNewline() {
+        return newline;
+    }
+
+    /**
+     * Opens an element and leaves the writer in its tag head, ready for attributes.
+     *
+     * @param name the element name
+     * @throws IllegalArgumentException if {@code name} is not a legal XML name and the writer is not lenient
+     */
+    public NativeXmlWriter beginTag(String name) throws IOException {
+        Objects.requireNonNull(name, "name == null");
+        String written = checkedName(name); // validate before anything is emitted
+        wasText = false;
+        beforeValue();
+        // keep the name exactly as written so that endTag() emits a matching closing tag
+        pathNames[stackSize - 1] = written;
+        push(TAG_HEAD);
+        out.write('<');
+        out.write(written);
+        return this;
+    }
+
+    /**
+     * Closes the element opened by the matching {@link #beginTag(String)}. An element that
+     * received no content is written as an empty-element tag.
+     *
+     * @throws IllegalStateException if no element is open or an attribute still lacks its value
+     */
+    public NativeXmlWriter endTag() throws IOException {
+        int context = peek();
+        if (context != TAG_HEAD && context != TAG_BODY) {
+            throw new IllegalStateException("Nesting problem.");
+        }
+        wasText = false;
+
+        if (!deferredComments.isEmpty()) {
+            beforeValue();
+            newline();
+            writeDeferredComment();
+            context = TAG_BODY;
+        }
+
+        stackSize--;
+        if (context == TAG_HEAD) {
+            // the head was never terminated with '>', so a separate closing tag would be malformed
+            out.write("/>");
+            return this;
+        }
+        newline();
+        out.write("</");
+        out.write(pathNames[stackSize - 1]);
+        out.write('>');
+        return this;
+    }
+
+    private void push(int newTop) {
+        if (stackSize == stack.length) {
+            int newLength = stackSize * 2;
+            stack = Arrays.copyOf(stack, newLength);
+            pathNames = Arrays.copyOf(pathNames, newLength);
+        }
+        stack[stackSize++] = newTop;
+    }
+
+    /** Returns the value on the top of the stack. */
+    private int peek() {
+        if (stackSize == 0) {
+            throw new IllegalStateException("NativeXmlWriter is closed.");
+        }
+        return stack[stackSize - 1];
+    }
+
+    /** Replace the value on the top of the stack with the given value. */
+    private void replaceTop(int topOfStack) {
+        stack[stackSize - 1] = topOfStack;
+    }
+
+    /**
+     * Queues a comment; multi-line text is split at {@code '\n'}. Queued comments are written in
+     * front of the next node or closing tag, or right away when the document already has content
+     * and no element is open.
+     *
+     * @param comment the comment text; null or blank is ignored
+     */
+    public NativeXmlWriter comment(String comment) throws IOException {
+        if (comment == null || comment.isBlank()) return this;
+        wasText = false;
+        comment = comment.replace("-->", "--&gt;"); // "-->" would end the comment early
+        String[] parts = comment.split("\n");
+        Collections.addAll(deferredComments, parts);
+        if (peek() == NONEMPTY_DOCUMENT) {
+            newline();
+            writeDeferredComment();
+        }
+        return this;
+    }
+
+    /** Writes all queued comments, merged into a single comment when line breaks are disabled. */
+    private void writeDeferredComment() throws IOException {
+        if (deferredComments.isEmpty()) return;
+        if (newline.isEmpty()) {
+            out.append("<!-- ").append(String.join(" / ", deferredComments)).append(" -->");
+        } else {
+            boolean first = true;
+            for (String s : deferredComments) {
+                if (!first) newline();
+                first = false;
+                if (s == null || s.isBlank()) out.append("<!---->");
+                else out.append("<!-- ").append(s).append(" -->");
+            }
+        }
+        deferredComments.clear();
+    }
+
+    /** Writes a complete {@code name="value"} attribute into the open tag head. */
+    public NativeXmlWriter attribute(String name, String value) throws IOException {
+        return attributeName(name).attributeValue(value);
+    }
+
+    /**
+     * Writes an attribute name; {@link #attributeValue(String)} has to follow.
+     *
+     * @throws IllegalStateException if the writer is not inside a tag head
+     * @throws IllegalArgumentException if {@code name} is not a legal XML name and the writer is not lenient
+     */
+    public NativeXmlWriter attributeName(String name) throws IOException {
+        Objects.requireNonNull(name, "name == null");
+        String written = checkedName(name);
+        wasText = false;
+        if (peek() != TAG_HEAD) {
+            throw new IllegalStateException("Nesting problem.");
+        }
+        replaceTop(DANGLING_NAME);
+        out.write(' ');
+        out.write(written);
+        return this;
+    }
+
+    /**
+     * Writes the quoted value of the attribute started by {@link #attributeName(String)}.
+     *
+     * @param value the value; null is written as the string {@code "null"}
+     * @throws IllegalStateException if no attribute name is pending
+     */
+    public NativeXmlWriter attributeValue(String value) throws IOException {
+        value = value == null ? "null" : value;
+        wasText = false;
+        if (peek() != DANGLING_NAME) {
+            throw new IllegalStateException("Nesting problem.");
+        }
+        replaceTop(TAG_HEAD);
+        out.write('=');
+        out.write('"');
+        escapeText(value, lenient);
+        out.write('"');
+        return this;
+    }
+
+    /**
+     * Writes character data, escaping markup characters.
+     *
+     * @param text the text; null is treated as empty
+     */
+    public NativeXmlWriter text(String text) throws IOException {
+        text = text == null ? "" : text;
+        // NOTE(review): comment("") returns early for blank input, so this call has no effect yet
+        if (wasText) comment("");
+        beforeValue();
+        escapeText(text, true);
+        wasText = true;
+        return this;
+    }
+
+    /**
+     * Writes an entity reference.
+     *
+     * @param reference the entity name without the surrounding {@code &} and {@code ;}
+     * @throws IllegalArgumentException if {@code reference} is not an ASCII XML name
+     */
+    public NativeXmlWriter reference(String reference) throws IOException {
+        reference = reference == null ? "" : reference;
+        if (!reference.matches("[a-zA-Z_:][a-zA-Z0-9._:-]*")) {
+            throw new IllegalArgumentException("Invalid reference: " + reference);
+        }
+        wasText = false;
+        beforeValue();
+        out.write("&");
+        out.write(reference);
+        out.write(";");
+        return this;
+    }
+
+    /**
+     * Writes a CDATA section.
+     *
+     * @param cdata the raw content; null is treated as empty
+     * @throws IllegalArgumentException if {@code cdata} contains the section terminator
+     */
+    public NativeXmlWriter cdata(String cdata) throws IOException {
+        cdata = cdata == null ? "" : cdata;
+        if (cdata.contains("]]>")) throw new IllegalArgumentException("CDATA cannot contain ']]>'");
+        wasText = false;
+        beforeValue();
+        out.write("<![CDATA[");
+        out.write(cdata);
+        out.write("]]>");
+        return this;
+    }
+
+    @Override
+    public void flush() throws IOException {
+        if (stackSize == 0) {
+            throw new IllegalStateException("NativeXmlWriter is closed.");
+        }
+        out.flush();
+    }
+
+    /**
+     * Closes the underlying writer.
+     *
+     * @throws IOException if a tag is still open or nothing was written to the document
+     */
+    @Override
+    public void close() throws IOException {
+        out.close();
+
+        int size = stackSize;
+        if (size > 1 || (size == 1 && stack[size - 1] != NONEMPTY_DOCUMENT)) {
+            throw new IOException("Incomplete document");
+        }
+        stackSize = 0;
+    }
+
+    /**
+     * Validates {@code name} against the XML name grammar.
+     *
+     * @return the name to write; in lenient mode illegal characters are replaced by {@code '_'}
+     * @throws IllegalArgumentException if {@code name} is illegal and the writer is not lenient
+     */
+    private String checkedName(String name) {
+        int length = name.length();
+        if (length == 0) {
+            if (!lenient) throw new IllegalArgumentException("Name must not be empty");
+            return "_";
+        }
+        StringBuilder sanitized = null; // allocated lazily, only once a character is replaced
+        for (int i = 0; i < length; i++) {
+            char c = name.charAt(i);
+            char n = i + 1 < length ? name.charAt(i + 1) : '\0';
+            NameCheck check = i == 0 ? NameCheck.isNameStart(c, n) : NameCheck.isName(c, n);
+            switch (check) {
+                case NONE -> {
+                    if (!lenient) {
+                        throw new IllegalArgumentException("Illegal character in name: " + name);
+                    }
+                    if (sanitized == null) sanitized = new StringBuilder(length).append(name, 0, i);
+                    sanitized.append('_');
+                }
+                case BOTH -> {
+                    if (sanitized != null) sanitized.append(c).append(n);
+                    i++; // the lookahead character belongs to this one
+                }
+                case FIRST -> {
+                    if (sanitized != null) sanitized.append(c);
+                }
+            }
+        }
+        return sanitized == null ? name : sanitized.toString();
+    }
+
+    /**
+     * Writes {@code text} with markup characters replaced by entity references.
+     *
+     * @param permitControl whether control characters other than tab are accepted
+     * @throws IllegalArgumentException if a control character is found but not permitted
+     */
+    private void escapeText(String text, boolean permitControl) throws IOException {
+        int last = 0;
+        int length = text.length();
+        for (int i = 0; i < length; i++) {
+            char c = text.charAt(i);
+            if (!permitControl && c < 0x20 && c != 0x09) throw new IllegalArgumentException("Illegal control character in text: " + text);
+            int width = 1;
+            String replacement = switch (c) {
+                case '&' -> "&amp;";
+                case '<' -> "&lt;";
+                case '>' -> "&gt;";
+                case '"' -> "&quot;";
+                case '\'' -> "&apos;";
+                default -> null;
+            };
+            if (replacement == null && escapeNonAscii && c > 127) {
+                // escape whole code points: half of a surrogate pair is not a valid character reference
+                int codePoint = text.codePointAt(i);
+                width = Character.charCount(codePoint);
+                replacement = "&#" + codePoint + ";";
+            }
+            if (replacement == null) continue;
+            if (last < i) out.write(text, last, i - last);
+            out.write(replacement);
+            i += width - 1;
+            last = i + 1;
+        }
+        if (last < length) out.write(text, last, length - last);
+    }
+
+    private void newline() throws IOException {
+        if (usesEmptyNewlineAndIndent) {
+            return;
+        }
+
+        out.write(newline);
+        for (int i = 1, size = stackSize; i < size; i++) {
+            out.write(indent);
+        }
+    }
+
+    /** Prepares the current scope for a child node: terminates an open tag head and flushes comments. */
+    private void beforeValue() throws IOException {
+        switch (peek()) {
+            case NONEMPTY_DOCUMENT:
+                if (!lenient) {
+                    throw new IllegalStateException("XML must have only one top-level value.");
+                }
+                // fall through
+            case EMPTY_DOCUMENT:
+                replaceTop(NONEMPTY_DOCUMENT);
+                if (!deferredComments.isEmpty()) {
+                    newline();
+                    writeDeferredComment();
+                }
+                break;
+            case DANGLING_NAME:
+                if (!lenient) {
+                    throw new IllegalStateException("Attribute name must be followed by a value");
+                }
+                attributeValue("null");
+                // fall through
+            case TAG_HEAD:
+                out.write('>');
+                replaceTop(TAG_BODY);
+                // fall through
+            case TAG_BODY:
+                newline();
+                if (!deferredComments.isEmpty()) {
+                    writeDeferredComment();
+                    newline();
+                }
+                break;
+            default:
+                throw new IllegalStateException("Nesting problem.");
+        }
+    }
+}
diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/NameCheck.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/NameCheck.java
new file mode 100644
index 0000000..111d6b7
--- /dev/null
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/NameCheck.java
@@ -0,0 +1,55 @@
+package io.gitlab.jfronny.commons.serialize.xml.impl;
+
+/**
+ * Outcome of testing a UTF-16 code unit, with one unit of lookahead, against the
+ * {@code NameStartChar} and {@code NameChar} productions of XML 1.0.
+ */
+public enum NameCheck {
+    /** The first code unit on its own is a legal name character. */
+    FIRST,
+    /** Both code units form a surrogate pair that encodes a legal name character. */
+    BOTH,
+    /** The character is not allowed at this position. */
+    NONE;
+
+    /**
+     * Tests whether a name may start with {@code ch}.
+     *
+     * @param ch the code unit to test
+     * @param chNext the following code unit; only inspected when {@code ch} is a high surrogate
+     */
+    public static NameCheck isNameStart(char ch, char chNext) {
+        if (Character.isHighSurrogate(ch)) {
+            // supplementary characters: NameStartChar includes [#x10000-#xEFFFF]
+            boolean pair = Character.isLowSurrogate(chNext) && Character.toCodePoint(ch, chNext) <= 0xEFFFF;
+            return pair ? BOTH : NONE;
+        }
+        if ('A' <= ch && ch <= 'Z') return FIRST;
+        if ('a' <= ch && ch <= 'z') return FIRST;
+        if (ch == ':' || ch == '_') return FIRST;
+        // the remaining NameStartChar entries are ranges of characters, not two-character sequences
+        boolean legal = inRange(ch, 0xC0, 0xD6) || inRange(ch, 0xD8, 0xF6) || inRange(ch, 0xF8, 0x2FF)
+                || inRange(ch, 0x370, 0x37D) || inRange(ch, 0x37F, 0x1FFF) || inRange(ch, 0x200C, 0x200D)
+                || inRange(ch, 0x2070, 0x218F) || inRange(ch, 0x2C00, 0x2FEF) || inRange(ch, 0x3001, 0xD7FF)
+                || inRange(ch, 0xF900, 0xFDCF) || inRange(ch, 0xFDF0, 0xFFFD);
+        return legal ? FIRST : NONE;
+    }
+
+    /**
+     * Tests whether {@code ch} may appear in a name after its first character.
+     *
+     * @param ch the code unit to test
+     * @param chNext the following code unit; only inspected when {@code ch} is a high surrogate
+     */
+    public static NameCheck isName(char ch, char chNext) {
+        var nameStart = isNameStart(ch, chNext);
+        if (nameStart != NONE) return nameStart;
+        if ('0' <= ch && ch <= '9') return FIRST;
+        if (ch == '-' || ch == '.' || ch == '\u00B7') return FIRST;
+        return inRange(ch, 0x0300, 0x036F) || inRange(ch, 0x203F, 0x2040) ? FIRST : NONE;
+    }
+
+    private static boolean inRange(char ch, int lo, int hi) {
+        return lo <= ch && ch <= hi;
+    }
+}