feat(serialize-xml): initial work on NativeXmlWriter

This commit is contained in:
Johannes Frohnmeyer 2024-04-20 14:35:41 +02:00
parent 7bcde68b9f
commit 30f18ecef9
Signed by: Johannes
GPG Key ID: E76429612C2929F4
3 changed files with 391 additions and 31 deletions

View File

@ -2,6 +2,7 @@ package io.gitlab.jfronny.commons.serialize.xml;
import io.gitlab.jfronny.commons.serialize.MalformedDataException;
import io.gitlab.jfronny.commons.serialize.StringEscapeUtil;
import io.gitlab.jfronny.commons.serialize.xml.impl.NameCheck;
import io.gitlab.jfronny.commons.serialize.xml.impl.XmlScope;
import java.io.Closeable;
@ -230,7 +231,7 @@ public class NativeXmlReader implements Closeable {
// fall through
} else if (pos < limit || fillBuffer(1)) {
char chNext = buffer[pos + 1];
var check = isNameStart((char) c, chNext);
var check = NameCheck.isNameStart((char) c, chNext);
pos--;
if (check != NameCheck.NONE) {
return peeked = PEEKED_ATTRIBUTE_NAME;
@ -289,7 +290,7 @@ public class NativeXmlReader implements Closeable {
}
}
} else if (pos + 2 <= limit || fillBuffer(2)) {
var check = isNameStart(chNext, buffer[pos + 1]);
var check = NameCheck.isNameStart(chNext, buffer[pos + 1]);
if (check != NameCheck.NONE) {
return peeked = PEEKED_BEGIN_TAG;
}
@ -302,33 +303,6 @@ public class NativeXmlReader implements Closeable {
}
}
private enum NameCheck { FIRST, BOTH, NONE }
/**
 * Classifies {@code ch} as a candidate first character of an XML name.
 *
 * @param ch the character to classify
 * @param chNext the character following {@code ch}; only consulted for the five hard-coded pairs below
 * @return {@code FIRST} for ASCII letters, {@code ':'} and {@code '_'}; {@code BOTH} when {@code ch} and
 *         {@code chNext} are exactly one of the hard-coded pairs; {@code NONE} for everything else
 */
private NameCheck isNameStart(char ch, char chNext) {
if ('A' <= ch && ch <= 'Z') return NameCheck.FIRST;
if ('a' <= ch && ch <= 'z') return NameCheck.FIRST;
return switch (ch) {
case ':', '_' -> NameCheck.FIRST;
// NOTE(review): each pair below equals the two endpoints of a NameStartChar range in the XML 1.0
// grammar (e.g. U+2070 to U+218F). Matching them as two consecutive characters instead of as a
// range looks unintended - confirm against the specification.
case '\u2070' -> chNext == '\u218F' ? NameCheck.BOTH : NameCheck.NONE;
case '\u2C00' -> chNext == '\u2FEF' ? NameCheck.BOTH : NameCheck.NONE;
case '\u3001' -> chNext == '\uD7FF' ? NameCheck.BOTH : NameCheck.NONE;
case '\uF900' -> chNext == '\uFDCF' ? NameCheck.BOTH : NameCheck.NONE;
case '\uFDF0' -> chNext == '\uFFFD' ? NameCheck.BOTH : NameCheck.NONE;
default -> NameCheck.NONE;
};
}
/**
 * Classifies {@code ch} as a candidate non-initial character of an XML name.
 *
 * <p>Anything {@code isNameStart} accepts is accepted with the same result. In addition, ASCII digits,
 * {@code '-'}, {@code '.'} and U+00B7 yield {@code FIRST}, and two further hard-coded pairs yield
 * {@code BOTH}.
 *
 * @param ch the character to classify
 * @param chNext the character following {@code ch}; only consulted for the hard-coded pairs
 * @return {@code FIRST}, {@code BOTH} or {@code NONE} as described above
 */
private NameCheck isName(char ch, char chNext) {
var nameStart = isNameStart(ch, chNext);
if (nameStart != NameCheck.NONE) return nameStart;
if ('0' <= ch && ch <= '9') return NameCheck.FIRST;
return switch (ch) {
case '-', '.', '\u00B7' -> NameCheck.FIRST;
// NOTE(review): as in isNameStart, these pairs equal the endpoints of NameChar ranges
// (U+0300 to U+036F, U+203F to U+2040); range membership was presumably intended - confirm.
case '\u0300' -> chNext == '\u036F' ? NameCheck.BOTH : NameCheck.NONE;
case '\u203F' -> chNext == '\u2040' ? NameCheck.BOTH : NameCheck.NONE;
default -> NameCheck.NONE;
};
}
public String nextAttributeName() throws IOException {
int p = peeked;
if (p == PEEKED_NONE) {
@ -473,7 +447,7 @@ public class NativeXmlReader implements Closeable {
}
}
case PEEKED_ATTRIBUTE_NAME -> {
skipUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE);
skipUntil((c, i) -> NameCheck.isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE);
if (count == 0) pathNames[stackSize - 1] = "<skipped>";
peeked = PEEKED_NONE;
}
@ -494,7 +468,7 @@ public class NativeXmlReader implements Closeable {
}
private String nextName() throws IOException {
return readUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
return readUntil((c, i) -> NameCheck.isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
}
@FunctionalInterface

View File

@ -0,0 +1,355 @@
package io.gitlab.jfronny.commons.serialize.xml;
import io.gitlab.jfronny.commons.serialize.xml.impl.NameCheck;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.Writer;
import java.util.*;
import static io.gitlab.jfronny.commons.serialize.xml.impl.XmlScope.*;
/**
 * Streaming writer that emits XML tokens (tags, attributes, text, comments, references, CDATA) to a
 * {@link Writer}.
 *
 * <p>The writer tracks its position with a stack of scope constants. Calls that do not fit the current
 * scope fail with {@link IllegalStateException}. In lenient mode the writer instead repairs what it can:
 * illegal name characters become {@code '_'}, a dangling attribute name receives the value
 * {@code "null"}, control characters in attribute values are passed through and more than one
 * top-level value is accepted.
 *
 * <p>Comments are collected and written together with the next token, except directly at document
 * level after the first value, where they are written immediately.
 */
public class NativeXmlWriter implements Closeable, Flushable {
    /** Shape of an entity name accepted by {@link #reference(String)}; compiled once instead of per call. */
    private static final java.util.regex.Pattern REFERENCE_NAME =
            java.util.regex.Pattern.compile("[a-zA-Z_:][a-zA-Z0-9._:-]*");

    private final Writer out;

    /** Scope stack; the top entry describes where the writer currently is. */
    private int[] stack = new int[32];
    private int stackSize = 0;
    {
        push(EMPTY_DOCUMENT);
    }

    /** Name of the tag opened inside each scope, stored at the index of the enclosing scope. */
    private String[] pathNames = new String[32];

    private String newline;
    private String indent;
    /** Cached "nothing to pretty-print" flag so {@link #newline()} can return immediately. */
    private boolean usesEmptyNewlineAndIndent;

    /** Comment lines waiting to be written in front of the next token. */
    private final List<String> deferredComments = new ArrayList<>();

    /** Whether the previous token was text, in which case another text needs a separator. */
    private boolean wasText = false;
    private boolean lenient;
    private boolean escapeNonAscii;

    /**
     * Creates a writer producing compact output (no newlines, no indentation).
     *
     * @param out the sink to write to
     * @throws NullPointerException if {@code out} is null
     */
    public NativeXmlWriter(Writer out) {
        this.out = Objects.requireNonNull(out, "out == null");
        // Assigned directly instead of through the setters: those are overridable.
        this.newline = "";
        this.indent = "";
        this.usesEmptyNewlineAndIndent = true;
    }

    /**
     * Enables or disables lenient mode.
     *
     * @param lenient whether to repair invalid input instead of throwing
     * @return this writer
     */
    public NativeXmlWriter setLenient(boolean lenient) {
        this.lenient = lenient;
        return this;
    }

    /** Returns whether lenient mode is enabled. */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * Controls whether characters above 127 in text and attribute values are written as numeric
     * character references.
     *
     * @param escapeNonAscii whether to escape such characters
     * @return this writer
     */
    public NativeXmlWriter setEscapeNonAscii(boolean escapeNonAscii) {
        this.escapeNonAscii = escapeNonAscii;
        return this;
    }

    /** Returns whether characters above 127 are written as numeric character references. */
    public boolean isEscapeNonAscii() {
        return escapeNonAscii;
    }

    /**
     * Sets the string repeated once per nesting level after each newline.
     *
     * <p>A non-empty indent only makes sense with line breaks, so if no newline is configured yet it
     * defaults to {@code "\n"}. A newline that was configured explicitly is kept.
     *
     * @param indent the indentation unit; null or empty disables indentation
     * @return this writer
     */
    public NativeXmlWriter setIndent(String indent) {
        if (indent == null || indent.isEmpty()) {
            this.indent = "";
            this.usesEmptyNewlineAndIndent = newline.isEmpty();
        } else {
            if (newline.isEmpty()) {
                this.newline = "\n";
            }
            this.indent = indent;
            this.usesEmptyNewlineAndIndent = false;
        }
        return this;
    }

    /** Returns the indentation unit; empty if indentation is disabled. */
    public String getIndent() {
        return indent;
    }

    /**
     * Sets the line separator written between tokens.
     *
     * @param newline the separator; null or empty disables line breaks
     * @return this writer
     */
    public NativeXmlWriter setNewline(String newline) {
        if (newline == null || newline.isEmpty()) {
            this.newline = "";
            this.usesEmptyNewlineAndIndent = indent.isEmpty();
        } else {
            this.newline = newline;
            this.usesEmptyNewlineAndIndent = false;
        }
        return this;
    }

    /** Returns the line separator; empty if line breaks are disabled. */
    public String getNewline() {
        return newline;
    }

    /**
     * Opens a tag. Attributes may follow until the first body token is written.
     *
     * @param name the tag name
     * @return this writer
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalArgumentException if the name is not a valid XML name and the writer is not lenient
     * @throws IllegalStateException if a tag cannot be opened in the current scope
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter beginTag(String name) throws IOException {
        Objects.requireNonNull(name, "name == null");
        // Validate before anything is written so a rejected name leaves the output untouched.
        String safeName = sanitizeName(name);
        wasText = false;
        beforeValue();
        // Remember what was actually written so endTag() emits a matching name.
        pathNames[stackSize - 1] = safeName;
        push(TAG_HEAD);
        out.write('<');
        out.write(safeName);
        return this;
    }

    /**
     * Closes the innermost open tag, first flushing any comments still pending inside it.
     *
     * @return this writer
     * @throws IllegalStateException if no tag is open or an attribute name is still awaiting its value
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter endTag() throws IOException {
        int context = peek();
        if (context != TAG_HEAD && context != TAG_BODY) {
            throw new IllegalStateException("Nesting problem.");
        }
        wasText = false;
        if (!deferredComments.isEmpty()) {
            if (context == TAG_HEAD) {
                out.write('>');
                context = TAG_BODY;
            }
            newline();
            writeDeferredComment();
        } else if (context == TAG_HEAD) {
            // No body was written: the opening tag still has to be terminated.
            out.write('>');
        }
        stackSize--;
        if (context == TAG_BODY) {
            newline();
        }
        out.write("</");
        out.write(pathNames[stackSize - 1]);
        pathNames[stackSize - 1] = null;
        out.write('>');
        return this;
    }

    /** Pushes a scope, growing both parallel stacks when they are full. */
    private void push(int newTop) {
        if (stackSize == stack.length) {
            int newLength = stackSize * 2;
            stack = Arrays.copyOf(stack, newLength);
            pathNames = Arrays.copyOf(pathNames, newLength);
        }
        stack[stackSize++] = newTop;
    }

    /** Returns the value on the top of the stack. */
    private int peek() {
        if (stackSize == 0) {
            throw new IllegalStateException("NativeXmlWriter is closed.");
        }
        return stack[stackSize - 1];
    }

    /** Replace the value on the top of the stack with the given value. */
    private void replaceTop(int topOfStack) {
        stack[stackSize - 1] = topOfStack;
    }

    /**
     * Queues a comment. Each line of {@code comment} becomes its own comment when line breaks are
     * enabled; otherwise the lines are joined with {@code " / "}.
     *
     * @param comment the comment text; null or blank is ignored
     * @return this writer
     * @throws IllegalStateException if the writer is closed
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter comment(String comment) throws IOException {
        if (comment == null || comment.isBlank()) return this;
        int context = peek();
        wasText = false;
        // Keep the text from terminating the comment early.
        String safeComment = comment.replace("-->", "--&gt;");
        Collections.addAll(deferredComments, safeComment.split("\n"));
        if (context == NONEMPTY_DOCUMENT) {
            newline();
            writeDeferredComment();
        }
        return this;
    }

    /** Writes and clears all queued comment lines; does nothing if none are queued. */
    private void writeDeferredComment() throws IOException {
        if (deferredComments.isEmpty()) return;
        if (newline.isEmpty()) {
            List<String> visible = deferredComments.stream()
                    .filter(s -> s != null && !s.isBlank())
                    .toList();
            if (visible.isEmpty()) {
                out.write("<!---->");
            } else {
                out.append("<!-- ").append(String.join(" / ", visible)).append(" -->");
            }
        } else {
            boolean first = true;
            for (String line : deferredComments) {
                if (!first) newline();
                first = false;
                if (line == null || line.isBlank()) out.append("<!---->");
                else out.append("<!-- ").append(line).append(" -->");
            }
        }
        deferredComments.clear();
    }

    /**
     * Writes a complete attribute; shorthand for {@link #attributeName} followed by
     * {@link #attributeValue}.
     *
     * @param name the attribute name
     * @param value the attribute value; null is written as {@code "null"}
     * @return this writer
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter attribute(String name, String value) throws IOException {
        return attributeName(name).attributeValue(value);
    }

    /**
     * Writes an attribute name inside the head of the current tag.
     *
     * @param name the attribute name
     * @return this writer
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalStateException if the writer is not inside a tag head
     * @throws IllegalArgumentException if the name is not a valid XML name and the writer is not lenient
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter attributeName(String name) throws IOException {
        Objects.requireNonNull(name, "name == null");
        wasText = false;
        if (peek() != TAG_HEAD) {
            throw new IllegalStateException("Nesting problem.");
        }
        String safeName = sanitizeName(name);
        replaceTop(DANGLING_NAME);
        out.write(' ');
        out.write(safeName);
        return this;
    }

    /**
     * Writes the value for the attribute name written last.
     *
     * @param value the value; null is written as {@code "null"}
     * @return this writer
     * @throws IllegalStateException if no attribute name is awaiting a value
     * @throws IllegalArgumentException if the value contains a control character other than tab and
     *         the writer is not lenient
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter attributeValue(String value) throws IOException {
        value = value == null ? "null" : value;
        wasText = false;
        if (peek() != DANGLING_NAME) {
            throw new IllegalStateException("Nesting problem.");
        }
        // Reject before writing so a bad value cannot leave a half-written attribute behind.
        if (!lenient) rejectControlCharacters(value);
        replaceTop(TAG_HEAD);
        out.write('=');
        out.write('"');
        escapeText(value);
        out.write('"');
        return this;
    }

    /**
     * Writes escaped character data. Two consecutive calls are separated by an empty comment so they
     * stay distinguishable in the output.
     *
     * @param text the text; null is treated as empty
     * @return this writer
     * @throws IllegalStateException if text cannot be written in the current scope
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter text(String text) throws IOException {
        text = text == null ? "" : text;
        // comment("") would be a no-op because blank comments are ignored, so queue the separator directly.
        if (wasText) deferredComments.add("");
        beforeValue();
        escapeText(text);
        wasText = true;
        return this;
    }

    /**
     * Writes an entity reference such as {@code &name;}.
     *
     * @param reference the entity name without {@code '&'} and {@code ';'}
     * @return this writer
     * @throws IllegalArgumentException if {@code reference} is null, empty or not an ASCII XML name
     * @throws IllegalStateException if a reference cannot be written in the current scope
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter reference(String reference) throws IOException {
        reference = reference == null ? "" : reference;
        if (!REFERENCE_NAME.matcher(reference).matches()) {
            throw new IllegalArgumentException("Invalid reference: " + reference);
        }
        wasText = false;
        beforeValue();
        out.write("&");
        out.write(reference);
        out.write(";");
        return this;
    }

    /**
     * Writes a CDATA section.
     *
     * @param cdata the raw content; null is treated as empty
     * @return this writer
     * @throws IllegalArgumentException if the content contains the CDATA terminator
     * @throws IllegalStateException if CDATA cannot be written in the current scope
     * @throws IOException if the underlying writer fails
     */
    public NativeXmlWriter cdata(String cdata) throws IOException {
        cdata = cdata == null ? "" : cdata;
        if (cdata.contains("]]>")) throw new IllegalArgumentException("CDATA cannot contain ']]>'");
        wasText = false;
        beforeValue();
        out.write("<![CDATA[");
        out.write(cdata);
        out.write("]]>");
        return this;
    }

    /**
     * Flushes the underlying writer.
     *
     * @throws IllegalStateException if this writer is closed
     * @throws IOException if the underlying writer fails
     */
    @Override
    public void flush() throws IOException {
        if (stackSize == 0) {
            throw new IllegalStateException("NativeXmlWriter is closed.");
        }
        out.flush();
    }

    /**
     * Closes the underlying writer.
     *
     * @throws IOException if the underlying writer fails, or if tags are still open or nothing was
     *         written (the underlying writer is closed regardless)
     */
    @Override
    public void close() throws IOException {
        out.close();
        int size = stackSize;
        if (size > 1 || (size == 1 && stack[size - 1] != NONEMPTY_DOCUMENT)) {
            throw new IOException("Incomplete document");
        }
        stackSize = 0;
    }

    /**
     * Checks {@code name} against the XML name rules and returns the form to write.
     *
     * @return {@code name} itself if valid; in lenient mode otherwise a copy in which every illegal
     *         character is replaced by {@code '_'} (an empty name becomes {@code "_"})
     * @throws IllegalArgumentException if the name is invalid and the writer is not lenient
     */
    private String sanitizeName(String name) {
        int length = name.length();
        if (length == 0) {
            if (!lenient) throw new IllegalArgumentException("Name must not be empty");
            return "_";
        }
        StringBuilder sanitized = null;
        int last = 0;
        for (int i = 0; i < length; i++) {
            char c = name.charAt(i);
            char n = i + 1 < length ? name.charAt(i + 1) : '\0';
            NameCheck check = i == 0 ? NameCheck.isNameStart(c, n) : NameCheck.isName(c, n);
            switch (check) {
                case NONE -> {
                    if (!lenient) {
                        throw new IllegalArgumentException("Illegal character in name: " + name);
                    }
                    if (sanitized == null) sanitized = new StringBuilder(length);
                    sanitized.append(name, last, i).append('_');
                    last = i + 1;
                }
                case BOTH -> i++; // the next char belongs to this one and was validated with it
                case FIRST -> {}
            }
        }
        if (sanitized == null) return name;
        return sanitized.append(name, last, length).toString();
    }

    /** Throws if {@code text} contains a control character below 0x20 other than tab. */
    private static void rejectControlCharacters(String text) {
        for (int i = 0, length = text.length(); i < length; i++) {
            char c = text.charAt(i);
            if (c < 0x20 && c != 0x09) {
                throw new IllegalArgumentException("Illegal control character in text: " + text);
            }
        }
    }

    /**
     * Writes {@code text} with the five predefined entities escaped and, if enabled, every character
     * above 127 replaced by a numeric character reference.
     */
    private void escapeText(String text) throws IOException {
        int last = 0;
        int length = text.length();
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);
            int consumed = 1;
            String replacement;
            switch (c) {
                case '&' -> replacement = "&amp;";
                case '<' -> replacement = "&lt;";
                case '>' -> replacement = "&gt;";
                case '"' -> replacement = "&quot;";
                case '\'' -> replacement = "&apos;";
                default -> {
                    if (escapeNonAscii && c > 127) {
                        // Reference the whole code point: a surrogate pair must become one reference,
                        // since references to individual surrogates are not legal XML.
                        int codePoint = text.codePointAt(i);
                        consumed = Character.charCount(codePoint);
                        replacement = "&#" + codePoint + ";";
                    } else {
                        replacement = null;
                    }
                }
            }
            if (replacement == null) continue;
            if (last < i) out.write(text, last, i - last);
            out.write(replacement);
            i += consumed - 1;
            last = i + 1;
        }
        if (last < length) out.write(text, last, length - last);
    }

    /** Writes the line separator followed by one indent per open tag; no-op when both are empty. */
    private void newline() throws IOException {
        if (usesEmptyNewlineAndIndent) {
            return;
        }
        out.write(newline);
        for (int i = 1, size = stackSize; i < size; i++) {
            out.write(indent);
        }
    }

    /**
     * Prepares the output for a body token (tag, text, reference, CDATA): terminates an open tag head,
     * writes separators and pending comments, and updates the scope.
     *
     * @throws IllegalStateException if a value is not permitted in the current scope
     */
    private void beforeValue() throws IOException {
        switch (peek()) {
            case NONEMPTY_DOCUMENT:
                if (!lenient) {
                    throw new IllegalStateException("XML must have only one top-level value.");
                }
                // lenient: fall through and treat it like the first value
            case EMPTY_DOCUMENT:
                replaceTop(NONEMPTY_DOCUMENT);
                if (!deferredComments.isEmpty()) {
                    newline();
                    writeDeferredComment();
                }
                break;
            case DANGLING_NAME:
                if (!lenient) {
                    throw new IllegalStateException("Attribute name must be followed by a value");
                }
                attributeValue("null");
                // the scope is TAG_HEAD again: fall through to terminate the head
            case TAG_HEAD:
                out.write('>');
                replaceTop(TAG_BODY);
                // fall through: the value now sits in the tag body
            case TAG_BODY:
                newline();
                if (!deferredComments.isEmpty()) {
                    writeDeferredComment();
                    newline();
                }
                break;
            default:
                throw new IllegalStateException("Nesting problem.");
        }
    }
}

View File

@ -0,0 +1,31 @@
package io.gitlab.jfronny.commons.serialize.xml.impl;
/**
 * Result of checking one UTF-16 code unit, with one unit of lookahead, against the {@code NameStartChar}
 * and {@code NameChar} productions of XML 1.0.
 *
 * <p>Callers that scan a name unit by unit must skip the following unit when {@link #BOTH} is returned:
 * that unit is a low surrogate, which is not a name character on its own.
 */
public enum NameCheck {
    /** The checked character is valid by itself. */
    FIRST,
    /** The checked character and the one after it form a surrogate pair encoding a valid character. */
    BOTH,
    /** The checked character is not valid at this position. */
    NONE;

    /**
     * Checks whether a name may start with {@code ch}.
     *
     * @param ch the character to check
     * @param chNext the character after {@code ch}; only consulted when {@code ch} is a high surrogate
     * @return {@link #FIRST} if {@code ch} is a {@code NameStartChar} from the basic multilingual plane,
     *         {@link #BOTH} if {@code ch} and {@code chNext} encode one between U+10000 and U+EFFFF,
     *         otherwise {@link #NONE}
     */
    public static NameCheck isNameStart(char ch, char chNext) {
        if (isBmpNameStart(ch)) return FIRST;
        return isSupplementaryNameChar(ch, chNext) ? BOTH : NONE;
    }

    /**
     * Checks whether {@code ch} may appear in a name after its first character.
     *
     * @param ch the character to check
     * @param chNext the character after {@code ch}; only consulted when {@code ch} is a high surrogate
     * @return the result of {@link #isNameStart} if that is not {@link #NONE}; otherwise {@link #FIRST}
     *         if {@code ch} is one of the additional {@code NameChar} characters, else {@link #NONE}
     */
    public static NameCheck isName(char ch, char chNext) {
        var nameStart = isNameStart(ch, chNext);
        if (nameStart != NONE) return nameStart;
        return isAdditionalNameChar(ch) ? FIRST : NONE;
    }

    /** NameStartChar, restricted to characters representable in a single char. */
    private static boolean isBmpNameStart(char ch) {
        if (ch < 0x80) {
            return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == ':' || ch == '_';
        }
        return inRange(ch, 0xC0, 0xD6)
                || inRange(ch, 0xD8, 0xF6)
                || inRange(ch, 0xF8, 0x2FF)
                || inRange(ch, 0x370, 0x37D)
                || inRange(ch, 0x37F, 0x1FFF)
                || inRange(ch, 0x200C, 0x200D)
                || inRange(ch, 0x2070, 0x218F)
                || inRange(ch, 0x2C00, 0x2FEF)
                || inRange(ch, 0x3001, 0xD7FF)
                || inRange(ch, 0xF900, 0xFDCF)
                || inRange(ch, 0xFDF0, 0xFFFD);
    }

    /** The characters NameChar permits in addition to NameStartChar. */
    private static boolean isAdditionalNameChar(char ch) {
        return ('0' <= ch && ch <= '9')
                || ch == '-'
                || ch == '.'
                || ch == 0xB7
                || inRange(ch, 0x0300, 0x036F)
                || inRange(ch, 0x203F, 0x2040);
    }

    /** Whether the two chars are a surrogate pair for a code point from U+10000 to U+EFFFF. */
    private static boolean isSupplementaryNameChar(char high, char low) {
        if (!Character.isHighSurrogate(high) || !Character.isLowSurrogate(low)) return false;
        return Character.toCodePoint(high, low) <= 0xEFFFF;
    }

    private static boolean inRange(char ch, int from, int to) {
        return from <= ch && ch <= to;
    }
}