feat(serialize-xml): Rewrite wrapping XmlReader

This commit is contained in:
Johannes Frohnmeyer 2024-04-17 10:44:56 +02:00
parent 6bd1bca351
commit 63d7caca7e
Signed by: Johannes
GPG Key ID: E76429612C2929F4
2 changed files with 234 additions and 87 deletions

View File

@ -1,9 +1,9 @@
package io.gitlab.jfronny.commons.serialize.xml.impl;
/**
 * Integer codes for the kinds of scope that XmlReader keeps on its scope stack.
 *
 * NOTE(review): this view is a rendered diff without +/- markers, so two sets of
 * constants appear back to back and DOCUMENT is declared twice. The TAG_* set
 * (DOCUMENT = 6) looks like the pre-rewrite side and the
 * OBJECT_VALUE_WRAPPER/ARRAY/OBJECT set (DOCUMENT = 5) like the post-rewrite
 * side - confirm against the repository.
 */
public class WrapperScope {
// Codes tested and assigned by the if/else-based reader bodies in XmlReader
// (e.g. stack[stackSize - 1] == WrapperScope.TAG_HEAD).
public static final int TAG_HEAD = 1;
public static final int TAG_HEAD_DANGLING_NAME = 2;
public static final int TAG_BODY_ARRAY = 3;
public static final int TAG_BODY_OBJECT = 4;
public static final int DOCUMENT = 6;
// Codes used by the peek-based reader bodies (XmlReader.doPeek and the begin*/end* methods).
// Scope pushed by doPeek for a tag whose name is reported as an entry name.
public static final int OBJECT_VALUE_WRAPPER = 1;
// The same scope after doPeek has seen text, CDATA or a child tag inside it.
public static final int OBJECT_VALUE_WRAPPER_USED = 2;
// Scope of a tag that is read as an array.
public static final int ARRAY = 3;
// Scope of a tag that is read as an object.
public static final int OBJECT = 4;
// Scope in which doPeek reports text and CDATA directly as values.
public static final int DOCUMENT = 5;
}

View File

@ -15,6 +15,23 @@ import java.util.Arrays;
import java.util.Objects;
public class XmlReader extends SerializeReader<IOException, XmlReader> implements Closeable {
// Values of the `peeked` field: the classification doPeek() produced for the
// upcoming input, kept until a consuming method resets it to PEEKED_NONE.
// Nothing is buffered; the consuming methods call doPeek() in this state.
private static final int PEEKED_NONE = 0;
// A tag inside an object scope that the heuristics classify as an object; peek() reports NAME.
private static final int PEEKED_NAME_BEGIN_OBJECT = 1;
// A tag classified as an object; peek() reports BEGIN_OBJECT.
private static final int PEEKED_BEGIN_OBJECT = 2;
// A tag inside an object scope that the heuristics classify as an array; peek() reports NAME.
private static final int PEEKED_NAME_BEGIN_ARRAY = 3;
// A tag classified as an array; peek() reports BEGIN_ARRAY.
private static final int PEEKED_BEGIN_ARRAY = 4;
// A closing tag seen while the top scope was OBJECT.
private static final int PEEKED_END_OBJECT = 5;
// A closing tag seen while the top scope was ARRAY.
private static final int PEEKED_END_ARRAY = 6;
// An attribute name; peek() reports NAME.
private static final int PEEKED_NAME_ATT = 7;
// An attribute value.
private static final int PEEKED_ATT_VALUE = 8;
// A tag inside an object scope whose name serves as the entry name; peek() reports NAME.
private static final int PEEKED_NAME_TAG = 9;
// Text that can be read as a value.
private static final int PEEKED_TEXT = 10;
// CDATA that can be read as a value.
private static final int PEEKED_CDATA = 11;
// Text found directly inside an ARRAY/OBJECT scope; nextName() asks the heuristics for a name.
private static final int PEEKED_NAME_VIRTUAL_TEXT = 12;
// CDATA found directly inside an ARRAY/OBJECT scope; nextName() asks the heuristics for a name.
private static final int PEEKED_NAME_VIRTUAL_CDATA = 13;
// The underlying reader reported EOF; peek() reports END_DOCUMENT.
private static final int PEEKED_EOF = 14;
// Buffered classification of the upcoming input (one of the PEEKED_* values above).
int peeked = PEEKED_NONE;
// Underlying XML reader that all reads are delegated to.
private final NativeXmlReader reader;
// Scope stack holding WrapperScope codes; stackSize is the number of entries in use.
private int[] stack = new int[32];
private int stackSize = 0;
@ -40,7 +57,6 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
/**
 * Creates a reader that wraps the given native XML reader.
 *
 * @param reader the underlying reader; must not be null
 * @throws NullPointerException if {@code reader} is null
 */
public XmlReader(NativeXmlReader reader) {
this.reader = Objects.requireNonNull(reader);
// NOTE(review): this signature has no `heuristics` parameter, so the right-hand side
// can only resolve to a field of the same name. The hunk header reports one line
// fewer after the commit, so this may be the removed line - confirm.
this.heuristics = Objects.requireNonNull(heuristics);
}
public XmlReader(Reader source) {
@ -69,60 +85,59 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
@Override
public XmlReader beginArray() throws IOException {
if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
// Tag was just created, interpret it as an array
nextTagName = null;
stack[stackSize - 1] = WrapperScope.TAG_BODY_ARRAY;
return this;
} else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
// We are inside an array, interpret the next tag as the root of our array
reader.beginTag();
push(WrapperScope.TAG_BODY_ARRAY);
return this;
} else {
throw unexpectedTokenError("an array");
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
if (p != PEEKED_BEGIN_OBJECT) throw unexpectedTokenError("an array");
nextTagName = nextTagNamePath = null;
push(WrapperScope.ARRAY);
pathIndices[stackSize - 1] = 0;
peeked = PEEKED_NONE;
return this;
}
/**
 * Consumes the end of the current array.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The statements
 * from the nextTagName check through the first throw test
 * WrapperScope.TAG_BODY_ARRAY and look like the pre-rewrite body; the statements
 * from `int p = peeked` onwards look like the peek-based replacement - confirm.
 *
 * @return this reader
 * @throws IOException if the upcoming token is not the end of an array
 */
@Override
public XmlReader endArray() throws IOException {
if (nextTagName != null) throw unexpectedTokenError("the end of an array");
if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
reader.endTag();
stackSize--;
return this;
} else {
throw unexpectedTokenError("the end of an array");
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
if (p != PEEKED_END_ARRAY) throw unexpectedTokenError("the end of an array");
// Consume the closing tag in the underlying reader.
reader.endTag();
// Pop one scope. NOTE(review): doPeek() also decrements stackSize when it yields
// PEEKED_END_ARRAY - presumably paired with two pushes on entry; confirm.
stackSize--;
// Clear the path name stored at the popped level.
pathNames[stackSize] = null;
// Advance the index recorded for the enclosing scope.
pathIndices[stackSize - 1]++;
peeked = PEEKED_NONE;
return this;
}
/**
 * Consumes the start of an object.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The if/else chain
 * through the first throw uses WrapperScope.TAG_HEAD/TAG_BODY_* and looks like the
 * pre-rewrite body; the statements from `int p = peeked` onwards look like the
 * peek-based replacement - confirm.
 *
 * @return this reader
 * @throws IOException if the upcoming token is not the start of an object
 */
@Override
public XmlReader beginObject() throws IOException {
if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
// Tag was just created, interpret it as an object
nextTagName = null;
stack[stackSize - 1] = WrapperScope.TAG_BODY_OBJECT;
return this;
} else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
// We are inside an array, interpret the next tag as the root of our object
reader.beginTag();
push(WrapperScope.TAG_BODY_OBJECT);
return this;
} else {
throw unexpectedTokenError("an object");
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
if (p != PEEKED_BEGIN_OBJECT) throw unexpectedTokenError("an object");
// The pending tag name and its path are discarded once the object is entered.
nextTagName = nextTagNamePath = null;
// NOTE(review): doPeek() already pushes WrapperScope.OBJECT when it yields
// PEEKED_BEGIN_OBJECT, so this is a second push - confirm it is intended.
push(WrapperScope.OBJECT);
peeked = PEEKED_NONE;
return this;
}
/**
 * Consumes the end of the current object.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The statements
 * from the nextTagName check through the first throw test
 * WrapperScope.TAG_BODY_OBJECT and look like the pre-rewrite body; the statements
 * from `int p = peeked` onwards look like the peek-based replacement - confirm.
 *
 * @return this reader
 * @throws IOException if the upcoming token is not the end of an object
 */
@Override
public XmlReader endObject() throws IOException {
if (nextTagName != null) throw unexpectedTokenError("the end of an object");
if (stack[stackSize - 1] == WrapperScope.TAG_BODY_OBJECT) {
reader.endTag();
stackSize--;
return this;
} else {
throw unexpectedTokenError("the end of an object");
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
if (p != PEEKED_END_OBJECT) throw unexpectedTokenError("the end of an object");
// Consume the closing tag in the underlying reader.
reader.endTag();
// Pop one scope. NOTE(review): doPeek() also decrements stackSize when it yields
// PEEKED_END_OBJECT - presumably paired with two pushes on entry; confirm.
stackSize--;
// Clear the path name stored at the popped level.
pathNames[stackSize] = null;
// Advance the index recorded for the enclosing scope.
pathIndices[stackSize - 1]++;
peeked = PEEKED_NONE;
return this;
}
@Override
@ -132,47 +147,143 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
/**
 * Reports the kind of the upcoming token without consuming it.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The
 * `if (nextTagName != null)` switch over heuristics.guessKind and the
 * `switch (reader.peek())` arms that yield Token values look like the pre-rewrite
 * body; the `int p = peeked` lookup and the `switch (p)` mapping look like the
 * peek-based replacement - confirm.
 *
 * @return the token kind derived from the buffered (or freshly computed) peek state
 * @throws IOException if peeking the underlying reader fails
 */
@Override
public Token peek() throws IOException {
if (nextTagName != null) {
return switch (heuristics.guessKind(reader.getPath())) {
case OBJECT -> Token.BEGIN_OBJECT;
case ARRAY -> Token.BEGIN_ARRAY;
};
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
return switch (reader.peek()) {
case ATTRIBUTE_NAME -> Token.NAME;
case ATTRIBUTE_VALUE, TEXT, CDATA -> Token.STRING;
// Map the internal PEEKED_* state onto the public Token kinds.
return switch (p) {
// Every state that must be consumed through nextName() first is reported as NAME.
case PEEKED_NAME_BEGIN_ARRAY, PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_ATT, PEEKED_NAME_TAG, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> Token.NAME;
case PEEKED_BEGIN_OBJECT -> Token.BEGIN_OBJECT;
case PEEKED_BEGIN_ARRAY -> Token.BEGIN_ARRAY;
case PEEKED_END_OBJECT -> Token.END_OBJECT;
case PEEKED_END_ARRAY -> Token.END_ARRAY;
// For value states the heuristics decide which Token kind is reported.
case PEEKED_CDATA -> heuristics.guessValueKind(getPath(), XmlToken.CDATA);
case PEEKED_ATT_VALUE -> heuristics.guessValueKind(getPath(), XmlToken.ATTRIBUTE_VALUE);
case PEEKED_TEXT -> heuristics.guessValueKind(getPath(), XmlToken.TEXT);
case PEEKED_EOF -> Token.END_DOCUMENT;
// PEEKED_NONE cannot reach this point because doPeek() was run above.
default -> throw new AssertionError();
};
}
/**
 * Classifies the upcoming input of the underlying reader into one of the PEEKED_*
 * states, stores it in {@code peeked} and returns it. Depending on the input this
 * also pushes, pops or rewrites entries of the scope stack and may consume a tag
 * start or a tag end from the underlying reader.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The lines
 * `yield peek();` and the two END_TAG/EOF arms that yield Token values look like
 * leftovers of the pre-rewrite peek() body - confirm.
 *
 * @return the computed PEEKED_* state
 * @throws IOException on a syntax error or if the underlying reader fails
 */
private int doPeek() throws IOException {
// A NAME_* state that is still buffered is resolved to its follow-up state.
// NOTE(review): the callers in view only call doPeek() when peeked == PEEKED_NONE,
// and nextName() resets peeked to PEEKED_NONE, so these four checks look
// unreachable from the visible code - confirm.
if (peeked == PEEKED_NAME_VIRTUAL_TEXT) return PEEKED_TEXT;
if (peeked == PEEKED_NAME_VIRTUAL_CDATA) return PEEKED_CDATA;
if (peeked == PEEKED_NAME_BEGIN_ARRAY) return PEEKED_BEGIN_ARRAY;
if (peeked == PEEKED_NAME_BEGIN_OBJECT) return PEEKED_BEGIN_OBJECT;
// Scope on top of the stack before anything is pushed or popped below.
int peekStack = stack[stackSize - 1];
return peeked = switch (reader.peek()) {
case ATTRIBUTE_NAME -> PEEKED_NAME_ATT;
case ATTRIBUTE_VALUE -> PEEKED_ATT_VALUE;
case TEXT -> switch (peekStack) {
case WrapperScope.DOCUMENT -> PEEKED_TEXT;
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
// The wrapper's single value is this text; mark the wrapper as used.
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
yield PEEKED_TEXT;
}
// Text directly inside an array/object needs a name first.
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_TEXT;
default -> throw syntaxError("Unexpected text");
};
case CDATA -> switch (peekStack) {
case WrapperScope.DOCUMENT -> PEEKED_CDATA;
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
// The wrapper's single value is this CDATA; mark the wrapper as used.
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
yield PEEKED_CDATA;
}
// CDATA directly inside an array/object needs a name first.
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_CDATA;
default -> throw syntaxError("Unexpected CDATA");
};
case BEGIN_TAG -> {
switch (peekStack) {
// A wrapper that already produced its value must not contain another tag.
case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> throw syntaxError("Unexpected Tag");
case WrapperScope.OBJECT_VALUE_WRAPPER -> stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
default -> {}
}
// Remember the path before entering the tag, then consume the tag start.
nextTagNamePath = getPath();
nextTagName = reader.beginTag();
yield peek();
if (peekStack == WrapperScope.OBJECT) {
// Inside an object the tag either names an entry or is itself a named object/array.
String path = reader.getPath();
XmlToken next = reader.peek();
if (heuristics.guessServesAsEntryName(path, next)) {
push(WrapperScope.OBJECT_VALUE_WRAPPER);
yield PEEKED_NAME_TAG;
} else {
yield switch (heuristics.guessKind(path)) {
case OBJECT -> {
push(WrapperScope.OBJECT);
yield PEEKED_NAME_BEGIN_OBJECT;
}
case ARRAY -> {
push(WrapperScope.ARRAY);
yield PEEKED_NAME_BEGIN_ARRAY;
}
};
}
}
// Outside an object scope the tag directly starts an object or an array.
yield switch (heuristics.guessKind(reader.getPath())) {
case OBJECT -> {
push(WrapperScope.OBJECT);
yield PEEKED_BEGIN_OBJECT;
}
case ARRAY -> {
push(WrapperScope.ARRAY);
yield PEEKED_BEGIN_ARRAY;
}
};
}
case END_TAG -> stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY ? Token.END_ARRAY : Token.END_OBJECT;
case EOF -> Token.END_DOCUMENT;
// NOTE(review): OBJECT_VALUE_WRAPPER_USED has no case below, so a closing tag that
// follows a consumed wrapper value reaches the default and raises a syntax error -
// confirm whether that scope should be handled like OBJECT_VALUE_WRAPPER.
case END_TAG -> switch (peekStack) {
case WrapperScope.ARRAY -> {
stackSize--;
yield PEEKED_END_ARRAY;
}
case WrapperScope.OBJECT -> {
stackSize--;
yield PEEKED_END_OBJECT;
}
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
// An unused wrapper is closed silently; classify whatever follows it.
stackSize--;
reader.endTag();
yield doPeek();
}
default -> throw syntaxError("Unexpected end tag");
};
case EOF -> PEEKED_EOF;
};
}
/**
 * Consumes and returns the upcoming name.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The statements
 * from the `if (nextTagName != null)` block through the first
 * `throw unexpectedTokenError("a name")` use WrapperScope.TAG_HEAD* and look like
 * the pre-rewrite body; the `int p = peeked` lookup and the `switch (p)` look like
 * the peek-based replacement - confirm.
 *
 * @return the attribute name, the pending tag name, or a name supplied by the
 *         heuristics for text/CDATA that has no name of its own
 * @throws IOException if the upcoming token is not a name
 */
@Override
public String nextName() throws IOException {
if (nextTagName != null) {
String res = nextTagName;
pathNames[stackSize - 1] = res;
push(WrapperScope.TAG_HEAD);
nextTagName = null;
return res;
}
if (reader.peek() == XmlToken.ATTRIBUTE_NAME) {
stack[stackSize - 1] = WrapperScope.TAG_HEAD_DANGLING_NAME;
return reader.nextAttributeName();
} else if (reader.peek() == XmlToken.BEGIN_TAG) {
// ordinarily, this would also require a check whether we are in an object,
// but doing it this way provides users with more flexibility
String res = reader.beginTag();
pathNames[stackSize - 1] = res;
push(WrapperScope.TAG_HEAD);
return res;
} else {
throw unexpectedTokenError("a name");
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
return switch (p) {
case PEEKED_NAME_ATT -> {
// Attribute names are read straight from the underlying reader.
String res = reader.nextAttributeName();
peeked = PEEKED_NONE;
yield res;
}
case PEEKED_NAME_TAG -> {
// The tag start was already consumed by doPeek(), which stored its name.
String res = nextTagName;
if (res == null) throw unexpectedTokenError("a name");
peeked = PEEKED_NONE;
yield res;
}
case PEEKED_NAME_VIRTUAL_TEXT -> {
// Text has no name of its own; the heuristics supply one.
String result = heuristics.guessElementName(reader.getPath(), XmlToken.TEXT);
peeked = PEEKED_NONE;
yield result;
}
case PEEKED_NAME_VIRTUAL_CDATA -> {
// CDATA has no name of its own; the heuristics supply one.
String result = heuristics.guessElementName(reader.getPath(), XmlToken.CDATA);
peeked = PEEKED_NONE;
yield result;
}
case PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_BEGIN_ARRAY -> {
// NOTE(review): peeked is reset to PEEKED_NONE here, so the next doPeek() call
// classifies the underlying reader afresh rather than resolving this state to
// PEEKED_BEGIN_OBJECT/PEEKED_BEGIN_ARRAY - confirm this is intended.
peeked = PEEKED_NONE;
yield nextTagName;
}
default -> throw unexpectedTokenError("a name");
};
}
@Override
@ -206,20 +317,35 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
/**
 * Skips the upcoming value.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The leading
 * `nextValue("a value");` call looks like the pre-rewrite body and the rest like
 * the peek-based replacement - confirm.
 *
 * @throws IOException if the upcoming token cannot be skipped as a value
 */
@Override
public void skipValue() throws IOException {
nextValue("a value");
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
switch (p) {
case PEEKED_BEGIN_OBJECT, PEEKED_BEGIN_ARRAY -> {
// Skip everything the underlying reader reports for this tag, then close it
// and pop the scope that doPeek() pushed for it.
while (reader.hasNext()) reader.skipValue();
reader.endTag();
stackSize--;
}
// NOTE(review): PEEKED_ATT_VALUE is not listed here and therefore reaches the
// default branch - confirm that is intended.
case PEEKED_NAME_ATT, PEEKED_TEXT, PEEKED_CDATA, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> reader.skipValue();
default -> throw unexpectedTokenError("a value");
}
peeked = PEEKED_NONE;
}
/**
 * Consumes the upcoming attribute value, text or CDATA and returns it as a string.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The nextTagName
 * check and the `switch (reader.peek())` arms look like the pre-rewrite body; the
 * `int p = peeked` lookup and the `switch (p)` look like the peek-based
 * replacement - confirm.
 *
 * @param kind description of the expected value, passed to unexpectedTokenError
 * @return the raw string read from the underlying reader
 * @throws IOException if the upcoming token is not an attribute value, text or CDATA
 */
private String nextValue(String kind) throws IOException {
if (nextTagName != null) throw unexpectedTokenError(kind);
return switch (reader.peek()) {
case ATTRIBUTE_VALUE -> {
stack[stackSize - 1] = WrapperScope.TAG_HEAD;
yield reader.nextAttributeValue();
}
case TEXT -> reader.nextText();
case CDATA -> reader.nextCData();
case BEGIN_TAG, END_TAG, ATTRIBUTE_NAME, EOF -> throw unexpectedTokenError(kind);
// Reuse the buffered peek state, or compute it when nothing is buffered.
int p = peeked;
if (p == PEEKED_NONE) {
p = doPeek();
}
// Only the three value states can be read; every other state is an error.
String result = switch (p) {
case PEEKED_ATT_VALUE -> reader.nextAttributeValue();
case PEEKED_TEXT -> reader.nextText();
case PEEKED_CDATA -> reader.nextCData();
default -> throw unexpectedTokenError(kind);
};
// The buffered state has been consumed.
peeked = PEEKED_NONE;
return result;
}
@Override
@ -241,10 +367,31 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
/**
 * Decisions the reader delegates when mapping XML onto
 * object/array/name/value tokens.
 *
 * NOTE(review): this view is a rendered diff without +/- markers. The lambda form
 * `Heuristics DEFAULT = path -> {...}` looks like the pre-rewrite definition (it
 * only fits an interface with a single abstract method) and the anonymous class
 * like its replacement - confirm.
 */
public interface Heuristics {
/** Whether a tag is read as an object or as an array. */
enum Kind {OBJECT, ARRAY}
/** Decides whether the tag at {@code path} is read as an object or an array. */
Kind guessKind(String path);
/**
 * Decides whether a tag inside an object only supplies the entry name for the
 * value it wraps; {@code next} is the token that follows the tag start.
 */
boolean guessServesAsEntryName(String path, XmlToken next);
/** Supplies a name for text or CDATA ({@code kind}) that has no name of its own. */
String guessElementName(String path, XmlToken kind);
/** Decides which Token kind peek() reports for a value of XML token type {@code next}. */
Token guessValueKind(String path, XmlToken next);
Heuristics DEFAULT = path -> {
if (path.endsWith("s")) return Kind.ARRAY;
return Kind.OBJECT;
// Default heuristics.
Heuristics DEFAULT = new Heuristics() {
@Override
public Kind guessKind(String path) {
// Paths ending in "s" are read as arrays, everything else as objects.
if (path.endsWith("s")) return Kind.ARRAY;
return Kind.OBJECT;
}
@Override
public boolean guessServesAsEntryName(String path, XmlToken next) {
// Every tag inside an object is treated as an entry name.
return true;
}
@Override
public String guessElementName(String path, XmlToken kind) {
// Unnamed text/CDATA is always named "item".
return "item";
}
@Override
public Token guessValueKind(String path, XmlToken next) {
// Every value is reported as a string.
return Token.STRING;
}
};
}