feat(serialize-xml): Rewrite wrapping XmlReader
This commit is contained in:
parent
6bd1bca351
commit
63d7caca7e
@ -1,9 +1,9 @@
|
||||
package io.gitlab.jfronny.commons.serialize.xml.impl;
|
||||
|
||||
public class WrapperScope {
|
||||
public static final int TAG_HEAD = 1;
|
||||
public static final int TAG_HEAD_DANGLING_NAME = 2;
|
||||
public static final int TAG_BODY_ARRAY = 3;
|
||||
public static final int TAG_BODY_OBJECT = 4;
|
||||
public static final int DOCUMENT = 6;
|
||||
public static final int OBJECT_VALUE_WRAPPER = 1;
|
||||
public static final int OBJECT_VALUE_WRAPPER_USED = 2;
|
||||
public static final int ARRAY = 3;
|
||||
public static final int OBJECT = 4;
|
||||
public static final int DOCUMENT = 5;
|
||||
}
|
||||
|
@ -15,6 +15,23 @@ import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
public class XmlReader extends SerializeReader<IOException, XmlReader> implements Closeable {
|
||||
private static final int PEEKED_NONE = 0;
|
||||
private static final int PEEKED_NAME_BEGIN_OBJECT = 1;
|
||||
private static final int PEEKED_BEGIN_OBJECT = 2;
|
||||
private static final int PEEKED_NAME_BEGIN_ARRAY = 3;
|
||||
private static final int PEEKED_BEGIN_ARRAY = 4;
|
||||
private static final int PEEKED_END_OBJECT = 5;
|
||||
private static final int PEEKED_END_ARRAY = 6;
|
||||
private static final int PEEKED_NAME_ATT = 7;
|
||||
private static final int PEEKED_ATT_VALUE = 8;
|
||||
private static final int PEEKED_NAME_TAG = 9;
|
||||
private static final int PEEKED_TEXT = 10;
|
||||
private static final int PEEKED_CDATA = 11;
|
||||
private static final int PEEKED_NAME_VIRTUAL_TEXT = 12;
|
||||
private static final int PEEKED_NAME_VIRTUAL_CDATA = 13;
|
||||
private static final int PEEKED_EOF = 14;
|
||||
|
||||
int peeked = PEEKED_NONE;
|
||||
private final NativeXmlReader reader;
|
||||
private int[] stack = new int[32];
|
||||
private int stackSize = 0;
|
||||
@ -40,7 +57,6 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
|
||||
|
||||
public XmlReader(NativeXmlReader reader) {
|
||||
this.reader = Objects.requireNonNull(reader);
|
||||
this.heuristics = Objects.requireNonNull(heuristics);
|
||||
}
|
||||
|
||||
public XmlReader(Reader source) {
|
||||
@ -69,60 +85,59 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
|
||||
|
||||
@Override
|
||||
public XmlReader beginArray() throws IOException {
|
||||
if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
|
||||
// Tag was just created, interpret it as an array
|
||||
nextTagName = null;
|
||||
stack[stackSize - 1] = WrapperScope.TAG_BODY_ARRAY;
|
||||
return this;
|
||||
} else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
|
||||
// We are inside an array, interpret the next tag as the root of our array
|
||||
reader.beginTag();
|
||||
push(WrapperScope.TAG_BODY_ARRAY);
|
||||
return this;
|
||||
} else {
|
||||
throw unexpectedTokenError("an array");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_BEGIN_OBJECT) throw unexpectedTokenError("an array");
|
||||
nextTagName = nextTagNamePath = null;
|
||||
push(WrapperScope.ARRAY);
|
||||
pathIndices[stackSize - 1] = 0;
|
||||
peeked = PEEKED_NONE;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XmlReader endArray() throws IOException {
|
||||
if (nextTagName != null) throw unexpectedTokenError("the end of an array");
|
||||
if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
|
||||
reader.endTag();
|
||||
stackSize--;
|
||||
return this;
|
||||
} else {
|
||||
throw unexpectedTokenError("the end of an array");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_END_ARRAY) throw unexpectedTokenError("the end of an array");
|
||||
reader.endTag();
|
||||
stackSize--;
|
||||
pathNames[stackSize] = null;
|
||||
pathIndices[stackSize - 1]++;
|
||||
peeked = PEEKED_NONE;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XmlReader beginObject() throws IOException {
|
||||
if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
|
||||
// Tag was just created, interpret it as an object
|
||||
nextTagName = null;
|
||||
stack[stackSize - 1] = WrapperScope.TAG_BODY_OBJECT;
|
||||
return this;
|
||||
} else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
|
||||
// We are inside an array, interpret the next tag as the root of our object
|
||||
reader.beginTag();
|
||||
push(WrapperScope.TAG_BODY_OBJECT);
|
||||
return this;
|
||||
} else {
|
||||
throw unexpectedTokenError("an object");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_BEGIN_OBJECT) throw unexpectedTokenError("an object");
|
||||
nextTagName = nextTagNamePath = null;
|
||||
push(WrapperScope.OBJECT);
|
||||
peeked = PEEKED_NONE;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XmlReader endObject() throws IOException {
|
||||
if (nextTagName != null) throw unexpectedTokenError("the end of an object");
|
||||
if (stack[stackSize - 1] == WrapperScope.TAG_BODY_OBJECT) {
|
||||
reader.endTag();
|
||||
stackSize--;
|
||||
return this;
|
||||
} else {
|
||||
throw unexpectedTokenError("the end of an object");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_END_OBJECT) throw unexpectedTokenError("the end of an object");
|
||||
reader.endTag();
|
||||
stackSize--;
|
||||
pathNames[stackSize] = null;
|
||||
pathIndices[stackSize - 1]++;
|
||||
peeked = PEEKED_NONE;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -132,47 +147,143 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
|
||||
|
||||
@Override
|
||||
public Token peek() throws IOException {
|
||||
if (nextTagName != null) {
|
||||
return switch (heuristics.guessKind(reader.getPath())) {
|
||||
case OBJECT -> Token.BEGIN_OBJECT;
|
||||
case ARRAY -> Token.BEGIN_ARRAY;
|
||||
};
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
return switch (reader.peek()) {
|
||||
case ATTRIBUTE_NAME -> Token.NAME;
|
||||
case ATTRIBUTE_VALUE, TEXT, CDATA -> Token.STRING;
|
||||
return switch (p) {
|
||||
case PEEKED_NAME_BEGIN_ARRAY, PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_ATT, PEEKED_NAME_TAG, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> Token.NAME;
|
||||
case PEEKED_BEGIN_OBJECT -> Token.BEGIN_OBJECT;
|
||||
case PEEKED_BEGIN_ARRAY -> Token.BEGIN_ARRAY;
|
||||
case PEEKED_END_OBJECT -> Token.END_OBJECT;
|
||||
case PEEKED_END_ARRAY -> Token.END_ARRAY;
|
||||
case PEEKED_CDATA -> heuristics.guessValueKind(getPath(), XmlToken.CDATA);
|
||||
case PEEKED_ATT_VALUE -> heuristics.guessValueKind(getPath(), XmlToken.ATTRIBUTE_VALUE);
|
||||
case PEEKED_TEXT -> heuristics.guessValueKind(getPath(), XmlToken.TEXT);
|
||||
case PEEKED_EOF -> Token.END_DOCUMENT;
|
||||
default -> throw new AssertionError();
|
||||
};
|
||||
}
|
||||
|
||||
private int doPeek() throws IOException {
|
||||
if (peeked == PEEKED_NAME_VIRTUAL_TEXT) return PEEKED_TEXT;
|
||||
if (peeked == PEEKED_NAME_VIRTUAL_CDATA) return PEEKED_CDATA;
|
||||
if (peeked == PEEKED_NAME_BEGIN_ARRAY) return PEEKED_BEGIN_ARRAY;
|
||||
if (peeked == PEEKED_NAME_BEGIN_OBJECT) return PEEKED_BEGIN_OBJECT;
|
||||
int peekStack = stack[stackSize - 1];
|
||||
return peeked = switch (reader.peek()) {
|
||||
case ATTRIBUTE_NAME -> PEEKED_NAME_ATT;
|
||||
case ATTRIBUTE_VALUE -> PEEKED_ATT_VALUE;
|
||||
case TEXT -> switch (peekStack) {
|
||||
case WrapperScope.DOCUMENT -> PEEKED_TEXT;
|
||||
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
|
||||
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
|
||||
yield PEEKED_TEXT;
|
||||
}
|
||||
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_TEXT;
|
||||
default -> throw syntaxError("Unexpected text");
|
||||
};
|
||||
case CDATA -> switch (peekStack) {
|
||||
case WrapperScope.DOCUMENT -> PEEKED_CDATA;
|
||||
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
|
||||
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
|
||||
yield PEEKED_CDATA;
|
||||
}
|
||||
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_CDATA;
|
||||
default -> throw syntaxError("Unexpected CDATA");
|
||||
};
|
||||
case BEGIN_TAG -> {
|
||||
switch (peekStack) {
|
||||
case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> throw syntaxError("Unexpected Tag");
|
||||
case WrapperScope.OBJECT_VALUE_WRAPPER -> stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
|
||||
default -> {}
|
||||
}
|
||||
nextTagNamePath = getPath();
|
||||
nextTagName = reader.beginTag();
|
||||
yield peek();
|
||||
if (peekStack == WrapperScope.OBJECT) {
|
||||
String path = reader.getPath();
|
||||
XmlToken next = reader.peek();
|
||||
if (heuristics.guessServesAsEntryName(path, next)) {
|
||||
push(WrapperScope.OBJECT_VALUE_WRAPPER);
|
||||
yield PEEKED_NAME_TAG;
|
||||
} else {
|
||||
yield switch (heuristics.guessKind(path)) {
|
||||
case OBJECT -> {
|
||||
push(WrapperScope.OBJECT);
|
||||
yield PEEKED_NAME_BEGIN_OBJECT;
|
||||
}
|
||||
case ARRAY -> {
|
||||
push(WrapperScope.ARRAY);
|
||||
yield PEEKED_NAME_BEGIN_ARRAY;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
yield switch (heuristics.guessKind(reader.getPath())) {
|
||||
case OBJECT -> {
|
||||
push(WrapperScope.OBJECT);
|
||||
yield PEEKED_BEGIN_OBJECT;
|
||||
}
|
||||
case ARRAY -> {
|
||||
push(WrapperScope.ARRAY);
|
||||
yield PEEKED_BEGIN_ARRAY;
|
||||
}
|
||||
};
|
||||
}
|
||||
case END_TAG -> stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY ? Token.END_ARRAY : Token.END_OBJECT;
|
||||
case EOF -> Token.END_DOCUMENT;
|
||||
case END_TAG -> switch (peekStack) {
|
||||
case WrapperScope.ARRAY -> {
|
||||
stackSize--;
|
||||
yield PEEKED_END_ARRAY;
|
||||
}
|
||||
case WrapperScope.OBJECT -> {
|
||||
stackSize--;
|
||||
yield PEEKED_END_OBJECT;
|
||||
}
|
||||
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
|
||||
stackSize--;
|
||||
reader.endTag();
|
||||
yield doPeek();
|
||||
}
|
||||
default -> throw syntaxError("Unexpected end tag");
|
||||
};
|
||||
case EOF -> PEEKED_EOF;
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String nextName() throws IOException {
|
||||
if (nextTagName != null) {
|
||||
String res = nextTagName;
|
||||
pathNames[stackSize - 1] = res;
|
||||
push(WrapperScope.TAG_HEAD);
|
||||
nextTagName = null;
|
||||
return res;
|
||||
}
|
||||
if (reader.peek() == XmlToken.ATTRIBUTE_NAME) {
|
||||
stack[stackSize - 1] = WrapperScope.TAG_HEAD_DANGLING_NAME;
|
||||
return reader.nextAttributeName();
|
||||
} else if (reader.peek() == XmlToken.BEGIN_TAG) {
|
||||
// ordinarily, this would also require a check whether we are in an object,
|
||||
// but doing it this way provides users with more flexibility
|
||||
String res = reader.beginTag();
|
||||
pathNames[stackSize - 1] = res;
|
||||
push(WrapperScope.TAG_HEAD);
|
||||
return res;
|
||||
} else {
|
||||
throw unexpectedTokenError("a name");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
return switch (p) {
|
||||
case PEEKED_NAME_ATT -> {
|
||||
String res = reader.nextAttributeName();
|
||||
peeked = PEEKED_NONE;
|
||||
yield res;
|
||||
}
|
||||
case PEEKED_NAME_TAG -> {
|
||||
String res = nextTagName;
|
||||
if (res == null) throw unexpectedTokenError("a name");
|
||||
peeked = PEEKED_NONE;
|
||||
yield res;
|
||||
}
|
||||
case PEEKED_NAME_VIRTUAL_TEXT -> {
|
||||
String result = heuristics.guessElementName(reader.getPath(), XmlToken.TEXT);
|
||||
peeked = PEEKED_NONE;
|
||||
yield result;
|
||||
}
|
||||
case PEEKED_NAME_VIRTUAL_CDATA -> {
|
||||
String result = heuristics.guessElementName(reader.getPath(), XmlToken.CDATA);
|
||||
peeked = PEEKED_NONE;
|
||||
yield result;
|
||||
}
|
||||
case PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_BEGIN_ARRAY -> {
|
||||
peeked = PEEKED_NONE;
|
||||
yield nextTagName;
|
||||
}
|
||||
default -> throw unexpectedTokenError("a name");
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -206,20 +317,35 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
|
||||
|
||||
@Override
|
||||
public void skipValue() throws IOException {
|
||||
nextValue("a value");
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
switch (p) {
|
||||
case PEEKED_BEGIN_OBJECT, PEEKED_BEGIN_ARRAY -> {
|
||||
while (reader.hasNext()) reader.skipValue();
|
||||
reader.endTag();
|
||||
stackSize--;
|
||||
}
|
||||
case PEEKED_NAME_ATT, PEEKED_TEXT, PEEKED_CDATA, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> reader.skipValue();
|
||||
default -> throw unexpectedTokenError("a value");
|
||||
}
|
||||
peeked = PEEKED_NONE;
|
||||
}
|
||||
|
||||
private String nextValue(String kind) throws IOException {
|
||||
if (nextTagName != null) throw unexpectedTokenError(kind);
|
||||
return switch (reader.peek()) {
|
||||
case ATTRIBUTE_VALUE -> {
|
||||
stack[stackSize - 1] = WrapperScope.TAG_HEAD;
|
||||
yield reader.nextAttributeValue();
|
||||
}
|
||||
case TEXT -> reader.nextText();
|
||||
case CDATA -> reader.nextCData();
|
||||
case BEGIN_TAG, END_TAG, ATTRIBUTE_NAME, EOF -> throw unexpectedTokenError(kind);
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
String result = switch (p) {
|
||||
case PEEKED_ATT_VALUE -> reader.nextAttributeValue();
|
||||
case PEEKED_TEXT -> reader.nextText();
|
||||
case PEEKED_CDATA -> reader.nextCData();
|
||||
default -> throw unexpectedTokenError(kind);
|
||||
};
|
||||
peeked = PEEKED_NONE;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -241,10 +367,31 @@ public class XmlReader extends SerializeReader<IOException, XmlReader> implement
|
||||
public interface Heuristics {
|
||||
enum Kind {OBJECT, ARRAY}
|
||||
Kind guessKind(String path);
|
||||
boolean guessServesAsEntryName(String path, XmlToken next);
|
||||
String guessElementName(String path, XmlToken kind);
|
||||
Token guessValueKind(String path, XmlToken next);
|
||||
|
||||
Heuristics DEFAULT = path -> {
|
||||
if (path.endsWith("s")) return Kind.ARRAY;
|
||||
return Kind.OBJECT;
|
||||
Heuristics DEFAULT = new Heuristics() {
|
||||
@Override
|
||||
public Kind guessKind(String path) {
|
||||
if (path.endsWith("s")) return Kind.ARRAY;
|
||||
return Kind.OBJECT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean guessServesAsEntryName(String path, XmlToken next) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String guessElementName(String path, XmlToken kind) {
|
||||
return "item";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token guessValueKind(String path, XmlToken next) {
|
||||
return Token.STRING;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user