java-commons/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java

477 lines
18 KiB
Java

package io.gitlab.jfronny.commons.serialize.xml.wrapper;
import io.gitlab.jfronny.commons.data.LazilyParsedNumber;
import io.gitlab.jfronny.commons.serialize.MalformedDataException;
import io.gitlab.jfronny.commons.serialize.SerializeReader;
import io.gitlab.jfronny.commons.serialize.Token;
import io.gitlab.jfronny.commons.serialize.xml.NativeXmlReader;
import io.gitlab.jfronny.commons.serialize.xml.XmlToken;
import io.gitlab.jfronny.commons.serialize.xml.impl.WrapperScope;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Objects;
public class XmlReader extends SerializeReader<IOException, XmlReader> implements Closeable {
// ---- Values stored in `peeked`: the classification of the upcoming token cached by doPeek(). ----
// Nothing is cached; doPeek() has to run before the next token can be inspected.
private static final int PEEKED_NONE = 0;
// A tag opened inside an OBJECT scope that is both an entry name and the start of an object;
// nextName() downgrades it to PEEKED_BEGIN_OBJECT.
private static final int PEEKED_NAME_BEGIN_OBJECT = 1;
// A tag that starts an object (no name pending).
private static final int PEEKED_BEGIN_OBJECT = 2;
// Same as PEEKED_NAME_BEGIN_OBJECT, but the tag starts an array.
private static final int PEEKED_NAME_BEGIN_ARRAY = 3;
// A tag that starts an array (no name pending).
private static final int PEEKED_BEGIN_ARRAY = 4;
// An end tag seen while the innermost scope was OBJECT.
private static final int PEEKED_END_OBJECT = 5;
// An end tag seen while the innermost scope was ARRAY.
private static final int PEEKED_END_ARRAY = 6;
// An attribute name reported by the native reader.
private static final int PEEKED_NAME_ATT = 7;
// An attribute value reported by the native reader.
private static final int PEEKED_ATT_VALUE = 8;
// A tag inside an OBJECT scope whose name serves as the entry name (its content is the value).
private static final int PEEKED_NAME_TAG = 9;
// Text content that is read as a value.
private static final int PEEKED_TEXT = 10;
// CDATA content that is read as a value.
private static final int PEEKED_CDATA = 11;
// Text found directly inside an ARRAY or OBJECT scope; inside an object a name guessed by the
// heuristics is reported first (see peek() and nextName()).
private static final int PEEKED_NAME_VIRTUAL_TEXT = 12;
// CDATA counterpart of PEEKED_NAME_VIRTUAL_TEXT.
private static final int PEEKED_NAME_VIRTUAL_CDATA = 13;
// A value-wrapper tag that was closed without any content; surfaced as a null value.
private static final int PEEKED_NULL_VIRTUAL = 14;
// The native reader reported the end of the input.
private static final int PEEKED_EOF = 15;
// Cached classification of the next token (one of the PEEKED_* constants above).
// Declared without an access modifier, i.e. package-private.
int peeked = PEEKED_NONE;
// The low-level XML reader all tokens are pulled from.
private final NativeXmlReader reader;
// Stack of WrapperScope constants; only the first stackSize entries are meaningful.
// push() doubles the capacity of this array and of the two path arrays when it is full.
private int[] stack = new int[32];
private int stackSize = 0;
// Per-scope name segments used by getPath(boolean) for OBJECT scopes.
// NOTE(review): the visible code only ever writes null into this array — confirm whether names are meant to be recorded.
private String[] pathNames = new String[32];
// Per-scope element index used by getPath(boolean) for ARRAY scopes.
private int[] pathIndices = new int[32];
// Strategy used to decide how tags, text and CDATA are mapped to objects, arrays, names and values.
private Heuristics heuristics = Heuristics.DEFAULT;
// Decoder applied to attribute and tag names before they are returned from nextName().
private NameEncoding nameEncoding = NameEncoding.DEFAULT;
// Path captured by doPeek() right before a begin tag was consumed; while non-null it is what getPath(boolean) returns.
private String nextTagNamePath = null;
// Name of the tag most recently consumed by doPeek(); cleared by beginArray(), beginObject() and close().
private String nextTagName = null;
// Instance initializer: every reader starts with the DOCUMENT scope at the bottom of the stack.
{
push(WrapperScope.DOCUMENT);
}
/**
 * Pushes a scope marker onto the scope stack.
 * <p>
 * When the stack is full, the stack array and the two parallel path arrays are replaced by
 * copies of twice the size before the new entry is stored.
 *
 * @param newTop the {@link WrapperScope} constant that becomes the innermost scope
 */
private void push(int newTop) {
    final int capacity = stack.length;
    if (stackSize == capacity) {
        final int grown = capacity * 2;
        stack = Arrays.copyOf(stack, grown);
        pathIndices = Arrays.copyOf(pathIndices, grown);
        pathNames = Arrays.copyOf(pathNames, grown);
    }
    stack[stackSize] = newTop;
    stackSize++;
}
/**
 * Creates a wrapper that pulls its tokens from the given native reader.
 *
 * @param reader the native XML reader to wrap
 * @throws NullPointerException if {@code reader} is {@code null}
 */
public XmlReader(NativeXmlReader reader) {
    if (reader == null) {
        throw new NullPointerException();
    }
    this.reader = reader;
}
/**
 * Creates a wrapper around a new {@link NativeXmlReader} that reads from {@code source}.
 *
 * @param source the character stream handed to the native reader
 */
public XmlReader(Reader source) {
this(new NativeXmlReader(source));
}
/**
 * Sets the lenient flag on both this reader (via the superclass) and the wrapped native reader.
 *
 * @param lenient the new lenient setting
 * @return this reader, for chaining
 */
@Override
public XmlReader setLenient(boolean lenient) {
super.setLenient(lenient);
reader.setLenient(lenient);
return this;
}
/**
 * Reports the lenient setting of the wrapped native reader.
 *
 * @return the value returned by the native reader's {@code isLenient()}
 */
@Override
public boolean isLenient() {
return reader.isLenient();
}
/**
 * Replaces the heuristics used to map XML constructs to objects, arrays, names and values.
 *
 * @param heuristics the heuristics to use from now on
 * @return this reader, for chaining
 * @throws NullPointerException if {@code heuristics} is {@code null}
 */
public XmlReader setHeuristics(Heuristics heuristics) {
    final Heuristics checked = Objects.requireNonNull(heuristics);
    this.heuristics = checked;
    return this;
}
/**
 * Returns the heuristics currently in use.
 *
 * @return the configured {@link Heuristics}
 */
public Heuristics getHeuristics() {
return heuristics;
}
/**
 * Replaces the encoding used to decode attribute and tag names returned by {@link #nextName()}.
 *
 * @param nameEncoding the name encoding to use from now on
 * @return this reader, for chaining
 * @throws NullPointerException if {@code nameEncoding} is {@code null}
 */
public XmlReader setNameEncoding(NameEncoding nameEncoding) {
    final NameEncoding checked = Objects.requireNonNull(nameEncoding);
    this.nameEncoding = checked;
    return this;
}
/**
 * Returns the name encoding currently in use.
 *
 * @return the configured {@link NameEncoding}
 */
public NameEncoding getNameEncoding() {
return nameEncoding;
}
/**
 * Enters an array.
 * <p>
 * The upcoming token must be {@code PEEKED_BEGIN_ARRAY}, or {@code PEEKED_NAME_BEGIN_ARRAY}
 * while the innermost scope is not {@link WrapperScope#OBJECT}. On success the pending tag name
 * and path are cleared, an ARRAY scope is pushed with its index reset to zero, and the cached
 * token is discarded.
 *
 * @return this reader, for chaining
 * @throws IOException if peeking the next token fails
 * @throws IllegalStateException if the upcoming token does not start an array
 */
@Override
public XmlReader beginArray() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    final boolean startsArray = state == PEEKED_BEGIN_ARRAY
            || (state == PEEKED_NAME_BEGIN_ARRAY && stack[stackSize - 1] != WrapperScope.OBJECT);
    if (!startsArray) {
        throw unexpectedTokenError("an array");
    }
    nextTagNamePath = null;
    nextTagName = null;
    push(WrapperScope.ARRAY);
    pathIndices[stackSize - 1] = 0;
    peeked = PEEKED_NONE;
    return this;
}
/**
 * Leaves the current array.
 * <p>
 * Requires the upcoming token to be {@code PEEKED_END_ARRAY}. Consumes the end tag on the
 * native reader, pops one scope, clears the path name of the popped slot, advances the index
 * of the slot below it and discards the cached token.
 *
 * @return this reader, for chaining
 * @throws IOException if peeking or consuming the end tag fails
 * @throws IllegalStateException if the upcoming token is not the end of an array
 */
@Override
public XmlReader endArray() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    if (state != PEEKED_END_ARRAY) {
        throw unexpectedTokenError("the end of an array");
    }
    reader.endTag();
    final int popped = --stackSize;
    pathNames[popped] = null;
    pathIndices[popped - 1]++;
    peeked = PEEKED_NONE;
    return this;
}
/**
 * Enters an object.
 * <p>
 * The upcoming token must be {@code PEEKED_BEGIN_OBJECT}, or {@code PEEKED_NAME_BEGIN_OBJECT}
 * while the innermost scope is not {@link WrapperScope#OBJECT}. On success the pending tag name
 * and path are cleared, an OBJECT scope is pushed and the cached token is discarded.
 *
 * @return this reader, for chaining
 * @throws IOException if peeking the next token fails
 * @throws IllegalStateException if the upcoming token does not start an object
 */
@Override
public XmlReader beginObject() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    final boolean startsObject = state == PEEKED_BEGIN_OBJECT
            || (state == PEEKED_NAME_BEGIN_OBJECT && stack[stackSize - 1] != WrapperScope.OBJECT);
    if (!startsObject) {
        throw unexpectedTokenError("an object");
    }
    nextTagNamePath = null;
    nextTagName = null;
    push(WrapperScope.OBJECT);
    peeked = PEEKED_NONE;
    return this;
}
/**
 * Leaves the current object.
 * <p>
 * Requires the upcoming token to be {@code PEEKED_END_OBJECT}. Consumes the end tag on the
 * native reader, pops one scope, clears the path name of the popped slot, advances the index
 * of the slot below it and discards the cached token.
 *
 * @return this reader, for chaining
 * @throws IOException if peeking or consuming the end tag fails
 * @throws IllegalStateException if the upcoming token is not the end of an object
 */
@Override
public XmlReader endObject() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    if (state != PEEKED_END_OBJECT) {
        throw unexpectedTokenError("the end of an object");
    }
    reader.endTag();
    final int popped = --stackSize;
    pathNames[popped] = null;
    pathIndices[popped - 1]++;
    peeked = PEEKED_NONE;
    return this;
}
/**
 * Reports whether more content is available.
 * <p>
 * Returns {@code true} without consulting the native reader while a tag name consumed by
 * {@code doPeek()} is still pending; otherwise delegates to the native reader.
 * NOTE(review): {@link #nextName()} does not clear the pending tag name, so this may keep
 * returning {@code true} after the name has been handed out — confirm this is intended.
 *
 * @return {@code true} if a tag name is pending or the native reader has more content
 * @throws IOException if the native reader fails
 */
@Override
public boolean hasNext() throws IOException {
    if (nextTagName != null) {
        return true;
    }
    return reader.hasNext();
}
/**
 * Returns the kind of the next token without consuming it.
 * <p>
 * Combined name/value states are reported as {@link Token#NAME} while the innermost scope is
 * {@link WrapperScope#OBJECT}; otherwise they are reported as the value or structure they carry.
 * The kind of text, CDATA and attribute values is delegated to
 * {@link Heuristics#guessValueKind(String, XmlToken)}.
 *
 * @return the kind of the upcoming token
 * @throws IOException if peeking the next token fails
 */
@Override
public Token peek() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    switch (state) {
        case PEEKED_NAME_ATT:
        case PEEKED_NAME_TAG:
            return Token.NAME;
        case PEEKED_NAME_VIRTUAL_CDATA:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                return Token.NAME;
            }
            return heuristics.guessValueKind(getPath(), XmlToken.CDATA);
        case PEEKED_NAME_VIRTUAL_TEXT:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                return Token.NAME;
            }
            return heuristics.guessValueKind(getPath(), XmlToken.TEXT);
        case PEEKED_NAME_BEGIN_OBJECT:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                return Token.NAME;
            }
            return Token.BEGIN_OBJECT;
        case PEEKED_NAME_BEGIN_ARRAY:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                return Token.NAME;
            }
            return Token.BEGIN_ARRAY;
        case PEEKED_BEGIN_OBJECT:
            return Token.BEGIN_OBJECT;
        case PEEKED_BEGIN_ARRAY:
            return Token.BEGIN_ARRAY;
        case PEEKED_END_OBJECT:
            return Token.END_OBJECT;
        case PEEKED_END_ARRAY:
            return Token.END_ARRAY;
        case PEEKED_CDATA:
            return heuristics.guessValueKind(getPath(), XmlToken.CDATA);
        case PEEKED_ATT_VALUE:
            return heuristics.guessValueKind(getPath(), XmlToken.ATTRIBUTE_VALUE);
        case PEEKED_TEXT:
            return heuristics.guessValueKind(getPath(), XmlToken.TEXT);
        case PEEKED_NULL_VIRTUAL:
            return Token.NULL;
        case PEEKED_EOF:
            return Token.END_DOCUMENT;
        default:
            throw new AssertionError();
    }
}
/**
 * Classifies the next token of the wrapped native reader and caches the result in {@code peeked}.
 * <p>
 * Side effects visible in this method: a begin tag is consumed via {@code reader.beginTag()} and
 * a scope is pushed for it; an end tag pops one scope, and for value-wrapper scopes the end tag
 * itself is consumed via {@code reader.endTag()}.
 *
 * @return one of the {@code PEEKED_*} constants other than {@code PEEKED_NONE}
 * @throws IOException propagated from the native reader; a {@link MalformedDataException} is
 *         raised through {@code syntaxError} when text, CDATA, a tag or an end tag shows up in a
 *         scope that does not allow it
 */
private int doPeek() throws IOException {
// Second half of a combined name/value state: the name part is done, return the value part.
// NOTE(review): every caller in this file only invokes doPeek() while peeked is PEEKED_NONE
// (the recursive call below runs before peeked is assigned), so these guards look unreachable — confirm.
if (peeked == PEEKED_NAME_VIRTUAL_TEXT) return PEEKED_TEXT;
if (peeked == PEEKED_NAME_VIRTUAL_CDATA) return PEEKED_CDATA;
if (peeked == PEEKED_NAME_BEGIN_ARRAY) return PEEKED_BEGIN_ARRAY;
if (peeked == PEEKED_NAME_BEGIN_OBJECT) return PEEKED_BEGIN_OBJECT;
// Innermost scope as it was on entry; the branches below may modify the stack afterwards.
int peekStack = stack[stackSize - 1];
return peeked = switch (reader.peek()) {
case ATTRIBUTE_NAME -> PEEKED_NAME_ATT;
case ATTRIBUTE_VALUE -> PEEKED_ATT_VALUE;
case TEXT -> switch (peekStack) {
case WrapperScope.DOCUMENT -> PEEKED_TEXT;
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
// The wrapper tag now has its value; mark it so that a later end tag is not reported as null.
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
yield PEEKED_TEXT;
}
// Text directly inside an array/object: may need a virtual name first (see peek()/nextName()).
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_TEXT;
default -> throw syntaxError("Unexpected text");
};
// CDATA is handled exactly like text, with the CDATA variants of the states.
case CDATA -> switch (peekStack) {
case WrapperScope.DOCUMENT -> PEEKED_CDATA;
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
yield PEEKED_CDATA;
}
case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_CDATA;
default -> throw syntaxError("Unexpected CDATA");
};
case BEGIN_TAG -> {
switch (peekStack) {
// A wrapper that already delivered its value must not contain another tag.
case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> throw syntaxError("Unexpected Tag");
// A tag nested in an unused wrapper becomes that wrapper's value.
case WrapperScope.OBJECT_VALUE_WRAPPER -> stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
default -> {}
}
// Capture the wrapper-level path before the tag is consumed, then consume the tag and keep its name.
nextTagNamePath = getPath();
nextTagName = reader.beginTag();
if (peekStack == WrapperScope.OBJECT) {
// Inside an object the tag is an entry: its name is the key.
String path = reader.getPath();
XmlToken next = reader.peek();
if (heuristics.guessServesAsEntryName(path, next)) {
// The tag only wraps the entry's value; report just the name for now.
push(WrapperScope.OBJECT_VALUE_WRAPPER);
yield PEEKED_NAME_TAG;
} else {
// The tag is both the entry name and the start of a nested object/array.
yield switch (heuristics.guessKind(path)) {
case OBJECT -> {
push(WrapperScope.OBJECT);
yield PEEKED_NAME_BEGIN_OBJECT;
}
case ARRAY -> {
push(WrapperScope.ARRAY);
yield PEEKED_NAME_BEGIN_ARRAY;
}
};
}
}
// Outside an object scope the tag simply starts an object or array, as guessed from the native path.
// Note that a scope is pushed here and beginObject()/beginArray() push a second one.
yield switch (heuristics.guessKind(reader.getPath())) {
case OBJECT -> {
push(WrapperScope.OBJECT);
yield PEEKED_BEGIN_OBJECT;
}
case ARRAY -> {
push(WrapperScope.ARRAY);
yield PEEKED_BEGIN_ARRAY;
}
};
}
case END_TAG -> switch (peekStack) {
// For arrays/objects only one scope is popped here; endArray()/endObject() consume the
// end tag on the native reader and pop once more.
case WrapperScope.ARRAY -> {
stackSize--;
yield PEEKED_END_ARRAY;
}
case WrapperScope.OBJECT -> {
stackSize--;
yield PEEKED_END_OBJECT;
}
// The wrapper's value was already delivered: drop the wrapper, consume its end tag and peek again.
case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> {
stackSize--;
reader.endTag();
yield doPeek();
}
// The wrapper closed without any content: consume its end tag and report a virtual null.
case WrapperScope.OBJECT_VALUE_WRAPPER -> {
stackSize--;
reader.endTag();
yield PEEKED_NULL_VIRTUAL;
}
default -> throw syntaxError("Unexpected end tag");
};
case EOF -> PEEKED_EOF;
};
}
/**
 * Reads the next entry name.
 * <p>
 * Attribute and tag names are passed through the configured {@link NameEncoding}. Names for
 * virtual text/CDATA entries come from {@link Heuristics#guessElementName(String, XmlToken)}
 * and are returned as-is. For combined name/value states only the name part is consumed; the
 * cached state is switched to the value part.
 *
 * @return the name of the next entry
 * @throws IOException if peeking or reading from the native reader fails
 * @throws IllegalStateException if the upcoming token is not a name
 */
@Override
public String nextName() throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    switch (state) {
        case PEEKED_NAME_ATT: {
            final String attributeName = reader.nextAttributeName();
            peeked = PEEKED_NONE;
            return nameEncoding.decode(attributeName);
        }
        case PEEKED_NAME_TAG: {
            final String tagName = nextTagName;
            if (tagName == null) {
                throw unexpectedTokenError("a name");
            }
            peeked = PEEKED_NONE;
            return nameEncoding.decode(tagName);
        }
        case PEEKED_NAME_VIRTUAL_TEXT: {
            final String guessed = heuristics.guessElementName(reader.getPath(), XmlToken.TEXT);
            peeked = PEEKED_TEXT;
            return guessed;
        }
        case PEEKED_NAME_VIRTUAL_CDATA: {
            final String guessed = heuristics.guessElementName(reader.getPath(), XmlToken.CDATA);
            peeked = PEEKED_CDATA;
            return guessed;
        }
        case PEEKED_NAME_BEGIN_OBJECT:
            peeked = PEEKED_BEGIN_OBJECT;
            return nameEncoding.decode(nextTagName);
        case PEEKED_NAME_BEGIN_ARRAY:
            peeked = PEEKED_BEGIN_ARRAY;
            return nameEncoding.decode(nextTagName);
        default:
            throw unexpectedTokenError("a name");
    }
}
/**
 * Reads the next value as a string.
 *
 * @return the raw value as returned by {@code nextValue}
 * @throws IOException if reading fails
 */
@Override
public String nextString() throws IOException {
return nextValue("a string");
}
/**
 * Reads the next value and interprets it as a boolean.
 * <p>
 * The value is compared case-insensitively against {@code "true"} and {@code "false"}.
 *
 * @return the parsed boolean
 * @throws IOException if reading the value fails
 * @throws IllegalStateException if the next token is not a value, or the value is neither
 *         {@code "true"} nor {@code "false"}
 */
@Override
public boolean nextBoolean() throws IOException {
    String res = nextValue("a boolean");
    if (res.equalsIgnoreCase("true")) return true;
    if (res.equalsIgnoreCase("false")) return false;
    // The value has already been consumed at this point. unexpectedTokenError() peeks, which
    // would advance the reader and describe the token AFTER the offending value (or fail with
    // an unrelated syntax error), so the consumed value is reported directly instead.
    throw new IllegalStateException("Expected a boolean but was " + res + locationString());
}
/**
 * Reads the next value and requires it to be null.
 * <p>
 * A value counts as null when it equals {@code "null"} ignoring case; this includes the
 * virtual null that {@code nextValue} produces for an empty value-wrapper tag.
 *
 * @throws IOException if reading the value fails
 * @throws IllegalStateException if the next token is not a value, or the value is not
 *         {@code "null"}
 */
@Override
public void nextNull() throws IOException {
    String res = nextValue("null");
    if (!res.equalsIgnoreCase("null")) {
        // The value has already been consumed. unexpectedTokenError() peeks, which would
        // advance the reader and describe the token AFTER the offending value (or fail with an
        // unrelated syntax error), so the consumed value is reported directly instead.
        throw new IllegalStateException("Expected null but was " + res + locationString());
    }
}
/**
 * Reads the next value as a lazily parsed number.
 * <p>
 * The literals {@code NaN}, {@code Infinity} and {@code -Infinity} are rejected unless
 * {@code serializeSpecialFloatingPointValues} is set.
 *
 * @return a {@link LazilyParsedNumber} wrapping the raw value
 * @throws IOException if reading the value fails
 * @throws IllegalStateException if the next token is not a value, or it is a special
 *         floating point literal that is not allowed
 */
@Override
public Number nextNumber() throws IOException {
    final String raw = nextValue("a number");
    final LazilyParsedNumber parsed = new LazilyParsedNumber(raw);
    final boolean special = raw.equals("NaN") || raw.equals("Infinity") || raw.equals("-Infinity");
    if (special && !serializeSpecialFloatingPointValues) {
        throw new IllegalStateException("Special floating point values are not allowed: " + raw);
    }
    return parsed;
}
/**
 * Skips the next value.
 * <p>
 * <ul>
 *   <li>For the start of an object or array, the whole element is skipped on the native reader,
 *       its end tag is consumed, the scope pushed by {@code doPeek()} is popped and the pending
 *       tag name/path are cleared.</li>
 *   <li>Attribute names, text and CDATA are skipped via the native reader's {@code skipValue()}.</li>
 *   <li>An attribute value is consumed and discarded.</li>
 *   <li>A virtual null has nothing left to consume on the native reader.</li>
 * </ul>
 *
 * @throws IOException if peeking or skipping on the native reader fails
 * @throws IllegalStateException if the upcoming token cannot be skipped as a value
 */
@Override
public void skipValue() throws IOException {
    int p = peeked;
    if (p == PEEKED_NONE) {
        p = doPeek();
    }
    switch (p) {
        case PEEKED_BEGIN_OBJECT, PEEKED_BEGIN_ARRAY -> {
            while (reader.hasNext()) reader.skipValue();
            reader.endTag();
            stackSize--;
            // The tag consumed by doPeek() is gone entirely; forget it just like
            // beginObject()/beginArray() do, otherwise hasNext() would keep returning true and
            // getPath() would keep returning the skipped tag's path.
            nextTagName = nextTagNamePath = null;
        }
        case PEEKED_NAME_ATT, PEEKED_TEXT, PEEKED_CDATA, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> reader.skipValue();
        // Attribute values are values too (nextValue() accepts them); read and discard.
        case PEEKED_ATT_VALUE -> reader.nextAttributeValue();
        // doPeek() already consumed the empty wrapper's end tag; only the cached state is left.
        case PEEKED_NULL_VIRTUAL -> {}
        default -> throw unexpectedTokenError("a value");
    }
    peeked = PEEKED_NONE;
}
/**
 * Consumes the next value token and returns its raw string form.
 * <p>
 * Attribute values, text and CDATA are read from the native reader. Virtual text/CDATA is only
 * accepted as a value while the innermost scope is not {@link WrapperScope#OBJECT} (inside an
 * object its virtual name has to be read first). A virtual null yields the string
 * {@code "null"}.
 *
 * @param kind description of the expected value, used in the error message
 * @return the raw value
 * @throws IOException if peeking or reading from the native reader fails
 * @throws IllegalStateException if the upcoming token is not a value
 */
private String nextValue(String kind) throws IOException {
    final int state = peeked == PEEKED_NONE ? doPeek() : peeked;
    final String value;
    switch (state) {
        case PEEKED_ATT_VALUE:
            value = reader.nextAttributeValue();
            break;
        case PEEKED_TEXT:
            value = reader.nextText();
            break;
        case PEEKED_CDATA:
            value = reader.nextCData();
            break;
        case PEEKED_NAME_VIRTUAL_TEXT:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                throw unexpectedTokenError(kind);
            }
            value = reader.nextText();
            break;
        case PEEKED_NAME_VIRTUAL_CDATA:
            if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                throw unexpectedTokenError(kind);
            }
            value = reader.nextCData();
            break;
        case PEEKED_NULL_VIRTUAL:
            value = "null";
            break;
        default:
            throw unexpectedTokenError(kind);
    }
    peeked = PEEKED_NONE;
    return value;
}
/**
 * Builds a path string for the current position.
 * <p>
 * While a tag path captured by {@code doPeek()} is pending, that path is returned unchanged.
 * Otherwise the path starts with {@code $}; every ARRAY scope contributes {@code [index]} and
 * every OBJECT scope contributes {@code .} followed by its recorded name, if any. Wrapper and
 * document scopes contribute nothing.
 *
 * @param usePreviousPath if {@code true} and the innermost scope is an array with a positive
 *                        index, that index is reported decremented by one
 * @return the path string
 */
private String getPath(boolean usePreviousPath) {
    if (nextTagNamePath != null) {
        return nextTagNamePath;
    }
    final StringBuilder path = new StringBuilder("$");
    final int last = stackSize - 1;
    for (int depth = 0; depth <= last; depth++) {
        final int scope = stack[depth];
        if (scope == WrapperScope.ARRAY) {
            int index = pathIndices[depth];
            // For the innermost array the stored index already points at the next element.
            if (usePreviousPath && depth == last && index > 0) {
                index--;
            }
            path.append('[').append(index).append(']');
        } else if (scope == WrapperScope.OBJECT) {
            path.append('.');
            final String name = pathNames[depth];
            if (name != null) {
                path.append(name);
            }
        } else if (scope != WrapperScope.OBJECT_VALUE_WRAPPER
                && scope != WrapperScope.OBJECT_VALUE_WRAPPER_USED
                && scope != WrapperScope.DOCUMENT) {
            throw new AssertionError("Unknown scope value: " + scope);
        }
    }
    return path.toString();
}
/**
 * Returns the path of the current position, using the stored array indices as they are.
 *
 * @return the result of {@code getPath(false)}
 */
@Override
public String getPath() {
return getPath(false);
}
/**
 * Returns the path of the current position, with the index of an innermost array scope
 * decremented by one when it is positive.
 *
 * @return the result of {@code getPath(true)}
 */
@Override
public String getPreviousPath() {
return getPath(true);
}
/**
 * Clears the pending tag name and closes the wrapped native reader.
 *
 * @throws IOException if closing the native reader fails
 */
@Override
public void close() throws IOException {
nextTagName = null;
reader.close();
}
/**
 * Strategy that decides how XML constructs are mapped onto the object/array/name/value model
 * exposed by {@link XmlReader}.
 */
public interface Heuristics {
    /** Structure a tag can be mapped to. */
    enum Kind { OBJECT, ARRAY }

    /**
     * Decides whether the tag at the given path starts an object or an array.
     *
     * @param path path of the tag as reported by the native reader
     * @return the structure the tag should be read as
     */
    Kind guessKind(String path);

    /**
     * Decides whether a tag found inside an object merely names an entry whose value is the
     * tag's content.
     *
     * @param path path of the tag as reported by the native reader
     * @param next the native token following the begin tag
     * @return {@code true} if the tag only provides the entry name
     */
    boolean guessServesAsEntryName(String path, XmlToken next);

    /**
     * Provides the entry name reported for text or CDATA that has no tag of its own.
     *
     * @param path path as reported by the native reader
     * @param kind the kind of content that needs a name
     * @return the name to report
     */
    String guessElementName(String path, XmlToken kind);

    /**
     * Decides which value token kind is reported for text, CDATA or an attribute value.
     *
     * @param path path of the value
     * @param next the native token carrying the value
     * @return the token kind to report from {@link XmlReader#peek()}
     */
    Token guessValueKind(String path, XmlToken next);

    /**
     * Default heuristics: paths ending in {@code "s"} are arrays and everything else is an
     * object, every tag inside an object serves as an entry name, unnamed content is called
     * {@code "item"}, and every value is reported as a string.
     */
    Heuristics DEFAULT = new Heuristics() {
        @Override
        public Kind guessKind(String path) {
            return path.endsWith("s") ? Kind.ARRAY : Kind.OBJECT;
        }

        @Override
        public boolean guessServesAsEntryName(String path, XmlToken next) {
            return true;
        }

        @Override
        public String guessElementName(String path, XmlToken kind) {
            return "item";
        }

        @Override
        public Token guessValueKind(String path, XmlToken next) {
            return Token.STRING;
        }
    };
}
/**
 * Raises a {@link MalformedDataException} whose message is {@code message} followed by the
 * current location string.
 * <p>
 * The exception is thrown from inside this method; the declared return type only exists so
 * that call sites can be written as {@code throw syntaxError(...)}.
 *
 * @param message description of the problem
 * @return never returns normally
 * @throws MalformedDataException always
 */
private MalformedDataException syntaxError(String message) throws MalformedDataException {
    final String detailed = message + locationString();
    throw new MalformedDataException(detailed);
}
/**
 * Creates (but does not throw) the exception used when the upcoming token has the wrong kind.
 * <p>
 * The message names the expected kind, the kind returned by {@link #peek()} and the current
 * location. Because this calls {@code peek()}, it may itself peek the next token if none is
 * cached.
 *
 * @param expected description of what was expected, e.g. {@code "a name"}
 * @return the exception for the caller to throw
 * @throws IOException if peeking the next token fails
 */
private IllegalStateException unexpectedTokenError(String expected) throws IOException {
    final Token actual = peek();
    final String text = "Expected " + expected + " but was " + actual + locationString();
    return new IllegalStateException(text);
}
}