package io.gitlab.jfronny.commons.serialize.xml.wrapper;

import io.gitlab.jfronny.commons.data.LazilyParsedNumber;
import io.gitlab.jfronny.commons.serialize.MalformedDataException;
import io.gitlab.jfronny.commons.serialize.SerializeReader;
import io.gitlab.jfronny.commons.serialize.Token;
import io.gitlab.jfronny.commons.serialize.xml.NativeXmlReader;
import io.gitlab.jfronny.commons.serialize.xml.XmlToken;
import io.gitlab.jfronny.commons.serialize.xml.impl.WrapperScope;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Objects;

/**
 * A {@link SerializeReader} that exposes the XML tokens produced by a {@link NativeXmlReader}
 * as object/array/name/value tokens.
 *
 * <p>XML does not say whether an element is an object, an array or a named value, so those
 * decisions are delegated to a pluggable {@link Heuristics} instance. Names taken from tags and
 * attributes are passed through the configured {@link NameEncoding} before being returned.
 *
 * <p>The reader keeps a stack of {@link WrapperScope} values. {@link #doPeek()} pushes a scope when
 * it consumes a begin tag and pops one when it sees the matching end tag; {@link #beginObject()} /
 * {@link #beginArray()} and {@link #endObject()} / {@link #endArray()} push and pop a second entry
 * of their own.
 */
public class XmlReader extends SerializeReader implements Closeable {
    // Values of the `peeked` field: the wrapper-level token that has been looked at but not yet consumed.
    private static final int PEEKED_NONE = 0;
    private static final int PEEKED_NAME_BEGIN_OBJECT = 1;
    private static final int PEEKED_BEGIN_OBJECT = 2;
    private static final int PEEKED_NAME_BEGIN_ARRAY = 3;
    private static final int PEEKED_BEGIN_ARRAY = 4;
    private static final int PEEKED_END_OBJECT = 5;
    private static final int PEEKED_END_ARRAY = 6;
    private static final int PEEKED_NAME_ATT = 7;
    private static final int PEEKED_ATT_VALUE = 8;
    private static final int PEEKED_NAME_TAG = 9;
    private static final int PEEKED_TEXT = 10;
    private static final int PEEKED_CDATA = 11;
    private static final int PEEKED_NAME_VIRTUAL_TEXT = 12;
    private static final int PEEKED_NAME_VIRTUAL_CDATA = 13;
    private static final int PEEKED_EOF = 14;

    int peeked = PEEKED_NONE;

    private final NativeXmlReader reader;
    private int[] stack = new int[32];
    private int stackSize = 0;
    private String[] pathNames = new String[32];
    private int[] pathIndices = new int[32];
    private Heuristics heuristics = Heuristics.DEFAULT;
    private NameEncoding nameEncoding = NameEncoding.DEFAULT;
    // Path captured just before the most recent begin tag was consumed; while non-null it overrides getPath().
    private String nextTagNamePath = null;
    // Name of the most recently consumed begin tag, pending delivery through nextName().
    private String nextTagName = null;

    {
        stack[stackSize++] = WrapperScope.DOCUMENT;
    }

    /**
     * Pushes a scope onto the stack, doubling the three parallel arrays when they are full.
     *
     * @param newTop the {@link WrapperScope} value to push
     */
    private void push(int newTop) {
        if (stackSize == stack.length) {
            int newLength = stackSize * 2;
            stack = Arrays.copyOf(stack, newLength);
            pathIndices = Arrays.copyOf(pathIndices, newLength);
            pathNames = Arrays.copyOf(pathNames, newLength);
        }
        stack[stackSize++] = newTop;
    }

    /**
     * Creates a wrapper around an existing native reader.
     *
     * @param reader the underlying XML token reader, must not be {@code null}
     */
    public XmlReader(NativeXmlReader reader) {
        this.reader = Objects.requireNonNull(reader);
    }

    /**
     * Creates a wrapper that reads XML from {@code source} through a new {@link NativeXmlReader}.
     *
     * @param source the character source handed to the native reader
     */
    public XmlReader(Reader source) {
        this(new NativeXmlReader(source));
    }

    /** Forwards the lenient flag to the underlying native reader. */
    @Override
    public XmlReader setLenient(boolean lenient) {
        reader.setLenient(lenient);
        return this;
    }

    /** Returns the lenient flag of the underlying native reader. */
    @Override
    public boolean isLenient() {
        return reader.isLenient();
    }

    /**
     * Replaces the heuristics used to map XML structure to tokens.
     *
     * @param heuristics the new heuristics, must not be {@code null}
     * @return this reader
     */
    public XmlReader setHeuristics(Heuristics heuristics) {
        this.heuristics = Objects.requireNonNull(heuristics);
        return this;
    }

    /** Returns the heuristics currently in use. */
    public Heuristics getHeuristics() {
        return heuristics;
    }

    /**
     * Replaces the encoding used to decode tag and attribute names.
     *
     * @param nameEncoding the new encoding, must not be {@code null}
     * @return this reader
     */
    public XmlReader setNameEncoding(NameEncoding nameEncoding) {
        this.nameEncoding = Objects.requireNonNull(nameEncoding);
        return this;
    }

    /** Returns the name encoding currently in use. */
    public NameEncoding getNameEncoding() {
        return nameEncoding;
    }

    /**
     * Consumes the beginning of an array.
     *
     * @throws IllegalStateException if the next token is not the beginning of an array
     */
    @Override
    public XmlReader beginArray() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        if (p != PEEKED_BEGIN_ARRAY && !(p == PEEKED_NAME_BEGIN_ARRAY && stack[stackSize - 1] != WrapperScope.OBJECT))
            throw unexpectedTokenError("an array");
        // The tag has been fully consumed, so it no longer contributes a pending name or path.
        nextTagName = nextTagNamePath = null;
        push(WrapperScope.ARRAY);
        pathIndices[stackSize - 1] = 0;
        peeked = PEEKED_NONE;
        return this;
    }

    /**
     * Consumes the end of an array and the corresponding end tag of the native reader.
     *
     * @throws IllegalStateException if the next token is not the end of an array
     */
    @Override
    public XmlReader endArray() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        if (p != PEEKED_END_ARRAY) throw unexpectedTokenError("the end of an array");
        reader.endTag();
        // doPeek() already popped the scope it pushed; this pops the one pushed by beginArray().
        stackSize--;
        pathNames[stackSize] = null;
        pathIndices[stackSize - 1]++;
        peeked = PEEKED_NONE;
        return this;
    }

    /**
     * Consumes the beginning of an object.
     *
     * @throws IllegalStateException if the next token is not the beginning of an object
     */
    @Override
    public XmlReader beginObject() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        if (p != PEEKED_BEGIN_OBJECT && !(p == PEEKED_NAME_BEGIN_OBJECT && stack[stackSize - 1] != WrapperScope.OBJECT))
            throw unexpectedTokenError("an object");
        // The tag has been fully consumed, so it no longer contributes a pending name or path.
        nextTagName = nextTagNamePath = null;
        push(WrapperScope.OBJECT);
        peeked = PEEKED_NONE;
        return this;
    }

    /**
     * Consumes the end of an object and the corresponding end tag of the native reader.
     *
     * @throws IllegalStateException if the next token is not the end of an object
     */
    @Override
    public XmlReader endObject() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        if (p != PEEKED_END_OBJECT) throw unexpectedTokenError("the end of an object");
        reader.endTag();
        // doPeek() already popped the scope it pushed; this pops the one pushed by beginObject().
        stackSize--;
        pathNames[stackSize] = null;
        pathIndices[stackSize - 1]++;
        peeked = PEEKED_NONE;
        return this;
    }

    /**
     * Reports whether the current object or array has another token before its end.
     *
     * <p>The answer is derived from the peeked wrapper-level token rather than from the native
     * reader, because the native reader cannot see past the end tag of an entry wrapper and the
     * pending tag name outlives the entry it belonged to.
     *
     * @return {@code false} if the next token is the end of an object, the end of an array or the
     *     end of the document, {@code true} otherwise
     */
    @Override
    public boolean hasNext() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        return p != PEEKED_END_OBJECT && p != PEEKED_END_ARRAY && p != PEEKED_EOF;
    }

    /**
     * Returns the type of the next token without consuming it.
     *
     * <p>Value tokens (text, CDATA, attribute values) have no intrinsic type; their kind is whatever
     * {@link Heuristics#guessValueKind(String, XmlToken)} reports for the current path.
     */
    @Override
    public Token peek() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        return switch (p) {
            case PEEKED_NAME_ATT, PEEKED_NAME_TAG -> Token.NAME;
            // "Virtual" and NAME_BEGIN_* states only surface a name when the top scope is an object.
            case PEEKED_NAME_VIRTUAL_CDATA -> stack[stackSize - 1] == WrapperScope.OBJECT
                    ? Token.NAME
                    : heuristics.guessValueKind(getPath(), XmlToken.CDATA);
            case PEEKED_NAME_VIRTUAL_TEXT -> stack[stackSize - 1] == WrapperScope.OBJECT
                    ? Token.NAME
                    : heuristics.guessValueKind(getPath(), XmlToken.TEXT);
            case PEEKED_NAME_BEGIN_OBJECT -> stack[stackSize - 1] == WrapperScope.OBJECT ? Token.NAME : Token.BEGIN_OBJECT;
            case PEEKED_NAME_BEGIN_ARRAY -> stack[stackSize - 1] == WrapperScope.OBJECT ? Token.NAME : Token.BEGIN_ARRAY;
            case PEEKED_BEGIN_OBJECT -> Token.BEGIN_OBJECT;
            case PEEKED_BEGIN_ARRAY -> Token.BEGIN_ARRAY;
            case PEEKED_END_OBJECT -> Token.END_OBJECT;
            case PEEKED_END_ARRAY -> Token.END_ARRAY;
            case PEEKED_CDATA -> heuristics.guessValueKind(getPath(), XmlToken.CDATA);
            case PEEKED_ATT_VALUE -> heuristics.guessValueKind(getPath(), XmlToken.ATTRIBUTE_VALUE);
            case PEEKED_TEXT -> heuristics.guessValueKind(getPath(), XmlToken.TEXT);
            case PEEKED_EOF -> Token.END_DOCUMENT;
            default -> throw new AssertionError();
        };
    }

    /**
     * Looks at the next native token, translates it into a {@code PEEKED_*} state, stores that
     * state in {@link #peeked} and returns it.
     *
     * <p>This method has side effects beyond setting {@code peeked}: a begin tag is consumed from
     * the native reader and a scope is pushed, an end tag pops a scope, and the end tag of a used
     * entry wrapper is consumed before peeking again.
     *
     * @throws MalformedDataException if the native token is not allowed in the current scope
     */
    private int doPeek() throws IOException {
        // NOTE(review): every caller in this class invokes doPeek() only while peeked == PEEKED_NONE,
        // so these four guards look purely defensive.
        if (peeked == PEEKED_NAME_VIRTUAL_TEXT) return PEEKED_TEXT;
        if (peeked == PEEKED_NAME_VIRTUAL_CDATA) return PEEKED_CDATA;
        if (peeked == PEEKED_NAME_BEGIN_ARRAY) return PEEKED_BEGIN_ARRAY;
        if (peeked == PEEKED_NAME_BEGIN_OBJECT) return PEEKED_BEGIN_OBJECT;
        int peekStack = stack[stackSize - 1];
        return peeked = switch (reader.peek()) {
            case ATTRIBUTE_NAME -> PEEKED_NAME_ATT;
            case ATTRIBUTE_VALUE -> PEEKED_ATT_VALUE;
            case TEXT -> switch (peekStack) {
                case WrapperScope.DOCUMENT -> PEEKED_TEXT;
                case WrapperScope.OBJECT_VALUE_WRAPPER -> {
                    // The wrapper now holds its single value; anything but its end tag is an error afterwards.
                    stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
                    yield PEEKED_TEXT;
                }
                case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_TEXT;
                default -> throw syntaxError("Unexpected text");
            };
            case CDATA -> switch (peekStack) {
                case WrapperScope.DOCUMENT -> PEEKED_CDATA;
                case WrapperScope.OBJECT_VALUE_WRAPPER -> {
                    stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
                    yield PEEKED_CDATA;
                }
                case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_CDATA;
                default -> throw syntaxError("Unexpected CDATA");
            };
            case BEGIN_TAG -> {
                switch (peekStack) {
                    case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> throw syntaxError("Unexpected Tag");
                    case WrapperScope.OBJECT_VALUE_WRAPPER -> stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED;
                    default -> {}
                }
                // Capture the path before the tag is consumed; getPath() keeps returning it while it is set.
                nextTagNamePath = getPath();
                nextTagName = reader.beginTag();
                if (peekStack == WrapperScope.OBJECT) {
                    String path = reader.getPath();
                    XmlToken next = reader.peek();
                    if (heuristics.guessServesAsEntryName(path, next)) {
                        // The tag only names an entry; its content is the entry's value.
                        push(WrapperScope.OBJECT_VALUE_WRAPPER);
                        yield PEEKED_NAME_TAG;
                    } else {
                        // The tag is both the entry name and the start of a nested object/array.
                        yield switch (heuristics.guessKind(path)) {
                            case OBJECT -> {
                                push(WrapperScope.OBJECT);
                                yield PEEKED_NAME_BEGIN_OBJECT;
                            }
                            case ARRAY -> {
                                push(WrapperScope.ARRAY);
                                yield PEEKED_NAME_BEGIN_ARRAY;
                            }
                        };
                    }
                }
                yield switch (heuristics.guessKind(reader.getPath())) {
                    case OBJECT -> {
                        push(WrapperScope.OBJECT);
                        yield PEEKED_BEGIN_OBJECT;
                    }
                    case ARRAY -> {
                        push(WrapperScope.ARRAY);
                        yield PEEKED_BEGIN_ARRAY;
                    }
                };
            }
            case END_TAG -> switch (peekStack) {
                case WrapperScope.ARRAY -> {
                    stackSize--;
                    yield PEEKED_END_ARRAY;
                }
                case WrapperScope.OBJECT -> {
                    stackSize--;
                    yield PEEKED_END_OBJECT;
                }
                case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> {
                    // The wrapper's end tag is not a token of its own: consume it and peek again.
                    stackSize--;
                    reader.endTag();
                    yield doPeek();
                }
                default -> throw syntaxError("Unexpected end tag");
            };
            case EOF -> PEEKED_EOF;
        };
    }

    /**
     * Consumes and returns the next name.
     *
     * <p>Attribute and tag names are decoded with the configured {@link NameEncoding}. Text and CDATA
     * that appear where a name is required get a name from
     * {@link Heuristics#guessElementName(String, XmlToken)}, which is returned without decoding.
     *
     * @throws IllegalStateException if the next token is not a name
     */
    @Override
    public String nextName() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        return switch (p) {
            case PEEKED_NAME_ATT -> {
                String result = reader.nextAttributeName();
                peeked = PEEKED_NONE;
                yield nameEncoding.decode(result);
            }
            case PEEKED_NAME_TAG -> {
                String result = nextTagName;
                if (result == null) throw unexpectedTokenError("a name");
                peeked = PEEKED_NONE;
                yield nameEncoding.decode(result);
            }
            case PEEKED_NAME_VIRTUAL_TEXT -> {
                String result = heuristics.guessElementName(reader.getPath(), XmlToken.TEXT);
                // The name was synthetic; the text itself is still pending.
                peeked = PEEKED_TEXT;
                yield result;
            }
            case PEEKED_NAME_VIRTUAL_CDATA -> {
                String result = heuristics.guessElementName(reader.getPath(), XmlToken.CDATA);
                peeked = PEEKED_CDATA;
                yield result;
            }
            case PEEKED_NAME_BEGIN_OBJECT -> {
                peeked = PEEKED_BEGIN_OBJECT;
                yield nameEncoding.decode(nextTagName);
            }
            case PEEKED_NAME_BEGIN_ARRAY -> {
                peeked = PEEKED_BEGIN_ARRAY;
                yield nameEncoding.decode(nextTagName);
            }
            default -> throw unexpectedTokenError("a name");
        };
    }

    /** Consumes the next value and returns its raw string content. */
    @Override
    public String nextString() throws IOException {
        return nextValue("a string");
    }

    /**
     * Consumes the next value and interprets it, case-insensitively, as {@code true} or {@code false}.
     *
     * @throws IllegalStateException if the value is neither
     */
    @Override
    public boolean nextBoolean() throws IOException {
        String res = nextValue("a boolean");
        if (res.equalsIgnoreCase("true")) return true;
        if (res.equalsIgnoreCase("false")) return false;
        throw unexpectedTokenError("a boolean");
    }

    /**
     * Consumes the next value, which must equal {@code null} ignoring case.
     *
     * @throws IllegalStateException if the value is anything else
     */
    @Override
    public void nextNull() throws IOException {
        String res = nextValue("null");
        if (!res.equalsIgnoreCase("null")) throw unexpectedTokenError("null");
    }

    /**
     * Consumes the next value and wraps it in a {@link LazilyParsedNumber}.
     *
     * @throws IllegalStateException if the value is {@code NaN}, {@code Infinity} or
     *     {@code -Infinity} while {@code serializeSpecialFloatingPointValues} is disabled
     */
    @Override
    public Number nextNumber() throws IOException {
        String res = nextValue("a number");
        LazilyParsedNumber number = new LazilyParsedNumber(res);
        if (!serializeSpecialFloatingPointValues && (res.equals("NaN") || res.equals("Infinity") || res.equals("-Infinity"))) {
            throw new IllegalStateException("Special floating point values are not allowed: " + res);
        }
        return number;
    }

    /**
     * Skips the next token: a whole element when it starts an object or array, otherwise a single
     * attribute name, attribute value, text or CDATA token.
     *
     * @throws IllegalStateException if the next token cannot be skipped
     */
    @Override
    public void skipValue() throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        switch (p) {
            case PEEKED_BEGIN_OBJECT, PEEKED_BEGIN_ARRAY -> {
                while (reader.hasNext()) reader.skipValue();
                reader.endTag();
                // Pops the scope doPeek() pushed for this element.
                stackSize--;
                // The element is gone, so its tag must not keep overriding getPath() (mirrors beginObject/beginArray).
                nextTagName = nextTagNamePath = null;
            }
            // Consumed with the same native call nextValue() uses; the result is discarded.
            case PEEKED_ATT_VALUE -> reader.nextAttributeValue();
            case PEEKED_NAME_ATT, PEEKED_TEXT, PEEKED_CDATA, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> reader.skipValue();
            default -> throw unexpectedTokenError("a value");
        }
        peeked = PEEKED_NONE;
    }

    /**
     * Consumes the next attribute value, text or CDATA token and returns its content.
     *
     * @param kind description of the expected value, used only in the error message
     * @throws IllegalStateException if the next token is not a value, or if it is text/CDATA directly
     *     inside an object, where a name has to be read first
     */
    private String nextValue(String kind) throws IOException {
        int p = peeked;
        if (p == PEEKED_NONE) {
            p = doPeek();
        }
        String result = switch (p) {
            case PEEKED_ATT_VALUE -> reader.nextAttributeValue();
            case PEEKED_TEXT -> reader.nextText();
            case PEEKED_CDATA -> reader.nextCData();
            case PEEKED_NAME_VIRTUAL_TEXT -> {
                if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                    throw unexpectedTokenError(kind);
                }
                yield reader.nextText();
            }
            case PEEKED_NAME_VIRTUAL_CDATA -> {
                if (stack[stackSize - 1] == WrapperScope.OBJECT) {
                    throw unexpectedTokenError(kind);
                }
                yield reader.nextCData();
            }
            default -> throw unexpectedTokenError(kind);
        };
        peeked = PEEKED_NONE;
        return result;
    }

    /**
     * Builds the path of the current position from the scope stack.
     *
     * <p>If a begin tag has been consumed but not yet turned into an object or array, the path
     * captured before that tag is returned instead.
     *
     * @param usePreviousPath whether the index of a trailing array scope should be decremented so
     *     that it refers to the previous element
     */
    private String getPath(boolean usePreviousPath) {
        if (nextTagNamePath != null) return nextTagNamePath;
        StringBuilder result = new StringBuilder().append('$');
        for (int i = 0; i < stackSize; i++) {
            int scope = stack[i];
            switch (scope) {
                case WrapperScope.ARRAY -> {
                    int pathIndex = pathIndices[i];
                    // If index is last path element it points to next array element; have to decrement
                    if (usePreviousPath && pathIndex > 0 && i == stackSize - 1) {
                        pathIndex--;
                    }
                    result.append('[').append(pathIndex).append(']');
                }
                case WrapperScope.OBJECT -> {
                    result.append('.');
                    if (pathNames[i] != null) {
                        result.append(pathNames[i]);
                    }
                }
                // These scopes do not contribute a path segment.
                case WrapperScope.OBJECT_VALUE_WRAPPER, WrapperScope.OBJECT_VALUE_WRAPPER_USED, WrapperScope.DOCUMENT -> {}
                default -> throw new AssertionError("Unknown scope value: " + scope);
            }
        }
        return result.toString();
    }

    /** Returns the path of the current position. */
    @Override
    public String getPath() {
        return getPath(false);
    }

    /** Returns the path with a trailing array index pointing at the previous element. */
    @Override
    public String getPreviousPath() {
        return getPath(true);
    }

    /** Clears the pending tag name and closes the underlying native reader. */
    @Override
    public void close() throws IOException {
        nextTagName = null;
        reader.close();
    }

    /**
     * Decisions XML leaves open and that this reader needs in order to produce object/array/name/value tokens.
     */
    public interface Heuristics {
        /** The structural kind an element is read as. */
        enum Kind {OBJECT, ARRAY}

        /**
         * Decides whether the element at {@code path} is read as an object or as an array.
         *
         * @param path the native reader's path of the element
         */
        Kind guessKind(String path);

        /**
         * Decides whether an element found inside an object merely names an entry whose value is the
         * element's content, rather than being a nested object or array itself.
         *
         * @param path the native reader's path of the element
         * @param next the native token following the element's begin tag
         */
        boolean guessServesAsEntryName(String path, XmlToken next);

        /**
         * Supplies a name for text or CDATA that appears where a name is required.
         *
         * @param path the native reader's path of the content
         * @param kind the kind of content that needs a name
         */
        String guessElementName(String path, XmlToken kind);

        /**
         * Decides which value token a piece of content is reported as by {@link XmlReader#peek()}.
         *
         * @param path the wrapper's path of the content
         * @param next the kind of native content
         */
        Token guessValueKind(String path, XmlToken next);

        /**
         * Default heuristics: paths ending in {@code "s"} are arrays and everything else is an
         * object, every element inside an object serves as an entry name, unnamed content is called
         * {@code "item"} and every value is a string.
         */
        Heuristics DEFAULT = new Heuristics() {
            @Override
            public Kind guessKind(String path) {
                if (path.endsWith("s")) return Kind.ARRAY;
                return Kind.OBJECT;
            }

            @Override
            public boolean guessServesAsEntryName(String path, XmlToken next) {
                return true;
            }

            @Override
            public String guessElementName(String path, XmlToken kind) {
                return "item";
            }

            @Override
            public Token guessValueKind(String path, XmlToken next) {
                return Token.STRING;
            }
        };
    }

    /**
     * Creates a new {@link MalformedDataException} with the given message and information about the
     * current location. The caller is expected to throw the result.
     */
    private MalformedDataException syntaxError(String message) {
        return new MalformedDataException(message + locationString());
    }

    /**
     * Creates an {@link IllegalStateException} describing a mismatch between the expected token and
     * the token reported by {@link #peek()}. The caller is expected to throw the result.
     *
     * @param expected description of what the caller wanted to read
     */
    private IllegalStateException unexpectedTokenError(String expected) throws IOException {
        return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
    }
}