From 63d7caca7ec3d0bf5f55a5f3d36ef3f81ce070c9 Mon Sep 17 00:00:00 2001 From: JFronny Date: Wed, 17 Apr 2024 10:44:56 +0200 Subject: [PATCH] feat(serialize-xml): Rewrite wrapping XmlReader --- .../serialize/xml/impl/WrapperScope.java | 10 +- .../serialize/xml/wrapper/XmlReader.java | 311 +++++++++++++----- 2 files changed, 234 insertions(+), 87 deletions(-) diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java index 4e7d47d..87209c9 100644 --- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java +++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java @@ -1,9 +1,9 @@ package io.gitlab.jfronny.commons.serialize.xml.impl; public class WrapperScope { - public static final int TAG_HEAD = 1; - public static final int TAG_HEAD_DANGLING_NAME = 2; - public static final int TAG_BODY_ARRAY = 3; - public static final int TAG_BODY_OBJECT = 4; - public static final int DOCUMENT = 6; + /** Scope pushed by XmlReader for a child tag of an object whose name is reported as an entry name; nothing inside it has been peeked yet. */ public static final int OBJECT_VALUE_WRAPPER = 1; + /** An OBJECT_VALUE_WRAPPER in which XmlReader has already peeked text, CDATA or a nested tag. */ public static final int OBJECT_VALUE_WRAPPER_USED = 2; + /** Scope of a tag that XmlReader reads as an array. */ public static final int ARRAY = 3; + /** Scope of a tag that XmlReader reads as an object. */ public static final int OBJECT = 4; + /** Presumably the root scope outside of any tag (renumbered from 6 to 5 by this patch) - TODO confirm where it is pushed. */ public static final int DOCUMENT = 5; } diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java index fd37a8e..6270ae4 100644 --- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java +++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java @@ -15,6 +15,23 @@ import java.util.Arrays; import java.util.Objects; public class XmlReader extends SerializeReader implements Closeable { + /** No token is cached; the next read has to call doPeek(). */ private static final int PEEKED_NONE = 0; 
+ /* Every PEEKED_NAME_* state below is reported by peek() as Token.NAME. */ private static final int PEEKED_NAME_BEGIN_OBJECT = 1; + private static final int PEEKED_BEGIN_OBJECT = 2; + private static final int PEEKED_NAME_BEGIN_ARRAY = 3; + private static final int PEEKED_BEGIN_ARRAY = 4; + private static final int PEEKED_END_OBJECT = 5; + private static final int PEEKED_END_ARRAY = 6; + private static final int PEEKED_NAME_ATT = 7; + private static final int PEEKED_ATT_VALUE = 8; + private static final int PEEKED_NAME_TAG = 9; + private static final int PEEKED_TEXT = 10; + private static final int PEEKED_CDATA = 11; + private static final int PEEKED_NAME_VIRTUAL_TEXT = 12; + private static final int PEEKED_NAME_VIRTUAL_CDATA = 13; + private static final int PEEKED_EOF = 14; + + /* State cached by doPeek(); PEEKED_NONE means the next read has to peek again. */ int peeked = PEEKED_NONE; private final NativeXmlReader reader; private int[] stack = new int[32]; private int stackSize = 0; @@ -40,7 +57,6 @@ public class XmlReader extends SerializeReader implement public XmlReader(NativeXmlReader reader) { this.reader = Objects.requireNonNull(reader); - this.heuristics = Objects.requireNonNull(heuristics); } public XmlReader(Reader source) { @@ -69,60 +85,59 @@ public class XmlReader extends SerializeReader implement @Override public XmlReader beginArray() throws IOException { - if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) { - // Tag was just created, interpret it as an array - nextTagName = null; - stack[stackSize - 1] = WrapperScope.TAG_BODY_ARRAY; - return this; - } else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) { - // We are inside an array, interpret the next tag as the root of our array - reader.beginTag(); - push(WrapperScope.TAG_BODY_ARRAY); - return this; - } else { - throw unexpectedTokenError("an array"); + /* Consumes the start of an array and opens an ARRAY scope whose element index starts at 0. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } + /* peek() reports PEEKED_BEGIN_ARRAY as Token.BEGIN_ARRAY, so that is the state to require here; testing PEEKED_BEGIN_OBJECT rejected every pending array. */ if (p != PEEKED_BEGIN_ARRAY) throw unexpectedTokenError("an array"); + nextTagName = nextTagNamePath = null; + push(WrapperScope.ARRAY); + pathIndices[stackSize - 1] = 0; + peeked = PEEKED_NONE; + return 
this; } @Override public XmlReader endArray() throws IOException { - if (nextTagName != null) throw unexpectedTokenError("the end of an array"); - if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) { - reader.endTag(); - stackSize--; - return this; - } else { - throw unexpectedTokenError("the end of an array"); + /* Consumes the end of an array: closes the native tag, pops one scope and advances the index of the enclosing scope. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } + if (p != PEEKED_END_ARRAY) throw unexpectedTokenError("the end of an array"); + reader.endTag(); + stackSize--; + pathNames[stackSize] = null; + pathIndices[stackSize - 1]++; + peeked = PEEKED_NONE; + return this; } @Override public XmlReader beginObject() throws IOException { - if (nextTagName != null || stack[stackSize - 1] == WrapperScope.TAG_HEAD) { - // Tag was just created, interpret it as an object - nextTagName = null; - stack[stackSize - 1] = WrapperScope.TAG_BODY_OBJECT; - return this; - } else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) { - // We are inside an array, interpret the next tag as the root of our object - reader.beginTag(); - push(WrapperScope.TAG_BODY_OBJECT); - return this; - } else { - throw unexpectedTokenError("an object"); + /* Consumes the start of an object (pending state must be PEEKED_BEGIN_OBJECT) and opens an OBJECT scope. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } + if (p != PEEKED_BEGIN_OBJECT) throw unexpectedTokenError("an object"); + nextTagName = nextTagNamePath = null; + push(WrapperScope.OBJECT); + peeked = PEEKED_NONE; + return this; } @Override public XmlReader endObject() throws IOException { - if (nextTagName != null) throw unexpectedTokenError("the end of an object"); - if (stack[stackSize - 1] == WrapperScope.TAG_BODY_OBJECT) { - reader.endTag(); - stackSize--; - return this; - } else { - throw unexpectedTokenError("the end of an object"); + /* Consumes the end of an object: closes the native tag, pops one scope and advances the index of the enclosing scope. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } + if (p != PEEKED_END_OBJECT) throw unexpectedTokenError("the end of an object"); + reader.endTag(); + stackSize--; + pathNames[stackSize] = null; + pathIndices[stackSize - 1]++; + peeked = PEEKED_NONE; + return this; } 
@Override @@ -132,47 +147,143 @@ public class XmlReader extends SerializeReader implement @Override public Token peek() throws IOException { - if (nextTagName != null) { - return switch (heuristics.guessKind(reader.getPath())) { - case OBJECT -> Token.BEGIN_OBJECT; - case ARRAY -> Token.BEGIN_ARRAY; - }; + /* Maps the cached (or freshly computed) peek state to a public Token; the state stays cached for the next read. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } - return switch (reader.peek()) { - case ATTRIBUTE_NAME -> Token.NAME; - case ATTRIBUTE_VALUE, TEXT, CDATA -> Token.STRING; + return switch (p) { + case PEEKED_NAME_BEGIN_ARRAY, PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_ATT, PEEKED_NAME_TAG, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> Token.NAME; + case PEEKED_BEGIN_OBJECT -> Token.BEGIN_OBJECT; + case PEEKED_BEGIN_ARRAY -> Token.BEGIN_ARRAY; + case PEEKED_END_OBJECT -> Token.END_OBJECT; + case PEEKED_END_ARRAY -> Token.END_ARRAY; + case PEEKED_CDATA -> heuristics.guessValueKind(getPath(), XmlToken.CDATA); + case PEEKED_ATT_VALUE -> heuristics.guessValueKind(getPath(), XmlToken.ATTRIBUTE_VALUE); + case PEEKED_TEXT -> heuristics.guessValueKind(getPath(), XmlToken.TEXT); + case PEEKED_EOF -> Token.END_DOCUMENT; + default -> throw new AssertionError(); + }; + } + + private int doPeek() throws IOException { + /* Classifies the next native token against the current scope, stores the result in peeked and may push or pop a scope. NOTE(review): every caller visible in this patch invokes doPeek() only while peeked == PEEKED_NONE, so the four checks below cannot match; nextName() performs the name-to-value transition itself. */ if (peeked == PEEKED_NAME_VIRTUAL_TEXT) return PEEKED_TEXT; + if (peeked == PEEKED_NAME_VIRTUAL_CDATA) return PEEKED_CDATA; + if (peeked == PEEKED_NAME_BEGIN_ARRAY) return PEEKED_BEGIN_ARRAY; + if (peeked == PEEKED_NAME_BEGIN_OBJECT) return PEEKED_BEGIN_OBJECT; + int peekStack = stack[stackSize - 1]; + return peeked = switch (reader.peek()) { + case ATTRIBUTE_NAME -> PEEKED_NAME_ATT; + case ATTRIBUTE_VALUE -> PEEKED_ATT_VALUE; + case TEXT -> switch (peekStack) { + case WrapperScope.DOCUMENT -> PEEKED_TEXT; + case WrapperScope.OBJECT_VALUE_WRAPPER -> { + stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED; + yield PEEKED_TEXT; + } + /* Text directly inside an object or array first surfaces as a name supplied by the heuristics. */ case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_TEXT; + default -> throw 
syntaxError("Unexpected text"); + }; + case CDATA -> switch (peekStack) { + case WrapperScope.DOCUMENT -> PEEKED_CDATA; + case WrapperScope.OBJECT_VALUE_WRAPPER -> { + stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED; + yield PEEKED_CDATA; + } + case WrapperScope.ARRAY, WrapperScope.OBJECT -> PEEKED_NAME_VIRTUAL_CDATA; + default -> throw syntaxError("Unexpected CDATA"); + }; case BEGIN_TAG -> { + /* A wrapper holds a single value: a second tag inside a used wrapper is rejected, a first one marks it used. */ switch (peekStack) { + case WrapperScope.OBJECT_VALUE_WRAPPER_USED -> throw syntaxError("Unexpected Tag"); + case WrapperScope.OBJECT_VALUE_WRAPPER -> stack[stackSize - 1] = WrapperScope.OBJECT_VALUE_WRAPPER_USED; + default -> {} + } nextTagNamePath = getPath(); nextTagName = reader.beginTag(); - yield peek(); + /* Inside an object the tag name must be reported first: either as the name of a wrapped value or as the name of a nested object or array. */ if (peekStack == WrapperScope.OBJECT) { + String path = reader.getPath(); + XmlToken next = reader.peek(); + if (heuristics.guessServesAsEntryName(path, next)) { + push(WrapperScope.OBJECT_VALUE_WRAPPER); + yield PEEKED_NAME_TAG; + } else { + yield switch (heuristics.guessKind(path)) { + case OBJECT -> { + push(WrapperScope.OBJECT); + yield PEEKED_NAME_BEGIN_OBJECT; + } + case ARRAY -> { + push(WrapperScope.ARRAY); + yield PEEKED_NAME_BEGIN_ARRAY; + } + }; + } + } + yield switch (heuristics.guessKind(reader.getPath())) { + case OBJECT -> { + push(WrapperScope.OBJECT); + yield PEEKED_BEGIN_OBJECT; + } + case ARRAY -> { + push(WrapperScope.ARRAY); + yield PEEKED_BEGIN_ARRAY; + } + }; } - case END_TAG -> stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY ? 
Token.END_ARRAY : Token.END_OBJECT; - case EOF -> Token.END_DOCUMENT; + case END_TAG -> switch (peekStack) { + case WrapperScope.ARRAY -> { + stackSize--; + yield PEEKED_END_ARRAY; + } + case WrapperScope.OBJECT -> { + stackSize--; + yield PEEKED_END_OBJECT; + } + /* A wrapper whose value has been read (OBJECT_VALUE_WRAPPER_USED) closes exactly like an untouched one; handling only the untouched state made every wrapped value end in "Unexpected end tag". NOTE(review): an untouched wrapper (empty tag) still yields a name without a value - confirm the intended behaviour. */ case WrapperScope.OBJECT_VALUE_WRAPPER, WrapperScope.OBJECT_VALUE_WRAPPER_USED -> { + stackSize--; + reader.endTag(); + yield doPeek(); + } + default -> throw syntaxError("Unexpected end tag"); + }; + case EOF -> PEEKED_EOF; }; } @Override public String nextName() throws IOException { - if (nextTagName != null) { - String res = nextTagName; - pathNames[stackSize - 1] = res; - push(WrapperScope.TAG_HEAD); - nextTagName = null; - return res; - } - if (reader.peek() == XmlToken.ATTRIBUTE_NAME) { - stack[stackSize - 1] = WrapperScope.TAG_HEAD_DANGLING_NAME; - return reader.nextAttributeName(); - } else if (reader.peek() == XmlToken.BEGIN_TAG) { - // ordinarily, this would also require a check whether we are in an object, - // but doing it this way provides users with more flexibility - String res = reader.beginTag(); - pathNames[stackSize - 1] = res; - push(WrapperScope.TAG_HEAD); - return res; - } else { - throw unexpectedTokenError("a name"); + /* Consumes a pending name state and returns the name; for virtual and tag-opening names the matching value state is left pending in peeked. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); } + return switch (p) { + case PEEKED_NAME_ATT -> { + String res = reader.nextAttributeName(); + peeked = PEEKED_NONE; + yield res; + } + case PEEKED_NAME_TAG -> { + String res = nextTagName; + if (res == null) throw unexpectedTokenError("a name"); + peeked = PEEKED_NONE; + yield res; + } + case PEEKED_NAME_VIRTUAL_TEXT -> { + String result = heuristics.guessElementName(reader.getPath(), XmlToken.TEXT); + /* The text itself is still unread, so it becomes the pending value; resetting to PEEKED_NONE made doPeek() classify the same text as a virtual name again. */ peeked = PEEKED_TEXT; + yield result; + } + case PEEKED_NAME_VIRTUAL_CDATA -> { + String result = heuristics.guessElementName(reader.getPath(), XmlToken.CDATA); + /* Same transition as for text: the CDATA section is the pending value. */ peeked = PEEKED_CDATA; + yield result; + } + case PEEKED_NAME_BEGIN_OBJECT, PEEKED_NAME_BEGIN_ARRAY -> { + /* Only the tag's name is consumed here; its BEGIN_OBJECT / BEGIN_ARRAY token stays pending for beginObject() / beginArray(). */ peeked = p == PEEKED_NAME_BEGIN_OBJECT ? PEEKED_BEGIN_OBJECT : PEEKED_BEGIN_ARRAY; + yield nextTagName; + } + default -> throw 
unexpectedTokenError("a name"); + }; } @Override @@ -206,20 +317,35 @@ public class XmlReader extends SerializeReader implement @Override public void skipValue() throws IOException { - nextValue("a value"); + /* Skips the pending token: a whole tag for BEGIN_OBJECT / BEGIN_ARRAY (popping the scope doPeek() pushed), otherwise a single native value. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); + } + switch (p) { + case PEEKED_BEGIN_OBJECT, PEEKED_BEGIN_ARRAY -> { + while (reader.hasNext()) reader.skipValue(); + reader.endTag(); + stackSize--; + } + /* PEEKED_ATT_VALUE is listed so an attribute value can be skipped after its name was read, as the pre-rewrite skipValue() allowed; assumes NativeXmlReader.skipValue() accepts that position - TODO confirm. */ case PEEKED_NAME_ATT, PEEKED_ATT_VALUE, PEEKED_TEXT, PEEKED_CDATA, PEEKED_NAME_VIRTUAL_TEXT, PEEKED_NAME_VIRTUAL_CDATA -> reader.skipValue(); + default -> throw unexpectedTokenError("a value"); + } + peeked = PEEKED_NONE; } private String nextValue(String kind) throws IOException { - if (nextTagName != null) throw unexpectedTokenError(kind); - return switch (reader.peek()) { - case ATTRIBUTE_VALUE -> { - stack[stackSize - 1] = WrapperScope.TAG_HEAD; - yield reader.nextAttributeValue(); - } - case TEXT -> reader.nextText(); - case CDATA -> reader.nextCData(); - case BEGIN_TAG, END_TAG, ATTRIBUTE_NAME, EOF -> throw unexpectedTokenError(kind); + /* Reads the pending attribute value, text or CDATA as a raw string; kind is only used in the error message. */ int p = peeked; + if (p == PEEKED_NONE) { + p = doPeek(); + } + String result = switch (p) { + case PEEKED_ATT_VALUE -> reader.nextAttributeValue(); + case PEEKED_TEXT -> reader.nextText(); + case PEEKED_CDATA -> reader.nextCData(); + default -> throw unexpectedTokenError(kind); }; + peeked = PEEKED_NONE; + return result; } @Override @@ -241,10 +367,31 @@ public class XmlReader extends SerializeReader implement public interface Heuristics { enum Kind {OBJECT, ARRAY} Kind guessKind(String path); + /** Whether a child tag of an object is reported as an entry name whose content is the value, instead of as a named nested object or array. */ boolean guessServesAsEntryName(String path, XmlToken next); + /** Name reported for text or CDATA that appears directly inside an object or array. */ String guessElementName(String path, XmlToken kind); + /** Token that peek() reports for a pending text, CDATA or attribute value. */ Token guessValueKind(String path, XmlToken next); - Heuristics DEFAULT = path -> { - if (path.endsWith("s")) return Kind.ARRAY; - return Kind.OBJECT; + Heuristics DEFAULT = new Heuristics() { + @Override + public Kind guessKind(String path) { + if (path.endsWith("s")) return Kind.ARRAY; + return Kind.OBJECT; + } + + 
@Override + public boolean guessServesAsEntryName(String path, XmlToken next) { + return true; + } + + @Override + public String guessElementName(String path, XmlToken kind) { + return "item"; + } + + @Override + public Token guessValueKind(String path, XmlToken next) { + return Token.STRING; + } }; }