From 4a1944f792250f12489f74db8a091ec6fa8509b1 Mon Sep 17 00:00:00 2001
From: JFronny
Date: Sat, 13 Apr 2024 21:48:19 +0200
Subject: [PATCH] feat(serialize): initial prototype for XML SerializeReader

---
Review notes (ignored by git am, not part of the commit message):
- Line structure of the patch was restored; hunk header and diffstat were
  recounted for the changes below. The index line still carries the blob ids
  of the original revision.
- beginArray()/beginObject() now push a new scope when peek() has already
  consumed the opening tag. Previously they overwrote the parent scope, so the
  matching endArray()/endObject() popped the parent and nested tags could run
  the scope stack empty.
- Dropped the no-op self-assignment of `heuristics` in the constructor (there
  is no constructor parameter of that name).
- Open point: reading attributes sets the current scope to TAG_HEAD, which
  looks like it makes a later endObject()/endArray() fail; needs follow-up.

 .../serialize/xml/impl/WrapperScope.java      |   9 +
 .../serialize/xml/wrapper/XmlReader.java      | 211 ++++++++++++++++--
 2 files changed, 205 insertions(+), 15 deletions(-)
 create mode 100644 commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java

diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java
new file mode 100644
index 0000000..4e7d47d
--- /dev/null
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/WrapperScope.java
@@ -0,0 +1,9 @@
+package io.gitlab.jfronny.commons.serialize.xml.impl;
+
+public class WrapperScope {
+    public static final int TAG_HEAD = 1;
+    public static final int TAG_HEAD_DANGLING_NAME = 2;
+    public static final int TAG_BODY_ARRAY = 3;
+    public static final int TAG_BODY_OBJECT = 4;
+    public static final int DOCUMENT = 6;
+}
diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java
index f52b646..fd37a8e 100644
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/wrapper/XmlReader.java
@@ -1,96 +1,277 @@
 package io.gitlab.jfronny.commons.serialize.xml.wrapper;
 
+import io.gitlab.jfronny.commons.data.LazilyParsedNumber;
+import io.gitlab.jfronny.commons.serialize.MalformedDataException;
 import io.gitlab.jfronny.commons.serialize.SerializeReader;
 import io.gitlab.jfronny.commons.serialize.Token;
 import io.gitlab.jfronny.commons.serialize.xml.NativeXmlReader;
+import io.gitlab.jfronny.commons.serialize.xml.XmlToken;
+import io.gitlab.jfronny.commons.serialize.xml.impl.WrapperScope;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Arrays;
 import java.util.Objects;
 
-public class XmlReader extends SerializeReader {
+/**
+ * {@link SerializeReader} implementation that reads XML through a {@link NativeXmlReader}.
+ */
+public class XmlReader extends SerializeReader implements Closeable {
     private final NativeXmlReader reader;
+    private int[] stack = new int[32];
+    private int stackSize = 0;
+    private String[] pathNames = new String[32];
+    private int[] pathIndices = new int[32];
+    private Heuristics heuristics = Heuristics.DEFAULT;
+    private String nextTagNamePath = null;
+    private String nextTagName = null;
+
+    {
+        stack[stackSize++] = WrapperScope.DOCUMENT;
+    }
+
+    /** Pushes a scope onto the stack, doubling the stack and path arrays when the stack is full. */
+    private void push(int newTop) {
+        if (stackSize == stack.length) {
+            int newLength = stackSize * 2;
+            stack = Arrays.copyOf(stack, newLength);
+            pathIndices = Arrays.copyOf(pathIndices, newLength);
+            pathNames = Arrays.copyOf(pathNames, newLength);
+        }
+        stack[stackSize++] = newTop;
+    }
 
     public XmlReader(NativeXmlReader reader) {
         this.reader = Objects.requireNonNull(reader);
     }
 
     public XmlReader(Reader source) {
         this(new NativeXmlReader(source));
     }
 
+    @Override
+    public XmlReader setLenient(boolean lenient) {
+        reader.setLenient(lenient);
+        return this;
+    }
+
+    @Override
+    public boolean isLenient() {
+        return reader.isLenient();
+    }
+
+    public XmlReader setHeuristics(Heuristics heuristics) {
+        this.heuristics = Objects.requireNonNull(heuristics);
+        return this;
+    }
+
+    public Heuristics getHeuristics() {
+        return heuristics;
+    }
+
     @Override
     public XmlReader beginArray() throws IOException {
-        return null;
+        if (nextTagName != null) {
+            // peek() already consumed the opening tag without creating a scope for it, so open one
+            nextTagName = null;
+            push(WrapperScope.TAG_BODY_ARRAY);
+            return this;
+        } else if (stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
+            // Tag was just created, interpret it as an array
+            stack[stackSize - 1] = WrapperScope.TAG_BODY_ARRAY;
+            return this;
+        } else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
+            // We are inside an array, interpret the next tag as the root of our array
+            reader.beginTag();
+            push(WrapperScope.TAG_BODY_ARRAY);
+            return this;
+        } else {
+            throw unexpectedTokenError("an array");
+        }
     }
 
     @Override
     public XmlReader endArray() throws IOException {
-        return null;
+        if (nextTagName != null) throw unexpectedTokenError("the end of an array");
+        if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
+            reader.endTag();
+            stackSize--;
+            return this;
+        } else {
+            throw unexpectedTokenError("the end of an array");
+        }
     }
 
     @Override
     public XmlReader beginObject() throws IOException {
-        return null;
+        if (nextTagName != null) {
+            // peek() already consumed the opening tag without creating a scope for it, so open one
+            nextTagName = null;
+            push(WrapperScope.TAG_BODY_OBJECT);
+            return this;
+        } else if (stack[stackSize - 1] == WrapperScope.TAG_HEAD) {
+            // Tag was just created, interpret it as an object
+            stack[stackSize - 1] = WrapperScope.TAG_BODY_OBJECT;
+            return this;
+        } else if (stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY) {
+            // We are inside an array, interpret the next tag as the root of our object
+            reader.beginTag();
+            push(WrapperScope.TAG_BODY_OBJECT);
+            return this;
+        } else {
+            throw unexpectedTokenError("an object");
+        }
     }
 
     @Override
     public XmlReader endObject() throws IOException {
-        return null;
+        if (nextTagName != null) throw unexpectedTokenError("the end of an object");
+        if (stack[stackSize - 1] == WrapperScope.TAG_BODY_OBJECT) {
+            reader.endTag();
+            stackSize--;
+            return this;
+        } else {
+            throw unexpectedTokenError("the end of an object");
+        }
     }
 
     @Override
     public boolean hasNext() throws IOException {
-        return false;
+        return nextTagName != null || reader.hasNext();
     }
 
     @Override
     public Token peek() throws IOException {
-        return null;
+        if (nextTagName != null) {
+            return switch (heuristics.guessKind(reader.getPath())) {
+                case OBJECT -> Token.BEGIN_OBJECT;
+                case ARRAY -> Token.BEGIN_ARRAY;
+            };
+        }
+        return switch (reader.peek()) {
+            case ATTRIBUTE_NAME -> Token.NAME;
+            case ATTRIBUTE_VALUE, TEXT, CDATA -> Token.STRING;
+            case BEGIN_TAG -> {
+                nextTagNamePath = getPath();
+                nextTagName = reader.beginTag();
+                yield peek();
+            }
+            case END_TAG -> stack[stackSize - 1] == WrapperScope.TAG_BODY_ARRAY ? Token.END_ARRAY : Token.END_OBJECT;
+            case EOF -> Token.END_DOCUMENT;
+        };
     }
 
     @Override
     public String nextName() throws IOException {
-        return "";
+        if (nextTagName != null) {
+            String res = nextTagName;
+            pathNames[stackSize - 1] = res;
+            push(WrapperScope.TAG_HEAD);
+            nextTagName = null;
+            return res;
+        }
+        if (reader.peek() == XmlToken.ATTRIBUTE_NAME) {
+            stack[stackSize - 1] = WrapperScope.TAG_HEAD_DANGLING_NAME;
+            return reader.nextAttributeName();
+        } else if (reader.peek() == XmlToken.BEGIN_TAG) {
+            // ordinarily, this would also require a check whether we are in an object,
+            // but doing it this way provides users with more flexibility
+            String res = reader.beginTag();
+            pathNames[stackSize - 1] = res;
+            push(WrapperScope.TAG_HEAD);
+            return res;
+        } else {
+            throw unexpectedTokenError("a name");
+        }
     }
 
     @Override
     public String nextString() throws IOException {
-        return "";
+        return nextValue("a string");
     }
 
     @Override
     public boolean nextBoolean() throws IOException {
-        return false;
+        String res = nextValue("a boolean");
+        if (res.equalsIgnoreCase("true")) return true;
+        if (res.equalsIgnoreCase("false")) return false;
+        throw unexpectedTokenError("a boolean");
     }
 
     @Override
     public void nextNull() throws IOException {
-
+        String res = nextValue("null");
+        if (!res.equalsIgnoreCase("null")) throw unexpectedTokenError("null");
     }
 
     @Override
     public Number nextNumber() throws IOException {
-        return null;
+        String res = nextValue("a number");
+        LazilyParsedNumber number = new LazilyParsedNumber(res);
+        if (!serializeSpecialFloatingPointValues && (res.equals("NaN") || res.equals("Infinity") || res.equals("-Infinity"))) {
+            throw new IllegalStateException("Special floating point values are not allowed: " + res);
+        }
+        return number;
     }
 
     @Override
     public void skipValue() throws IOException {
+        nextValue("a value");
+    }
 
+    /** Reads the next attribute value, text or CDATA token as a string; {@code kind} only names the expectation in errors. */
+    private String nextValue(String kind) throws IOException {
+        if (nextTagName != null) throw unexpectedTokenError(kind);
+        return switch (reader.peek()) {
+            case ATTRIBUTE_VALUE -> {
+                stack[stackSize - 1] = WrapperScope.TAG_HEAD;
+                yield reader.nextAttributeValue();
+            }
+            case TEXT -> reader.nextText();
+            case CDATA -> reader.nextCData();
+            case BEGIN_TAG, END_TAG, ATTRIBUTE_NAME, EOF -> throw unexpectedTokenError(kind);
+        };
     }
 
     @Override
     public String getPath() {
-        return "";
+        return nextTagName == null ? reader.getPath() : nextTagNamePath;
     }
 
     @Override
     public String getPreviousPath() {
-        return "";
+        return getPath(); // TODO this should be different when handling arrays
     }
 
     @Override
-    public void close() throws Exception {
+    public void close() throws IOException {
+        nextTagName = null;
+        reader.close();
+    }
 
+    /** Decides whether the tag at a given path is reported as an object or as an array by {@link #peek()}. */
+    public interface Heuristics {
+        enum Kind {OBJECT, ARRAY}
+        Kind guessKind(String path);
+
+        /** Treats paths ending in "s" as arrays and every other path as an object. */
+        Heuristics DEFAULT = path -> {
+            if (path.endsWith("s")) return Kind.ARRAY;
+            return Kind.OBJECT;
+        };
+    }
+
+    /**
+     * Throws a new {@link MalformedDataException} with the given message and information about the
+     * current location.
+     */
+    private MalformedDataException syntaxError(String message) throws MalformedDataException {
+        throw new MalformedDataException(message + locationString());
+    }
+
+    private IllegalStateException unexpectedTokenError(String expected) throws IOException {
+        // NOTE: peek() calls reader.beginTag() on a pending opening tag, so building this message can advance the reader
+        return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
     }
 }