feat(serialize-xml): initial prototyping for xml reader

2024-04-13 19:23:31 +02:00 · 2024-04-13 19:23:31 +02:00 · eb3db63fd5
parent dc04b7c929
commit eb3db63fd5
11 changed files with 2985 additions and 10 deletions
--- a/commons-serialize-json/src/main/java/io/gitlab/jfronny/commons/serialize/json/JsonReader.java
+++ b/commons-serialize-json/src/main/java/io/gitlab/jfronny/commons/serialize/json/JsonReader.java
@ -1398,16 +1398,7 @@ public class JsonReader extends SerializeReader<IOException, JsonReader> impleme
    }

    private IllegalStateException unexpectedTokenError(String expected) throws IOException {
-        Token peeked = peek();
-        String troubleshootingId =
-                peeked == Token.NULL ? "adapter-not-null-safe" : "unexpected-json-structure";
-        return new IllegalStateException(
-                "Expected "
-                        + expected
-                        + " but was "
-                        + peek()
-                        + locationString()
-        );
+        return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
    }

    /** Consumes the non-execute prefix if it exists. */
--- a/commons-serialize-xml/build.gradle.kts
+++ b/commons-serialize-xml/build.gradle.kts
@ -0,0 +1,31 @@
+import io.gitlab.jfronny.scripts.*
+
+plugins {
+    commons.library
+}
+
+dependencies {
+    implementation(projects.commons)
+    api(projects.commonsSerialize)
+
+    testImplementation(libs.junit.jupiter.api)
+    testImplementation(libs.google.truth)
+    testRuntimeOnly(libs.junit.jupiter.engine)
+    testRuntimeOnly(libs.junit.vintage)
+}
+
+publishing {
+    publications {
+        create<MavenPublication>("maven") {
+            groupId = "io.gitlab.jfronny"
+            // Matches this module's directory (commons-serialize-xml); the previous value was
+            // the JSON module's artifact id.
+            artifactId = "commons-serialize-xml"
+
+            from(components["java"])
+        }
+    }
+}
+
+tasks.javadoc {
+    linksOffline("https://maven.frohnmeyer-wds.de/javadoc/artifacts/io/gitlab/jfronny/commons/$version/raw", projects.commons)
+    linksOffline("https://maven.frohnmeyer-wds.de/javadoc/artifacts/io/gitlab/jfronny/commons-serialize/$version/raw", projects.commonsSerialize)
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlReader.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlReader.java
@ -0,0 +1,4 @@
+package io.gitlab.jfronny.commons.serialize.xml;
+
+/**
+ * Placeholder type: declares no members yet.
+ * NOTE(review): presumably the future public entry point for reading XML - confirm.
+ */
+public class XmlReader {
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlTransport.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlTransport.java
@ -0,0 +1,4 @@
+package io.gitlab.jfronny.commons.serialize.xml;
+
+/**
+ * Placeholder type: declares no members yet.
+ * NOTE(review): presumably the future reader/writer factory for XML - confirm.
+ */
+public class XmlTransport {
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlWriter.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/XmlWriter.java
@ -0,0 +1,4 @@
+package io.gitlab.jfronny.commons.serialize.xml;
+
+/**
+ * Placeholder type: declares no members yet.
+ * NOTE(review): presumably the future public entry point for writing XML - confirm.
+ */
+public class XmlWriter {
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java
@ -0,0 +1,802 @@
+package io.gitlab.jfronny.commons.serialize.xml.impl;
+
+import io.gitlab.jfronny.commons.serialize.MalformedDataException;
+import io.gitlab.jfronny.commons.serialize.StringEscapeUtil;
+
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+
+public class BaseXmlReader implements Closeable {
+    private static final int PEEKED_NONE = 0;
+    private static final int PEEKED_BEGIN_TAG = 1;
+    private static final int PEEKED_END_TAG = 2;
+    private static final int PEEKED_END_TAG_CONCISE = 3;
+    private static final int PEEKED_TEXT = 4;
+    private static final int PEEKED_CDATA = 5;
+    private static final int PEEKED_ATTRIBUTE_NAME = 6;
+    private static final int PEEKED_ATTRIBUTE_VALUE = 7;
+    private static final int PEEKED_EOF = 8;
+
+    /** The input XML. */
+    private final Reader in;
+
+    static final int BUFFER_SIZE = 1024;
+    /**
+     * Use a manual buffer to easily read and unread upcoming characters, and also so we can create
+     * strings without an intermediate StringBuilder. We decode literals directly out of this buffer,
+     * so it must be at least as long as the longest token that can be reported as a number.
+     */
+    private final char[] buffer = new char[BUFFER_SIZE];
+
+    private int pos = 0;
+    private int limit = 0;
+
+    private int lineNumber = 0;
+    private int lineStart = 0;
+
+    int peeked = PEEKED_NONE;
+
+    /**
+     * The number of characters in a peeked number literal. Increment 'pos' by this after reading a
+     * number.
+     */
+    private int peekedNumberLength;
+
+    /**
+     * A peeked string that should be parsed on the next double, long or string. This is populated
+     * before a numeric value is parsed and used if that parsing fails.
+     */
+    private String peekedString;
+
+    /*
+     * The nesting stack. Using a manual array rather than an ArrayList saves 20%.
+     */
+    private int[] stack = new int[32];
+    private int stackSize = 0;
+
+    {
+        stack[stackSize++] = XmlScope.EMPTY_DOCUMENT;
+    }
+
+    /*
+     * The path members. It corresponds directly to stack: At indices where the
+     * stack contains an object (EMPTY_OBJECT, DANGLING_NAME or NONEMPTY_OBJECT),
+     * pathNames contains the name at this scope. Where it contains an array
+     * (EMPTY_ARRAY, NONEMPTY_ARRAY) pathIndices contains the current index in
+     * that array. Otherwise the value is undefined, and we take advantage of that
+     * by incrementing pathIndices when doing so isn't useful.
+     */
+    private String[] pathNames = new String[32];
+    private int[] pathIndices = new int[32];
+
+    private boolean lenient = false; // TODO: make this configurable
+    /**
+     * Creates a reader that pulls its characters from {@code in}.
+     *
+     * @param in the character source; must not be {@code null}
+     * @throws NullPointerException if {@code in} is {@code null}
+     */
+    public BaseXmlReader(Reader in) {
+        Objects.requireNonNull(in, "in == null");
+        this.in = in;
+    }
+
+    /**
+     * Switches lenient parsing on or off.
+     *
+     * @param lenient the new value of the lenient flag
+     * @return this reader, so calls can be chained
+     */
+    public BaseXmlReader setLenient(boolean lenient) {
+        this.lenient = lenient;
+        return this;
+    }
+
+    /**
+     * @return the current value of the lenient flag
+     */
+    public boolean isLenient() {
+        return lenient;
+    }
+
+    private void push(int newTop) {
+        if (stackSize == stack.length) {
+            int newLength = stackSize * 2;
+            stack = Arrays.copyOf(stack, newLength);
+            pathIndices = Arrays.copyOf(pathIndices, newLength);
+            pathNames = Arrays.copyOf(pathNames, newLength);
+        }
+        stack[stackSize++] = newTop;
+    }
+
+    /**
+     * Returns true once {@code limit - pos >= minimum}. If the data is exhausted before that many
+     * characters are available, this returns false.
+     *
+     * <p>The unread characters {@code buffer[pos..limit)} are moved to the start of the buffer
+     * first, so {@code pos} is 0 afterwards (or 1 if a byte order mark was skipped) and any
+     * character before the old {@code pos} is gone.
+     *
+     * @param minimum the number of characters that must be available from {@code pos}
+     * @throws IOException if the underlying reader fails
+     */
+    private boolean fillBuffer(int minimum) throws IOException {
+        char[] buffer = this.buffer;
+        // The unread characters are about to move left by 'pos', so move lineStart with them.
+        lineStart -= pos;
+        if (limit != pos) {
+            // Keep the unread tail by copying it to the front of the buffer.
+            limit -= pos;
+            System.arraycopy(buffer, pos, buffer, 0, limit);
+        } else {
+            limit = 0;
+        }
+
+        pos = 0;
+        int total;
+        while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
+            limit += total;
+
+            // if this is the first read, consume an optional byte order mark (BOM) if it exists
+            if (lineNumber == 0 && lineStart == 0 && limit > 0 && buffer[0] == '\ufeff') {
+                pos++;
+                lineStart++;
+                // The BOM occupies one buffered character, so one more is needed.
+                minimum++;
+            }
+
+            if (limit >= minimum) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Consumes the name of the next opening tag and enters that element.
+     *
+     * <p>If the name is directly followed by {@code >}, the bracket is consumed and the reader
+     * enters the element's body. Otherwise the reader stays in the tag head, so that attributes
+     * or a concise {@code />} end can be peeked next. Previously anything but {@code >} was
+     * rejected and the body was then tokenised as if it were the tag head.
+     *
+     * @return the tag name
+     * @throws IllegalStateException if the next token is not the start of a tag
+     * @throws IOException if the input ends inside the tag or cannot be read
+     */
+    public String beginTag() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        if (p != PEEKED_BEGIN_TAG) {
+            throw unexpectedTokenError("BEGIN_TAG");
+        }
+        String name = nextName();
+        // nextName() may stop at the end of the buffered data; make sure buffer[pos] is real input.
+        if (pos == limit && !fillBuffer(1)) {
+            throw syntaxError("Unterminated tag");
+        }
+        // The name is stored in the parent's slot; the new scope is pushed above it.
+        pathNames[stackSize - 1] = name;
+        if (buffer[pos] == '>') {
+            pos++;
+            push(XmlScope.TAG_BODY);
+        } else {
+            push(XmlScope.TAG_HEAD);
+        }
+        peeked = PEEKED_NONE;
+        return name;
+    }
+
+    /**
+     * Consumes the end of the current element, either an explicit closing tag or the concise
+     * {@code />} form, and leaves the element's scope.
+     *
+     * <p>The name of the open element lives in {@code pathNames[stackSize - 2]}, because
+     * {@code beginTag()} stores it before pushing the element's own scope. The closing name is
+     * compared against that slot; a mismatch is only tolerated in lenient mode.
+     *
+     * @return the name of the closed element (the name that was read for an explicit closing tag)
+     * @throws IllegalStateException if the next token is not an end tag
+     * @throws IOException on malformed input or if the input cannot be read
+     */
+    public String endTag() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        if (p != PEEKED_END_TAG && p != PEEKED_END_TAG_CONCISE) {
+            throw unexpectedTokenError("END_TAG");
+        }
+        // Only the document scope is left: there is no element to close and nothing to pop.
+        if (stackSize < 2) {
+            throw syntaxError("Closing tag without a matching opening tag");
+        }
+        String expected = pathNames[stackSize - 2];
+        String name;
+        if (p == PEEKED_END_TAG) {
+            name = nextName();
+            if (pos == limit && !fillBuffer(1)) {
+                throw syntaxError("Unterminated tag");
+            }
+            if (buffer[pos] != '>') {
+                throw syntaxError("Expected > but was " + buffer[pos]);
+            }
+            pos++;
+            if (!lenient && !name.equals(expected)) {
+                throw syntaxError("Mismatched closing tag: Expected " + expected + " but was " + name);
+            }
+        } else {
+            // "/>" carries no name of its own.
+            name = expected;
+        }
+        stackSize--;
+        pathNames[stackSize] = null; // Free the last path name so that it can be garbage collected!
+        pathIndices[stackSize - 1]++;
+        peeked = PEEKED_NONE;
+        return name;
+    }
+
+    /**
+     * Reports whether another token follows in the current scope.
+     *
+     * @return {@code false} if the next token is the end of the document or the end of the
+     *     current element (explicit closing tag or concise {@code />}), {@code true} otherwise
+     * @throws IOException on malformed input or if the input cannot be read
+     */
+    public boolean hasNext() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        // A concise "/>" ends the element just like an explicit closing tag does.
+        return p != PEEKED_EOF && p != PEEKED_END_TAG && p != PEEKED_END_TAG_CONCISE;
+    }
+
+    /**
+     * Returns the type of the next token without consuming it.
+     *
+     * @return the next token type; both the explicit closing tag and the concise {@code />} form
+     *     are reported as {@link XmlToken#END_TAG}
+     * @throws IOException on malformed input or if the input cannot be read
+     */
+    public XmlToken peek() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        return switch (p) {
+            case PEEKED_BEGIN_TAG -> XmlToken.BEGIN_TAG;
+            // The concise form used to fall into the default branch and raise an AssertionError.
+            case PEEKED_END_TAG, PEEKED_END_TAG_CONCISE -> XmlToken.END_TAG;
+            case PEEKED_TEXT -> XmlToken.TEXT;
+            case PEEKED_CDATA -> XmlToken.CDATA;
+            case PEEKED_ATTRIBUTE_NAME -> XmlToken.ATTRIBUTE_NAME;
+            case PEEKED_ATTRIBUTE_VALUE -> XmlToken.ATTRIBUTE_VALUE;
+            case PEEKED_EOF -> XmlToken.EOF;
+            default -> throw new AssertionError();
+        };
+    }
+
+    /**
+     * Classifies the next token, stores the result in {@code peeked} and returns it.
+     *
+     * <p>On return {@code pos} is where the consuming method starts reading:
+     * <ul>
+     *   <li>begin tag, end tag, attribute name: the first character of the name</li>
+     *   <li>attribute value: the opening quote</li>
+     *   <li>text: the first non-whitespace character</li>
+     *   <li>CDATA: the first character after {@code <![CDATA[}</li>
+     *   <li>concise end tag: just after {@code />}</li>
+     * </ul>
+     *
+     * <p>{@code nextNonWhitespace} leaves {@code pos} one past the character it returns. That
+     * character is pushed back before any look-ahead, because {@code fillBuffer} discards
+     * everything before {@code pos}.
+     *
+     * @throws IllegalStateException if the reader is closed
+     * @throws IOException on malformed input, premature end of input or read failure
+     */
+    int doPeek() throws IOException {
+        int peekStack = stack[stackSize - 1];
+        if (peekStack == XmlScope.TAG_HEAD) {
+            // Inside an opening tag: an attribute, "/>" or ">" may follow.
+            stack[stackSize - 1] = XmlScope.DANGLING_NAME;
+            int c = nextNonWhitespace(true);
+            if (c == '/') {
+                if (pos < limit || fillBuffer(1)) {
+                    char chNext = buffer[pos++];
+                    if (chNext == '>') {
+                        stack[stackSize - 1] = XmlScope.TAG_BODY;
+                        return peeked = PEEKED_END_TAG_CONCISE;
+                    } else {
+                        throw syntaxError("Expected /> but was /" + chNext);
+                    }
+                } else {
+                    throw syntaxError("Unterminated tag at " + (char) c);
+                }
+            } else if (c == '>') {
+                stack[stackSize - 1] = XmlScope.TAG_BODY;
+                // fall through: the element's content starts here
+            } else {
+                pos--; // push the first name character back so the name can be read in full
+                if (pos + 1 < limit || fillBuffer(2)) {
+                    var check = isNameStart((char) c, buffer[pos + 1]);
+                    if (check != NameCheck.NONE) {
+                        return peeked = PEEKED_ATTRIBUTE_NAME;
+                    } else {
+                        // Not unexpectedTokenError(): that would call peek() and re-enter this method.
+                        throw syntaxError("Expected attribute name but was " + (char) c);
+                    }
+                } else throw syntaxError("Unterminated tag at " + (char) c);
+            }
+        } else if (peekStack == XmlScope.DANGLING_NAME) {
+            stack[stackSize - 1] = XmlScope.TAG_HEAD;
+            // Look for an equals sign before the value
+            int c = nextNonWhitespace(true);
+            if (c == '=') {
+                c = nextNonWhitespace(true);
+                if (c == '\'' || c == '"') {
+                    pos--; // nextAttributeValue() reads the opening quote from buffer[pos]
+                    return peeked = PEEKED_ATTRIBUTE_VALUE;
+                } else {
+                    throw syntaxError("Expected a value but was " + (char) c);
+                }
+            } else {
+                throw syntaxError("Expected '='");
+            }
+        } else if (peekStack == XmlScope.TAG_BODY) {
+            // fall through: a new element is starting
+        } else if (peekStack == XmlScope.EMPTY_DOCUMENT) {
+            stack[stackSize - 1] = XmlScope.NONEMPTY_DOCUMENT;
+            // fall through: a new element is starting
+        } else if (peekStack == XmlScope.NONEMPTY_DOCUMENT) {
+            int c = nextNonWhitespace(false);
+            if (c == -1) {
+                return peeked = PEEKED_EOF;
+            } else {
+                checkLenient();
+                pos--;
+                // fall through: a new element is starting
+            }
+        } else if (peekStack == XmlScope.CLOSED) {
+            throw new IllegalStateException("BaseXmlReader is closed");
+        }
+        // nextNonWhitespace(true) throws at end of input, so c is a real character here.
+        int c = nextNonWhitespace(true);
+        pos--; // push back: every offset below is relative to the character just read
+        if (c != '<') {
+            return peeked = PEEKED_TEXT;
+        }
+        if (pos + 1 < limit || fillBuffer(2)) {
+            char chNext = buffer[pos + 1];
+            if (chNext == '/') {
+                pos += 2; // skip "</"
+                return peeked = PEEKED_END_TAG;
+            } else if (chNext == '!') {
+                if (pos + 8 < limit || fillBuffer(9)) {
+                    if (buffer[pos + 2] == '[' && buffer[pos + 3] == 'C' && buffer[pos + 4] == 'D' && buffer[pos + 5] == 'A' && buffer[pos + 6] == 'T' && buffer[pos + 7] == 'A' && buffer[pos + 8] == '[') {
+                        pos += 9; // skip "<![CDATA["
+                        return peeked = PEEKED_CDATA;
+                    } else {
+                        throw syntaxError("Expected <![CDATA[ but was " + new String(buffer, pos, 9));
+                    }
+                }
+            } else if (pos + 2 < limit || fillBuffer(3)) {
+                var check = isNameStart(chNext, buffer[pos + 2]);
+                if (check != NameCheck.NONE) {
+                    pos++; // skip "<"
+                    return peeked = PEEKED_BEGIN_TAG;
+                }
+            }
+        }
+        throw syntaxError("Unterminated tag at " + (char) c);
+    }
+
+    private enum NameCheck { FIRST, BOTH, NONE }
+    /**
+     * Checks whether {@code ch} may start an XML name (the NameStartChar production of XML 1.0).
+     *
+     * <p>The Unicode entries of that production are ranges. They used to be matched as a fixed
+     * pair of consecutive characters, which rejected almost every non-ASCII name. Only
+     * characters from the Basic Multilingual Plane are recognised; the supplementary range
+     * U+10000 to U+EFFFF would need surrogate-pair handling in the callers and is reported as
+     * {@link NameCheck#NONE}.
+     *
+     * @param ch the character to classify
+     * @param chNext the character after {@code ch}; currently not inspected
+     * @return {@link NameCheck#FIRST} if {@code ch} may start a name, otherwise {@link NameCheck#NONE}
+     */
+    private NameCheck isNameStart(char ch, char chNext) {
+        boolean nameStart = ch == ':' || ch == '_'
+                || ('A' <= ch && ch <= 'Z')
+                || ('a' <= ch && ch <= 'z')
+                || (0x00C0 <= ch && ch <= 0x00D6)
+                || (0x00D8 <= ch && ch <= 0x00F6)
+                || (0x00F8 <= ch && ch <= 0x02FF)
+                || (0x0370 <= ch && ch <= 0x037D)
+                || (0x037F <= ch && ch <= 0x1FFF)
+                || (0x200C <= ch && ch <= 0x200D)
+                || (0x2070 <= ch && ch <= 0x218F)
+                || (0x2C00 <= ch && ch <= 0x2FEF)
+                || (0x3001 <= ch && ch <= 0xD7FF)
+                || (0xF900 <= ch && ch <= 0xFDCF)
+                || (0xFDF0 <= ch && ch <= 0xFFFD);
+        return nameStart ? NameCheck.FIRST : NameCheck.NONE;
+    }
+
+    /**
+     * Checks whether {@code ch} may appear inside an XML name (the NameChar production of
+     * XML 1.0): anything {@code isNameStart} accepts, plus '-', '.', digits, U+00B7 and the
+     * ranges U+0300 to U+036F and U+203F to U+2040. The two ranges used to be matched as a
+     * fixed pair of consecutive characters instead of as ranges.
+     *
+     * @param ch the character to classify
+     * @param chNext the character after {@code ch}; only forwarded to {@code isNameStart}
+     * @return {@link NameCheck#NONE} if {@code ch} cannot be part of a name
+     */
+    private NameCheck isName(char ch, char chNext) {
+        var nameStart = isNameStart(ch, chNext);
+        if (nameStart != NameCheck.NONE) return nameStart;
+        boolean nameOnly = ch == '-' || ch == '.' || ch == 0x00B7
+                || ('0' <= ch && ch <= '9')
+                || (0x0300 <= ch && ch <= 0x036F)
+                || (0x203F <= ch && ch <= 0x2040);
+        return nameOnly ? NameCheck.FIRST : NameCheck.NONE;
+    }
+
+    /**
+     * Consumes the next token as an attribute name and records it as the current path name.
+     *
+     * @return the attribute name
+     * @throws IllegalStateException if the next token is not an attribute name
+     * @throws IOException on malformed input or if the input cannot be read
+     */
+    public String nextAttributeName() throws IOException {
+        final int token = peeked != PEEKED_NONE ? peeked : doPeek();
+        if (token != PEEKED_ATTRIBUTE_NAME) {
+            throw unexpectedTokenError("ATTRIBUTE_NAME");
+        }
+        final String attribute = nextName();
+        pathNames[stackSize - 1] = attribute;
+        peeked = PEEKED_NONE;
+        return attribute;
+    }
+
+    public String nextAttributeValue() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        if (p != PEEKED_ATTRIBUTE_VALUE) {
+            throw unexpectedTokenError("ATTRIBUTE_VALUE");
+        }
+        char quote = buffer[pos++];
+        return readUntil((c, i) -> {
+            if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c);
+            if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
+            return c == quote;
+        }, true);
+//        StringBuilder builder = null;
+//        while (true) {
+//            p = pos; // repurpose 'p' to save a position since we no longer need it
+//            int l = limit;
+//            /* the index of the first character not yet appended to the builder. */
+//            int start = p;
+//            while (p < l) {
+//                int c = buffer[p++];
+//                if (!lenient && c < 0x20) {
+//                    throw syntaxError("Control character in attribute value: " + c);
+//                } else if (c == quote) {
+//                    pos = p;
+//                    int len = p - start - 1;
+//                    peeked = PEEKED_NONE;
+//                    if (builder == null) {
+//                        return new String(buffer, start, len);
+//                    } else {
+//                        builder.append(buffer, start, len);
+//                        return builder.toString();
+//                    }
+//                } else if (c == '&') {
+//                    pos = p;
+//                    int len = p - start - 1;
+//                    if (builder == null) {
+//                        int estimatedLength = (len + 1) * 2;
+//                        builder = new StringBuilder(Math.max(estimatedLength, 16));
+//                    }
+//                    builder.append(buffer, start, len);
+//                    builder.append(readReference());
+//                    p = pos;
+//                    l = limit;
+//                    start = p;
+//                } else if (c == '\n') {
+//                    lineNumber++;
+//                    lineStart = p;
+//                } else if (c == '<') {
+//                    throw syntaxError("Expected " + quote + " but was '<'");
+//                }
+//            }
+//
+//            if (builder == null) {
+//                int estimatedLength = (p - start) * 2;
+//                builder = new StringBuilder(Math.max(estimatedLength, 16));
+//            }
+//            builder.append(buffer, start, p - start);
+//            pos = p;
+//            if (!fillBuffer(1)) {
+//                throw syntaxError("Unterminated attribute value");
+//            }
+//        }
+    }
+
+    /**
+     * Reads a character or entity reference. The leading {@code &} must already have been
+     * consumed; on return {@code pos} is just after the terminating {@code ;}.
+     *
+     * <p>Numeric references (decimal, or hexadecimal after {@code #x}) are decoded to the code
+     * point they name, including code points above U+FFFF. The five predefined entities
+     * ({@code apos}, {@code quot}, {@code amp}, {@code lt}, {@code gt}) are replaced; any other
+     * entity reference is returned verbatim, including its {@code &} and {@code ;}.
+     *
+     * @return the replacement text
+     * @throws IOException if the reference is malformed or unterminated, or the input cannot be read
+     */
+    private String readReference() throws IOException {
+        if (pos == limit && !fillBuffer(1)) {
+            throw syntaxError("Unterminated escape sequence");
+        }
+        if (buffer[pos] == '#') {
+            // read the character reference
+            pos++;
+            if (pos == limit && !fillBuffer(1)) {
+                throw syntaxError("Unterminated escape sequence");
+            }
+            boolean isHex = buffer[pos] == 'x' || buffer[pos] == 'X';
+            if (isHex) pos++;
+            String digits = readUntil((c, i) -> {
+                if (c == ';') return true;
+                if ('0' <= c && c <= '9') return false;
+                if (isHex && ('a' <= c && c <= 'f' || 'A' <= c && c <= 'F')) return false;
+                throw syntaxError("Malformed character reference");
+            }, false);
+            // readUntil() does not include or consume the terminator, so look for it at buffer[pos].
+            if ((pos == limit && !fillBuffer(1)) || buffer[pos] != ';') {
+                throw syntaxError("Missing ';' in character reference");
+            }
+            pos++;
+            int codePoint;
+            try {
+                codePoint = Integer.parseInt(digits, isHex ? 16 : 10);
+            } catch (NumberFormatException e) {
+                // no digits at all, or more digits than fit into an int
+                throw syntaxError("Malformed character reference");
+            }
+            if (!Character.isValidCodePoint(codePoint)) {
+                throw syntaxError("Malformed character reference");
+            }
+            return new String(Character.toChars(codePoint));
+        } else {
+            // read the entity reference
+            // only the predefined entities are resolved; others are passed through unchanged
+            String name = nextName();
+            if ((pos == limit && !fillBuffer(1)) || buffer[pos] != ';') {
+                throw syntaxError("Missing ';' in entity reference");
+            }
+            pos++;
+            return switch (name) {
+                case "apos" -> "'";
+                case "quot" -> "\"";
+                case "amp" -> "&";
+                case "lt" -> "<";
+                case "gt" -> ">";
+                default -> "&" + name + ";";
+            };
+        }
+    }
+
+    public String nextText() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        if (p != PEEKED_TEXT) {
+            throw unexpectedTokenError("TEXT");
+        }
+        return readUntil((c, i) -> c == '<', true);
+    }
+
+    /**
+     * Consumes the content of a CDATA section up to and including its {@code ]]>} terminator.
+     * References are not resolved inside CDATA.
+     *
+     * @return the raw content of the section
+     * @throws IllegalStateException if the next token is not a CDATA section
+     * @throws IOException if the section is not terminated or the input cannot be read
+     */
+    public String nextCData() throws IOException {
+        int p = peeked;
+        if (p == PEEKED_NONE) {
+            p = doPeek();
+        }
+        if (p != PEEKED_CDATA) {
+            throw unexpectedTokenError("CDATA");
+        }
+        StringBuilder sb = new StringBuilder();
+        while (true) {
+            sb.append(readUntil((c, i) -> c == ']', false));
+            if (pos + 2 < limit || fillBuffer(3)) {
+                if (buffer[pos] == ']' && buffer[pos + 1] == ']' && buffer[pos + 2] == '>') {
+                    pos += 3;
+                    peeked = PEEKED_NONE;
+                    return sb.toString();
+                }
+                // A ']' that does not start the terminator is content. It has to be consumed
+                // here, otherwise readUntil() stops on the same character forever.
+                sb.append(buffer[pos++]);
+            } else {
+                throw syntaxError("Unterminated CDATA");
+            }
+        }
+    }
+
+    /**
+     * Reads characters from {@code pos} for as long as {@code isName} accepts them and returns
+     * them; the first rejected character is left unread. The look-ahead character handed to
+     * {@code isName} is {@code '\0'} when the following character is not in the buffer yet.
+     */
+    private String nextName() throws IOException {
+        return readUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
+    }
+
+    /**
+     * Decides where {@code readUntil} stops. Unlike the standard predicates it receives the
+     * offset of the character and may throw {@link MalformedDataException}.
+     */
+    @FunctionalInterface
+    private interface EndPredicate {
+        /**
+         * @param c the character under inspection
+         * @param i the offset of {@code c} from the reader's current {@code pos}
+         * @return {@code true} if reading must stop before {@code c}
+         */
+        boolean test(char c, int i) throws MalformedDataException;
+    }
+
+    /**
+     * Reads characters starting at {@code pos} until {@code character} accepts one or the input
+     * ends. The accepted character is neither returned nor consumed, so after a successful
+     * match it is at {@code buffer[pos]}. Line bookkeeping is updated for every newline read.
+     *
+     * @param character decides where to stop; it receives each character and its offset from {@code pos}
+     * @param handleReferences if {@code true}, each {@code &} starts a reference that is
+     *     replaced by the result of {@code readReference()}
+     * @return the characters read, with references replaced if requested
+     * @throws IOException if the predicate or a reference reports malformed input, or reading fails
+     */
+    private String readUntil(EndPredicate character, boolean handleReferences) throws IOException {
+        StringBuilder builder = null;
+        int i = 0;
+        findEnd:
+        while (true) {
+            for (; pos + i < limit; i++) {
+                char c = buffer[pos + i];
+                if (character.test(c, i)) {
+                    break findEnd;
+                } else if (handleReferences && c == '&') {
+                    if (builder == null) {
+                        builder = new StringBuilder(Math.max(i, 16));
+                    }
+                    builder.append(buffer, pos, i);
+                    // Skip the literal text AND the '&': readReference() expects pos to be just after it.
+                    pos += i + 1;
+                    builder.append(readReference());
+                    // The loop increment turns this into 0, so the character that follows the
+                    // reference is inspected instead of being skipped.
+                    i = -1;
+                } else if (c == '\n') {
+                    lineNumber++;
+                    lineStart = pos + i + 1;
+                }
+            }
+
+            // Attempt to load the entire name into the buffer at once.
+            if (i < buffer.length) {
+                if (fillBuffer(i + 1)) {
+                    continue;
+                } else {
+                    break;
+                }
+            }
+
+            // use a StringBuilder when the name is too long.
+            if (builder == null) {
+                builder = new StringBuilder(Math.max(i, 16));
+            }
+            builder.append(buffer, pos, i);
+            pos += i;
+            i = 0;
+            if (!fillBuffer(1)) {
+                break;
+            }
+        }
+
+        String result = builder != null
+                ? builder.append(buffer, pos, i).toString()
+                : new String(buffer, pos, i);
+        pos += i;
+        return result;
+    }
+
+    /**
+     * Returns the next character in the stream that is neither whitespace nor a part of a comment.
+     * When this returns, the returned character is always at {@code buffer[pos-1]}; this means the
+     * caller can always push back the returned character by decrementing {@code pos}.
+     *
+     * @param throwOnEof whether running out of input is an error
+     * @return the character, or -1 at the end of the input if {@code throwOnEof} is false
+     * @throws EOFException at the end of the input if {@code throwOnEof} is true
+     * @throws IOException if a comment is not terminated or the input cannot be read
+     */
+    private int nextNonWhitespace(boolean throwOnEof) throws IOException {
+        /*
+         * This code uses ugly local variables 'p' and 'l' representing the 'pos'
+         * and 'limit' fields respectively. Using locals rather than fields saves
+         * a few field reads for each whitespace character in a pretty-printed
+         * document, resulting in a 5% speedup. We need to flush 'p' to its field
+         * before any (potentially indirect) call to fillBuffer() and reread both
+         * 'p' and 'l' after any (potentially indirect) call to the same method.
+         */
+        char[] buffer = this.buffer;
+        int p = pos;
+        int l = limit;
+        while (true) {
+            if (p == l) {
+                pos = p;
+                if (!fillBuffer(1)) {
+                    break;
+                }
+                p = pos;
+                l = limit;
+            }
+
+            int c = buffer[p++];
+            if (c == '\n') {
+                lineNumber++;
+                lineStart = p;
+                continue;
+            } else if (c == ' ' || c == '\r' || c == '\t') {
+                continue;
+            }
+
+            pos = p;
+            if (c == '<') {
+                // Detecting "<!--" needs the three characters after '<'. Load them whenever they
+                // are not all buffered, not only when the buffer is completely exhausted;
+                // otherwise stale data beyond 'limit' would be inspected.
+                if (l - p < 3) {
+                    pos--; // push back '<' so it's still in the buffer when this method returns
+                    boolean charsLoaded = fillBuffer(4);
+                    pos++; // consume the '<' again
+                    if (!charsLoaded) {
+                        return c;
+                    }
+                }
+
+                if (buffer[pos] == '!' && buffer[pos + 1] == '-' && buffer[pos + 2] == '-') {
+                    pos += 3;
+                    if (!skipTo("-->")) {
+                        throw syntaxError("Unterminated comment");
+                    }
+                    // skipTo() stops at the start of the terminator; continue right after it
+                    p = pos + 3;
+                    l = limit;
+                    continue;
+                }
+            }
+            return c;
+        }
+        if (throwOnEof) {
+            throw new EOFException("End of input" + locationString());
+        } else {
+            return -1;
+        }
+    }
+
+    /**
+     * Rejects the current input unless the reader is lenient.
+     *
+     * @throws MalformedDataException if the reader is in strict mode
+     */
+    private void checkLenient() throws MalformedDataException {
+        if (!lenient) {
+            // The message used to name JsonReader and JSON, neither of which applies to this class.
+            throw syntaxError("Use BaseXmlReader.setLenient(true) to accept malformed XML");
+        }
+    }
+
+    /**
+     * Advances the position until after the next newline character. If the line is terminated by
+     * "\r\n", the '\n' must be consumed as whitespace by the caller.
+     */
+    private void skipToEndOfLine() throws IOException {
+        while (pos < limit || fillBuffer(1)) {
+            char c = buffer[pos++];
+            if (c == '\n') {
+                lineNumber++;
+                lineStart = pos;
+                break;
+            } else if (c == '\r') {
+                break;
+            }
+        }
+    }
+
+    /**
+     * @param toFind a string to search for. Must not contain a newline.
+     */
+    private boolean skipTo(String toFind) throws IOException {
+        int length = toFind.length();
+        outer:
+        for (; pos + length <= limit || fillBuffer(length); pos++) {
+            if (buffer[pos] == '\n') {
+                lineNumber++;
+                lineStart = pos + 1;
+                continue;
+            }
+            for (int c = 0; c < length; c++) {
+                if (buffer[pos + c] != toFind.charAt(c)) {
+                    continue outer;
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Describes the current read position for use in error messages: one-based line and column,
+     * the character at the position if there is one, and the current path.
+     *
+     * @return a string of the form {@code " at line L column C (char 'x') path P"}; the
+     *     {@code (char ...)} part is omitted when no buffered character is at {@code pos}
+     */
+    protected String locationString() {
+        int line = lineNumber + 1;
+        int column = pos - lineStart + 1;
+        String charInterjection = "";
+        // Only look at buffer[pos] if it holds real input. The old code read it before any
+        // bounds check, which reported stale data at the end of the input and could index
+        // past the end of the array.
+        if (pos >= 0 && pos < limit) {
+            char current = buffer[pos];
+            String replacement = StringEscapeUtil.getReplacement(current);
+            if (replacement == null) {
+                replacement = String.valueOf(current);
+            }
+            charInterjection = " (char '" + replacement + "')";
+        }
+        return " at line " + line + " column " + column + charInterjection + " path " + getPath();
+    }
+
+    private String getPath(boolean usePreviousPath) {
+        StringBuilder result = new StringBuilder().append('$');
+        for (int i = 0; i < stackSize; i++) {
+            int scope = stack[i];
+            switch (scope) {
+                case XmlScope.TAG_BODY:
+                case XmlScope.DANGLING_NAME:
+                case XmlScope.TAG_HEAD:
+                    result.append('.');
+                    if (pathNames[i] != null) {
+                        result.append(pathNames[i]);
+                    }
+                    break;
+                case XmlScope.NONEMPTY_DOCUMENT:
+                case XmlScope.EMPTY_DOCUMENT:
+                case XmlScope.CLOSED:
+                    break;
+                default:
+                    throw new AssertionError("Unknown scope value: " + scope);
+            }
+        }
+        return result.toString();
+    }
+
+    /**
+     * @return the current location in the document as a dotted path that starts with {@code $}
+     */
+    public String getPath() {
+        return getPath(false);
+    }
+
+    /**
+     * NOTE(review): the flag passed here is not read by {@code getPath(boolean)}, so this
+     * currently returns the same string as {@link #getPath()} - confirm whether that is intended.
+     *
+     * @return a dotted path that starts with {@code $}
+     */
+    public String getPreviousPath() {
+        return getPath(true);
+    }
+
+    /**
+     * Unescapes the character identified by the character or characters that immediately follow a
+     * backslash. The backslash '\' should have already been read. This supports both Unicode escapes
+     * "u000A" and two-character escapes "\n".
+     *
+     * <p>NOTE(review): no caller of this method is visible in this class, and backslash escapes
+     * are not XML syntax - presumably a leftover from the JSON reader; confirm before relying on it.
+     *
+     * @return the unescaped character
+     * @throws MalformedDataException if the escape sequence is malformed
+     */
+    @SuppressWarnings("fallthrough")
+    private char readEscapeCharacter() throws IOException {
+        if (pos == limit && !fillBuffer(1)) {
+            throw syntaxError("Unterminated escape sequence");
+        }
+
+        char escaped = buffer[pos++];
+        switch (escaped) {
+            case 'u':
+                // four hexadecimal digits must follow
+                if (pos + 4 > limit && !fillBuffer(4)) {
+                    throw syntaxError("Unterminated escape sequence");
+                }
+                // Equivalent to Integer.parseInt(stringPool.get(buffer, pos, 4), 16);
+                int result = 0;
+                for (int i = pos, end = i + 4; i < end; i++) {
+                    char c = buffer[i];
+                    result <<= 4;
+                    if (c >= '0' && c <= '9') {
+                        result += (c - '0');
+                    } else if (c >= 'a' && c <= 'f') {
+                        result += (c - 'a' + 10);
+                    } else if (c >= 'A' && c <= 'F') {
+                        result += (c - 'A' + 10);
+                    } else {
+                        throw syntaxError("Malformed Unicode escape \\u" + new String(buffer, pos, 4));
+                    }
+                }
+                pos += 4;
+                return (char) result;
+
+            case 't':
+                return '\t';
+
+            case 'b':
+                return '\b';
+
+            case 'n':
+                return '\n';
+
+            case 'r':
+                return '\r';
+
+            case 'f':
+                return '\f';
+
+            case '\n':
+                // an escaped line break is only accepted in lenient mode and still counts as a new line
+                if (!lenient) {
+                    throw syntaxError("Cannot escape a newline character in strict mode");
+                }
+                lineNumber++;
+                lineStart = pos;
+                // fall-through
+
+            case '\'':
+                // reached for an escaped apostrophe and, in lenient mode, by falling through from above
+                if (!lenient) {
+                    throw syntaxError("Invalid escaped character \"'\" in strict mode");
+                }
+            case '"':
+            case '\\':
+            case '/':
+                // these characters stand for themselves
+                return escaped;
+            default:
+                // throw error when none of the above cases are matched
+                throw syntaxError("Invalid escape sequence");
+        }
+    }
+
+    /**
+     * Throws a new {@link MalformedDataException} with the given message and information about the
+     * current location.
+     *
+     * <p>This method never returns normally. The declared return type only exists so that call
+     * sites can be written as {@code throw syntaxError(...)}, which lets the compiler see that
+     * control flow ends there.
+     *
+     * @param message the description of the problem; the location is appended to it
+     */
+    private MalformedDataException syntaxError(String message) throws MalformedDataException {
+        throw new MalformedDataException(message + locationString());
+    }
+
+    /**
+     * Builds (but does not throw) the exception for a token of the wrong type. The message names
+     * the expected token, the token reported by {@link #peek()} and the current location.
+     *
+     * @param expected the name of the token type the caller asked for
+     * @throws IOException if {@code peek()} fails while determining the actual token
+     */
+    private IllegalStateException unexpectedTokenError(String expected) throws IOException {
+        return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
+    }
+
+    /**
+     * Consumes the XML declaration ({@code <?xml ... ?>}) if the input starts with one, after
+     * skipping leading whitespace. If there is none, the position is left at the first
+     * non-whitespace character.
+     *
+     * @throws IOException if the declaration is not terminated, the input is empty, or reading fails
+     */
+    private void consumeHeader() throws IOException {
+        // fast-forward through the leading whitespace, then push the character found back
+        nextNonWhitespace(true);
+        pos--;
+
+        if (pos + 5 > limit && !fillBuffer(5)) {
+            return;
+        }
+
+        int p = pos;
+        char[] buf = buffer;
+        if (buf[p] != '<'
+                || buf[p + 1] != '?'
+                || buf[p + 2] != 'x'
+                || buf[p + 3] != 'm'
+                || buf[p + 4] != 'l') {
+            return; // not a header!
+        }
+
+        // we found a header, consume it
+        pos += 5;
+        if (!skipTo("?>")) {
+            throw syntaxError("Unterminated XML declaration");
+        }
+        // skipTo() stops at the start of the match, so the terminator itself still has to be skipped
+        pos += 2;
+    }
+
+    /**
+     * Marks this reader as closed, discards any peeked token and closes the underlying reader.
+     *
+     * @throws IOException if closing the underlying reader fails
+     */
+    @Override
+    public void close() throws IOException {
+        stackSize = 1;
+        stack[0] = XmlScope.CLOSED;
+        peeked = PEEKED_NONE;
+        in.close();
+    }
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java
@ -0,0 +1,11 @@
+package io.gitlab.jfronny.commons.serialize.xml.impl;
+
+/**
+ * Integer markers for the entries of {@code BaseXmlReader}'s scope stack.
+ */
+public class XmlScope {
+    // Inside an opening tag: an attribute or "/>" may follow.
+    public static final int TAG_HEAD = 1;
+    // Inside an element: child tags, CDATA or text may follow.
+    public static final int TAG_BODY = 2;
+    // An attribute name was peeked; '=' and a quoted value are expected next.
+    public static final int DANGLING_NAME = 3;
+    // Initial scope of a reader; nothing has been peeked yet.
+    public static final int EMPTY_DOCUMENT = 4;
+    // Document scope after the first peek.
+    public static final int NONEMPTY_DOCUMENT = 5;
+    // NOTE(review): not referenced by BaseXmlReader - confirm whether it is still needed.
+    public static final int CDATA = 6;
+    // Set by BaseXmlReader.close(); peeking in this scope fails.
+    public static final int CLOSED = 7;
+}
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlToken.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlToken.java
@ -0,0 +1,11 @@
+package io.gitlab.jfronny.commons.serialize.xml.impl;
+
+/**
+ * The token types reported by {@code BaseXmlReader.peek()}.
+ */
+public enum XmlToken {
+    // Start of an opening tag; consumed with beginTag().
+    BEGIN_TAG,
+    // End of an element; consumed with endTag().
+    END_TAG,
+    // Name of an attribute; consumed with nextAttributeName().
+    ATTRIBUTE_NAME,
+    // Quoted value of an attribute; consumed with nextAttributeValue().
+    ATTRIBUTE_VALUE,
+    // Character data; consumed with nextText().
+    TEXT,
+    // A CDATA section; consumed with nextCData().
+    CDATA,
+    // End of the input.
+    EOF
+}
--- a/commons-serialize-xml/src/main/java/module-info.java
+++ b/commons-serialize-xml/src/main/java/module-info.java
@ -0,0 +1,6 @@
+// Module descriptor for the XML serialization module.
+// NOTE(review): only the top-level package is exported; BaseXmlReader, XmlScope and XmlToken
+// live in the ".impl" package, which is not exported - confirm that this is intended.
+module io.gitlab.jfronny.commons.serialize.xml {
+    requires io.gitlab.jfronny.commons;
+    requires io.gitlab.jfronny.commons.serialize;
+    // compile-time only dependency
+    requires static org.jetbrains.annotations;
+    exports io.gitlab.jfronny.commons.serialize.xml;
+}
--- a/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java
+++ b/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java
--- a/settings.gradle.kts
+++ b/settings.gradle.kts
@ -8,6 +8,7 @@ include("commons-http-server")
 include("commons-serialize")
 include("commons-serialize-dsl")
 include("commons-serialize-json")
+include("commons-serialize-xml")
 include("commons-serialize-databind")
 include("commons-serialize-databind-api")
 include("commons-serialize-databind-sql")