feat(serialize-xml): initial prototyping for xml reader
This commit is contained in:
parent
dc04b7c929
commit
eb3db63fd5
|
@ -1398,16 +1398,7 @@ public class JsonReader extends SerializeReader<IOException, JsonReader> impleme
|
|||
}
|
||||
|
||||
private IllegalStateException unexpectedTokenError(String expected) throws IOException {
|
||||
Token peeked = peek();
|
||||
String troubleshootingId =
|
||||
peeked == Token.NULL ? "adapter-not-null-safe" : "unexpected-json-structure";
|
||||
return new IllegalStateException(
|
||||
"Expected "
|
||||
+ expected
|
||||
+ " but was "
|
||||
+ peek()
|
||||
+ locationString()
|
||||
);
|
||||
return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
|
||||
}
|
||||
|
||||
/** Consumes the non-execute prefix if it exists. */
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
import io.gitlab.jfronny.scripts.*
|
||||
|
||||
plugins {
    commons.library
}

dependencies {
    implementation(projects.commons)
    api(projects.commonsSerialize)

    testImplementation(libs.junit.jupiter.api)
    testImplementation(libs.google.truth)
    testRuntimeOnly(libs.junit.jupiter.engine)
    testRuntimeOnly(libs.junit.vintage)
}

publishing {
    publications {
        create<MavenPublication>("maven") {
            groupId = "io.gitlab.jfronny"
            // This build script belongs to the XML module; publishing it under the JSON
            // artifact id would collide with the commons-serialize-json publication.
            artifactId = "commons-serialize-xml"

            from(components["java"])
        }
    }
}

tasks.javadoc {
    linksOffline("https://maven.frohnmeyer-wds.de/javadoc/artifacts/io/gitlab/jfronny/commons/$version/raw", projects.commons)
    linksOffline("https://maven.frohnmeyer-wds.de/javadoc/artifacts/io/gitlab/jfronny/commons-serialize/$version/raw", projects.commonsSerialize)
}
|
|
@ -0,0 +1,4 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml;
|
||||
|
||||
/**
 * Placeholder for the public XML reader of this module.
 *
 * <p>The class has no members yet.
 */
public class XmlReader {
    // intentionally empty
}
|
|
@ -0,0 +1,4 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml;
|
||||
|
||||
/**
 * Placeholder for the XML transport of this module.
 *
 * <p>The class has no members yet.
 */
public class XmlTransport {
    // intentionally empty
}
|
|
@ -0,0 +1,4 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml;
|
||||
|
||||
/**
 * Placeholder for the public XML writer of this module.
 *
 * <p>The class has no members yet.
 */
public class XmlWriter {
    // intentionally empty
}
|
|
@ -0,0 +1,802 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml.impl;
|
||||
|
||||
import io.gitlab.jfronny.commons.serialize.MalformedDataException;
|
||||
import io.gitlab.jfronny.commons.serialize.StringEscapeUtil;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.function.IntPredicate;
|
||||
|
||||
public class BaseXmlReader implements Closeable {
|
||||
private static final int PEEKED_NONE = 0;
|
||||
private static final int PEEKED_BEGIN_TAG = 1;
|
||||
private static final int PEEKED_END_TAG = 2;
|
||||
private static final int PEEKED_END_TAG_CONCISE = 3;
|
||||
private static final int PEEKED_TEXT = 4;
|
||||
private static final int PEEKED_CDATA = 5;
|
||||
private static final int PEEKED_ATTRIBUTE_NAME = 6;
|
||||
private static final int PEEKED_ATTRIBUTE_VALUE = 7;
|
||||
private static final int PEEKED_EOF = 8;
|
||||
|
||||
/** The input XML. */
|
||||
private final Reader in;
|
||||
|
||||
static final int BUFFER_SIZE = 1024;
|
||||
/**
|
||||
* Use a manual buffer to easily read and unread upcoming characters, and also so we can create
|
||||
* strings without an intermediate StringBuilder. We decode literals directly out of this buffer,
|
||||
* so it must be at least as long as the longest token that can be reported as a number.
|
||||
*/
|
||||
private final char[] buffer = new char[BUFFER_SIZE];
|
||||
|
||||
private int pos = 0;
|
||||
private int limit = 0;
|
||||
|
||||
private int lineNumber = 0;
|
||||
private int lineStart = 0;
|
||||
|
||||
int peeked = PEEKED_NONE;
|
||||
|
||||
/**
|
||||
* The number of characters in a peeked number literal. Increment 'pos' by this after reading a
|
||||
* number.
|
||||
*/
|
||||
private int peekedNumberLength;
|
||||
|
||||
/**
|
||||
* A peeked string that should be parsed on the next double, long or string. This is populated
|
||||
* before a numeric value is parsed and used if that parsing fails.
|
||||
*/
|
||||
private String peekedString;
|
||||
|
||||
/*
|
||||
* The nesting stack. Using a manual array rather than an ArrayList saves 20%.
|
||||
*/
|
||||
private int[] stack = new int[32];
|
||||
private int stackSize = 0;
|
||||
|
||||
{
|
||||
stack[stackSize++] = XmlScope.EMPTY_DOCUMENT;
|
||||
}
|
||||
|
||||
/*
 * The path members. They correspond directly to stack: at indices where the
 * stack contains a tag scope (TAG_HEAD, DANGLING_NAME or TAG_BODY),
 * pathNames contains the name at this scope. pathIndices of the enclosing
 * scope is incremented whenever a tag is closed. Otherwise the value is
 * undefined.
 */
|
||||
private String[] pathNames = new String[32];
|
||||
private int[] pathIndices = new int[32];
|
||||
|
||||
private boolean lenient = false; // TODO: make this configurable
|
||||
/**
 * Creates a reader that pulls its characters from {@code in}.
 *
 * @param in the character source; must not be {@code null}
 * @throws NullPointerException if {@code in} is {@code null}
 */
public BaseXmlReader(Reader in) {
    Objects.requireNonNull(in, "in == null");
    this.in = in;
}
|
||||
|
||||
public BaseXmlReader setLenient(boolean lenient) {
|
||||
this.lenient = lenient;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isLenient() {
|
||||
return lenient;
|
||||
}
|
||||
|
||||
private void push(int newTop) {
|
||||
if (stackSize == stack.length) {
|
||||
int newLength = stackSize * 2;
|
||||
stack = Arrays.copyOf(stack, newLength);
|
||||
pathIndices = Arrays.copyOf(pathIndices, newLength);
|
||||
pathNames = Arrays.copyOf(pathNames, newLength);
|
||||
}
|
||||
stack[stackSize++] = newTop;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true once {@code limit - pos >= minimum}. If the data is exhausted before that many
|
||||
* characters are available, this returns false.
|
||||
*/
|
||||
private boolean fillBuffer(int minimum) throws IOException {
|
||||
char[] buffer = this.buffer;
|
||||
lineStart -= pos;
|
||||
if (limit != pos) {
|
||||
limit -= pos;
|
||||
System.arraycopy(buffer, pos, buffer, 0, limit);
|
||||
} else {
|
||||
limit = 0;
|
||||
}
|
||||
|
||||
pos = 0;
|
||||
int total;
|
||||
while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
|
||||
limit += total;
|
||||
|
||||
// if this is the first read, consume an optional byte order mark (BOM) if it exists
|
||||
if (lineNumber == 0 && lineStart == 0 && limit > 0 && buffer[0] == '\ufeff') {
|
||||
pos++;
|
||||
lineStart++;
|
||||
minimum++;
|
||||
}
|
||||
|
||||
if (limit >= minimum) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String beginTag() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_BEGIN_TAG) {
|
||||
throw unexpectedTokenError("BEGIN_TAG");
|
||||
}
|
||||
String name = nextName();
|
||||
if (buffer[pos] != '>') {
|
||||
throw syntaxError("Expected > but was " + buffer[pos]);
|
||||
}
|
||||
pos++;
|
||||
pathNames[stackSize - 1] = name;
|
||||
push(XmlScope.TAG_HEAD);
|
||||
peeked = PEEKED_NONE;
|
||||
return name;
|
||||
}
|
||||
|
||||
public String endTag() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
String name;
|
||||
if (p == PEEKED_END_TAG) {
|
||||
name = nextName();
|
||||
if (buffer[pos] != '>') {
|
||||
throw syntaxError("Expected > but was " + buffer[pos]);
|
||||
}
|
||||
pos++;
|
||||
} else if (p == PEEKED_END_TAG_CONCISE) {
|
||||
name = pathNames[stackSize - 1];
|
||||
} else throw unexpectedTokenError("END_TAG");
|
||||
if (!name.equals(pathNames[stackSize - 1])) {
|
||||
if (!lenient) throw syntaxError("Mismatched closing tag: Expected " + pathNames[stackSize - 1] + " but was " + name);
|
||||
}
|
||||
stackSize--;
|
||||
pathNames[stackSize] = null; // Free the last path name so that it can be garbage collected!
|
||||
pathIndices[stackSize - 1]++;
|
||||
peeked = PEEKED_NONE;
|
||||
return name;
|
||||
}
|
||||
|
||||
public boolean hasNext() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
return p != PEEKED_EOF && p != PEEKED_END_TAG;
|
||||
}
|
||||
|
||||
public XmlToken peek() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
return switch (p) {
|
||||
case PEEKED_BEGIN_TAG -> XmlToken.BEGIN_TAG;
|
||||
case PEEKED_END_TAG -> XmlToken.END_TAG;
|
||||
case PEEKED_TEXT -> XmlToken.TEXT;
|
||||
case PEEKED_CDATA -> XmlToken.CDATA;
|
||||
case PEEKED_ATTRIBUTE_NAME -> XmlToken.ATTRIBUTE_NAME;
|
||||
case PEEKED_ATTRIBUTE_VALUE -> XmlToken.ATTRIBUTE_VALUE;
|
||||
case PEEKED_EOF -> XmlToken.EOF;
|
||||
default -> throw new AssertionError();
|
||||
};
|
||||
}
|
||||
|
||||
int doPeek() throws IOException {
|
||||
int peekStack = stack[stackSize - 1];
|
||||
if (peekStack == XmlScope.TAG_HEAD) {
|
||||
stack[stackSize - 1] = XmlScope.DANGLING_NAME;
|
||||
int c = nextNonWhitespace(true);
|
||||
if (c == -1) {
|
||||
throw syntaxError("Unterminated tag");
|
||||
} if (c == '/') {
|
||||
if (pos < limit || fillBuffer(1)) {
|
||||
char chNext = buffer[pos++];
|
||||
if (chNext == '>') {
|
||||
stack[stackSize - 1] = XmlScope.TAG_BODY;
|
||||
return peeked = PEEKED_END_TAG_CONCISE;
|
||||
} else {
|
||||
throw syntaxError("Expected /> but was /" + chNext);
|
||||
}
|
||||
} else {
|
||||
throw syntaxError("Unterminated tag at " + c);
|
||||
}
|
||||
} else if (pos < limit || fillBuffer(1)) {
|
||||
char chNext = buffer[pos + 1];
|
||||
var check = isNameStart((char) c, chNext);
|
||||
if (check != NameCheck.NONE) {
|
||||
return peeked = PEEKED_ATTRIBUTE_NAME;
|
||||
} else {
|
||||
throw unexpectedTokenError("attribute name");
|
||||
}
|
||||
} else throw syntaxError("Unterminated tag at " + c);
|
||||
} else if (peekStack == XmlScope.DANGLING_NAME) {
|
||||
stack[stackSize - 1] = XmlScope.TAG_HEAD;
|
||||
// Look for an equals sign before the value
|
||||
int c = nextNonWhitespace(true);
|
||||
if (c == '=') {
|
||||
c = nextNonWhitespace(true);
|
||||
if (c == '\'' || c == '"') {
|
||||
return peeked = PEEKED_ATTRIBUTE_VALUE;
|
||||
} else {
|
||||
throw syntaxError("Expected a value but was " + (char) c);
|
||||
}
|
||||
} else {
|
||||
throw syntaxError("Expected '='");
|
||||
}
|
||||
} else if (peekStack == XmlScope.TAG_BODY) {
|
||||
// fall through: a new element is starting
|
||||
} else if (peekStack == XmlScope.EMPTY_DOCUMENT) {
|
||||
stack[stackSize - 1] = XmlScope.NONEMPTY_DOCUMENT;
|
||||
// fall through: a new element is starting
|
||||
} else if (peekStack == XmlScope.NONEMPTY_DOCUMENT) {
|
||||
int c = nextNonWhitespace(false);
|
||||
if (c == -1) {
|
||||
return peeked = PEEKED_EOF;
|
||||
} else {
|
||||
checkLenient();
|
||||
pos--;
|
||||
// fall through: a new element is starting
|
||||
}
|
||||
} else if (peekStack == XmlScope.CLOSED) {
|
||||
throw new IllegalStateException("BaseXmlReader is closed");
|
||||
}
|
||||
int c = nextNonWhitespace(true);
|
||||
if (c == -1) {
|
||||
throw syntaxError("Unterminated tag");
|
||||
} else if (c == '<') {
|
||||
if (pos + 1 <= limit || fillBuffer(2)) {
|
||||
char chNext = buffer[pos + 1];
|
||||
if (chNext == '/') {
|
||||
pos++;
|
||||
return peeked = PEEKED_END_TAG;
|
||||
} else if (chNext == '!') {
|
||||
if (pos + 8 <= limit || fillBuffer(9)) {
|
||||
if (buffer[pos + 2] == '[' && buffer[pos + 3] == 'C' && buffer[pos + 4] == 'D' && buffer[pos + 5] == 'A' && buffer[pos + 6] == 'T' && buffer[pos + 7] == 'A' && buffer[pos + 8] == '[') {
|
||||
pos += 9;
|
||||
return peeked = PEEKED_CDATA;
|
||||
} else {
|
||||
throw syntaxError("Expected <![CDATA[ but was <![" + new String(buffer, pos, 5));
|
||||
}
|
||||
}
|
||||
} else if (pos + 2 >= limit || fillBuffer(3)) {
|
||||
var check = isNameStart(chNext, buffer[pos + 2]);
|
||||
if (check != NameCheck.NONE) {
|
||||
pos++;
|
||||
return peeked = PEEKED_BEGIN_TAG;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw syntaxError("Unterminated tag at " + c);
|
||||
} else {
|
||||
return peeked = PEEKED_TEXT;
|
||||
}
|
||||
}
|
||||
|
||||
/** Outcome of testing a character, together with its successor, against the name rules. */
private enum NameCheck {
    /** The tested character is allowed. */
    FIRST,
    /** The tested character and the one following it were matched together. */
    BOTH,
    /** The tested character is not allowed. */
    NONE
}
|
||||
private NameCheck isNameStart(char ch, char chNext) {
|
||||
if ('A' <= ch && ch <= 'Z') return NameCheck.FIRST;
|
||||
if ('a' <= ch && ch <= 'z') return NameCheck.FIRST;
|
||||
return switch (ch) {
|
||||
case ':', '_' -> NameCheck.FIRST;
|
||||
case '\u2070' -> chNext == '\u218F' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
case '\u2C00' -> chNext == '\u2FEF' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
case '\u3001' -> chNext == '\uD7FF' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
case '\uF900' -> chNext == '\uFDCF' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
case '\uFDF0' -> chNext == '\uFFFD' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
default -> NameCheck.NONE;
|
||||
};
|
||||
}
|
||||
|
||||
private NameCheck isName(char ch, char chNext) {
|
||||
var nameStart = isNameStart(ch, chNext);
|
||||
if (nameStart != NameCheck.NONE) return nameStart;
|
||||
if ('0' <= ch && ch <= '9') return NameCheck.FIRST;
|
||||
return switch (ch) {
|
||||
case '-', '.', '\u00B7' -> NameCheck.FIRST;
|
||||
case '\u0300' -> chNext == '\u036F' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
case '\u203F' -> chNext == '\u2040' ? NameCheck.BOTH : NameCheck.NONE;
|
||||
default -> NameCheck.NONE;
|
||||
};
|
||||
}
|
||||
|
||||
public String nextAttributeName() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_ATTRIBUTE_NAME) {
|
||||
throw unexpectedTokenError("ATTRIBUTE_NAME");
|
||||
}
|
||||
String result = nextName();
|
||||
peeked = PEEKED_NONE;
|
||||
pathNames[stackSize - 1] = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
public String nextAttributeValue() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_ATTRIBUTE_VALUE) {
|
||||
throw unexpectedTokenError("ATTRIBUTE_VALUE");
|
||||
}
|
||||
char quote = buffer[pos++];
|
||||
return readUntil((c, i) -> {
|
||||
if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c);
|
||||
if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
|
||||
return c == quote;
|
||||
}, true);
|
||||
// StringBuilder builder = null;
|
||||
// while (true) {
|
||||
// p = pos; // repurpose 'p' to save a position since we no longer need it
|
||||
// int l = limit;
|
||||
// /* the index of the first character not yet appended to the builder. */
|
||||
// int start = p;
|
||||
// while (p < l) {
|
||||
// int c = buffer[p++];
|
||||
// if (!lenient && c < 0x20) {
|
||||
// throw syntaxError("Control character in attribute value: " + c);
|
||||
// } else if (c == quote) {
|
||||
// pos = p;
|
||||
// int len = p - start - 1;
|
||||
// peeked = PEEKED_NONE;
|
||||
// if (builder == null) {
|
||||
// return new String(buffer, start, len);
|
||||
// } else {
|
||||
// builder.append(buffer, start, len);
|
||||
// return builder.toString();
|
||||
// }
|
||||
// } else if (c == '&') {
|
||||
// pos = p;
|
||||
// int len = p - start - 1;
|
||||
// if (builder == null) {
|
||||
// int estimatedLength = (len + 1) * 2;
|
||||
// builder = new StringBuilder(Math.max(estimatedLength, 16));
|
||||
// }
|
||||
// builder.append(buffer, start, len);
|
||||
// builder.append(readReference());
|
||||
// p = pos;
|
||||
// l = limit;
|
||||
// start = p;
|
||||
// } else if (c == '\n') {
|
||||
// lineNumber++;
|
||||
// lineStart = p;
|
||||
// } else if (c == '<') {
|
||||
// throw syntaxError("Expected " + quote + " but was '<'");
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (builder == null) {
|
||||
// int estimatedLength = (p - start) * 2;
|
||||
// builder = new StringBuilder(Math.max(estimatedLength, 16));
|
||||
// }
|
||||
// builder.append(buffer, start, p - start);
|
||||
// pos = p;
|
||||
// if (!fillBuffer(1)) {
|
||||
// throw syntaxError("Unterminated attribute value");
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
private String readReference() throws IOException {
|
||||
if (pos == limit && !fillBuffer(1)) {
|
||||
throw syntaxError("Unterminated escape sequence");
|
||||
}
|
||||
if (buffer[pos] == '#') {
|
||||
// read the character reference
|
||||
pos++;
|
||||
if (pos == limit && !fillBuffer(1)) {
|
||||
throw syntaxError("Unterminated escape sequence");
|
||||
}
|
||||
boolean isHex = buffer[pos] == 'x' || buffer[pos] == 'X';
|
||||
if (isHex) pos++;
|
||||
String result = readUntil((c, i) -> {
|
||||
if (c == ';') return true;
|
||||
if ('0' <= c && c <= '9') return false;
|
||||
if (isHex && ('a' <= c && c <= 'f' || 'A' <= c && c <= 'F')) return false;
|
||||
throw syntaxError("Malformed character reference");
|
||||
}, false);
|
||||
if (!result.endsWith(";")) throw syntaxError("Missing ';' in character reference");
|
||||
result = result.substring(0, result.length() - 1);
|
||||
return String.valueOf((char) Integer.parseInt(result, isHex ? 16 : 10));
|
||||
} else {
|
||||
// read the entity reference
|
||||
// we don't support these, so just handle them like a normal string
|
||||
String result = nextName();
|
||||
if (buffer[pos] != ';') throw syntaxError("Missing ';' in entity reference");
|
||||
pos++;
|
||||
if (result.equals("apos")) return "'";
|
||||
if (result.equals("quot")) return "\"";
|
||||
if (result.equals("amp")) return "&";
|
||||
if (result.equals("lt")) return "<";
|
||||
if (result.equals("gt")) return ">";
|
||||
return "&" + result + ";";
|
||||
}
|
||||
}
|
||||
|
||||
public String nextText() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_TEXT) {
|
||||
throw unexpectedTokenError("TEXT");
|
||||
}
|
||||
return readUntil((c, i) -> c == '<', true);
|
||||
}
|
||||
|
||||
public String nextCData() throws IOException {
|
||||
int p = peeked;
|
||||
if (p == PEEKED_NONE) {
|
||||
p = doPeek();
|
||||
}
|
||||
if (p != PEEKED_CDATA) {
|
||||
throw unexpectedTokenError("CDATA");
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
while (true) {
|
||||
sb.append(readUntil((c, i) -> c == ']', false));
|
||||
if (pos + 2 < limit || fillBuffer(3)) {
|
||||
if (buffer[pos] == ']' && buffer[pos + 1] == ']' && buffer[pos + 2] == '>') {
|
||||
pos += 3;
|
||||
return sb.toString();
|
||||
}
|
||||
} else {
|
||||
throw syntaxError("Unterminated CDATA");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String nextName() throws IOException {
|
||||
return readUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
private interface EndPredicate {
|
||||
boolean test(char c, int i) throws MalformedDataException;
|
||||
}
|
||||
|
||||
private String readUntil(EndPredicate character, boolean handleReferences) throws IOException {
|
||||
StringBuilder builder = null;
|
||||
int i = 0;
|
||||
findEnd:
|
||||
while (true) {
|
||||
for (; pos + i < limit; i++) {
|
||||
char c = buffer[pos + i];
|
||||
if (character.test(c, i)) {
|
||||
break findEnd;
|
||||
} else if (handleReferences && c == '&') {
|
||||
if (builder == null) {
|
||||
builder = new StringBuilder(Math.max(i, 16));
|
||||
}
|
||||
builder.append(buffer, pos, i);
|
||||
pos += i;
|
||||
i = 0;
|
||||
builder.append(readReference());
|
||||
} else if (c == '\n') {
|
||||
lineNumber++;
|
||||
lineStart = pos + i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to load the entire name into the buffer at once.
|
||||
if (i < buffer.length) {
|
||||
if (fillBuffer(i + 1)) {
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// use a StringBuilder when the name is too long.
|
||||
if (builder == null) {
|
||||
builder = new StringBuilder(Math.max(i, 16));
|
||||
}
|
||||
builder.append(buffer, pos, i);
|
||||
pos += i;
|
||||
i = 0;
|
||||
if (!fillBuffer(1)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
String result = builder != null
|
||||
? builder.append(buffer, pos, i).toString()
|
||||
: new String(buffer, pos, i);
|
||||
pos += i;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next character in the stream that is neither whitespace nor a part of a comment.
|
||||
* When this returns, the returned character is always at {@code buffer[pos-1]}; this means the
|
||||
* caller can always push back the returned character by decrementing {@code pos}.
|
||||
*/
|
||||
private int nextNonWhitespace(boolean throwOnEof) throws IOException {
|
||||
/*
|
||||
* This code uses ugly local variables 'p' and 'l' representing the 'pos'
|
||||
* and 'limit' fields respectively. Using locals rather than fields saves
|
||||
* a few field reads for each whitespace character in a pretty-printed
|
||||
* document, resulting in a 5% speedup. We need to flush 'p' to its field
|
||||
* before any (potentially indirect) call to fillBuffer() and reread both
|
||||
* 'p' and 'l' after any (potentially indirect) call to the same method.
|
||||
*/
|
||||
char[] buffer = this.buffer;
|
||||
int p = pos;
|
||||
int l = limit;
|
||||
while (true) {
|
||||
if (p == l) {
|
||||
pos = p;
|
||||
if (!fillBuffer(1)) {
|
||||
break;
|
||||
}
|
||||
p = pos;
|
||||
l = limit;
|
||||
}
|
||||
|
||||
int c = buffer[p++];
|
||||
if (c == '\n') {
|
||||
lineNumber++;
|
||||
lineStart = p;
|
||||
continue;
|
||||
} else if (c == ' ' || c == '\r' || c == '\t') {
|
||||
continue;
|
||||
}
|
||||
|
||||
pos = p;
|
||||
if (c == '<') {
|
||||
if (p == l) {
|
||||
pos--; // push back '/' so it's still in the buffer when this method returns
|
||||
boolean charsLoaded = fillBuffer(4);
|
||||
pos++; // consume the '/' again
|
||||
if (!charsLoaded) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
if (buffer[pos] == '!' && buffer[pos + 1] == '-' && buffer[pos + 2] == '-') {
|
||||
pos += 3;
|
||||
if (!skipTo("-->")) {
|
||||
throw syntaxError("Unterminated comment");
|
||||
}
|
||||
p = pos + 3;
|
||||
l = limit;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
if (throwOnEof) {
|
||||
throw new EOFException("End of input" + locationString());
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
private void checkLenient() throws MalformedDataException {
|
||||
if (!lenient) {
|
||||
throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the position until after the next newline character. If the line is terminated by
|
||||
* "\r\n", the '\n' must be consumed as whitespace by the caller.
|
||||
*/
|
||||
private void skipToEndOfLine() throws IOException {
|
||||
while (pos < limit || fillBuffer(1)) {
|
||||
char c = buffer[pos++];
|
||||
if (c == '\n') {
|
||||
lineNumber++;
|
||||
lineStart = pos;
|
||||
break;
|
||||
} else if (c == '\r') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param toFind a string to search for. Must not contain a newline.
|
||||
*/
|
||||
private boolean skipTo(String toFind) throws IOException {
|
||||
int length = toFind.length();
|
||||
outer:
|
||||
for (; pos + length <= limit || fillBuffer(length); pos++) {
|
||||
if (buffer[pos] == '\n') {
|
||||
lineNumber++;
|
||||
lineStart = pos + 1;
|
||||
continue;
|
||||
}
|
||||
for (int c = 0; c < length; c++) {
|
||||
if (buffer[pos + c] != toFind.charAt(c)) {
|
||||
continue outer;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected String locationString() {
|
||||
int line = lineNumber + 1;
|
||||
int column = pos - lineStart + 1;
|
||||
String replacement = StringEscapeUtil.getReplacement(buffer[pos]);
|
||||
if (replacement == null) {
|
||||
replacement = String.valueOf(buffer[pos]);
|
||||
}
|
||||
String charInterjection = pos < buffer.length ? " (char '" + replacement + "')" : "";
|
||||
return " at line " + line + " column " + column + charInterjection + " path " + getPath();
|
||||
}
|
||||
|
||||
private String getPath(boolean usePreviousPath) {
|
||||
StringBuilder result = new StringBuilder().append('$');
|
||||
for (int i = 0; i < stackSize; i++) {
|
||||
int scope = stack[i];
|
||||
switch (scope) {
|
||||
case XmlScope.TAG_BODY:
|
||||
case XmlScope.DANGLING_NAME:
|
||||
case XmlScope.TAG_HEAD:
|
||||
result.append('.');
|
||||
if (pathNames[i] != null) {
|
||||
result.append(pathNames[i]);
|
||||
}
|
||||
break;
|
||||
case XmlScope.NONEMPTY_DOCUMENT:
|
||||
case XmlScope.EMPTY_DOCUMENT:
|
||||
case XmlScope.CLOSED:
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError("Unknown scope value: " + scope);
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public String getPath() {
|
||||
return getPath(false);
|
||||
}
|
||||
|
||||
public String getPreviousPath() {
|
||||
return getPath(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unescapes the character identified by the character or characters that immediately follow a
|
||||
* backslash. The backslash '\' should have already been read. This supports both Unicode escapes
|
||||
* "u000A" and two-character escapes "\n".
|
||||
*
|
||||
* @throws MalformedDataException if the escape sequence is malformed
|
||||
*/
|
||||
@SuppressWarnings("fallthrough")
|
||||
private char readEscapeCharacter() throws IOException {
|
||||
if (pos == limit && !fillBuffer(1)) {
|
||||
throw syntaxError("Unterminated escape sequence");
|
||||
}
|
||||
|
||||
char escaped = buffer[pos++];
|
||||
switch (escaped) {
|
||||
case 'u':
|
||||
if (pos + 4 > limit && !fillBuffer(4)) {
|
||||
throw syntaxError("Unterminated escape sequence");
|
||||
}
|
||||
// Equivalent to Integer.parseInt(stringPool.get(buffer, pos, 4), 16);
|
||||
int result = 0;
|
||||
for (int i = pos, end = i + 4; i < end; i++) {
|
||||
char c = buffer[i];
|
||||
result <<= 4;
|
||||
if (c >= '0' && c <= '9') {
|
||||
result += (c - '0');
|
||||
} else if (c >= 'a' && c <= 'f') {
|
||||
result += (c - 'a' + 10);
|
||||
} else if (c >= 'A' && c <= 'F') {
|
||||
result += (c - 'A' + 10);
|
||||
} else {
|
||||
throw syntaxError("Malformed Unicode escape \\u" + new String(buffer, pos, 4));
|
||||
}
|
||||
}
|
||||
pos += 4;
|
||||
return (char) result;
|
||||
|
||||
case 't':
|
||||
return '\t';
|
||||
|
||||
case 'b':
|
||||
return '\b';
|
||||
|
||||
case 'n':
|
||||
return '\n';
|
||||
|
||||
case 'r':
|
||||
return '\r';
|
||||
|
||||
case 'f':
|
||||
return '\f';
|
||||
|
||||
case '\n':
|
||||
if (!lenient) {
|
||||
throw syntaxError("Cannot escape a newline character in strict mode");
|
||||
}
|
||||
lineNumber++;
|
||||
lineStart = pos;
|
||||
// fall-through
|
||||
|
||||
case '\'':
|
||||
if (!lenient) {
|
||||
throw syntaxError("Invalid escaped character \"'\" in strict mode");
|
||||
}
|
||||
case '"':
|
||||
case '\\':
|
||||
case '/':
|
||||
return escaped;
|
||||
default:
|
||||
// throw error when none of the above cases are matched
|
||||
throw syntaxError("Invalid escape sequence");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws a new {@link MalformedDataException} with the given message and information about the
|
||||
* current location.
|
||||
*/
|
||||
private MalformedDataException syntaxError(String message) throws MalformedDataException {
|
||||
throw new MalformedDataException(message + locationString());
|
||||
}
|
||||
|
||||
private IllegalStateException unexpectedTokenError(String expected) throws IOException {
|
||||
return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
|
||||
}
|
||||
|
||||
/** Consumes the header if it exists. */
|
||||
private void consumeHeader() throws IOException {
|
||||
// fast-forward through the leading whitespace
|
||||
int unused = nextNonWhitespace(true);
|
||||
pos--;
|
||||
|
||||
if (pos + 5 > limit && !fillBuffer(5)) {
|
||||
return;
|
||||
}
|
||||
|
||||
int p = pos;
|
||||
char[] buf = buffer;
|
||||
if (buf[p] != '<'
|
||||
|| buf[p + 1] != '?'
|
||||
|| buf[p + 2] != 'x'
|
||||
|| buf[p + 3] != 'm'
|
||||
|| buf[p + 4] != 'l') {
|
||||
return; // not a header!
|
||||
}
|
||||
|
||||
// we found a header, consume it
|
||||
pos += 5;
|
||||
skipTo("?>");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
peeked = PEEKED_NONE;
|
||||
stack[0] = XmlScope.CLOSED;
|
||||
stackSize = 1;
|
||||
in.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml.impl;
|
||||
|
||||
/** Constants describing the lexical scopes kept on the nesting stack of the XML reader. */
public class XmlScope {
    /** Inside an opening tag, where an attribute name or the end of the tag may follow. */
    public static final int TAG_HEAD = 1;

    /** Inside the content of an element. */
    public static final int TAG_BODY = 2;

    /** An attribute name has been peeked; '=' and a value are expected next. */
    public static final int DANGLING_NAME = 3;

    /** Nothing of the document has been peeked yet. */
    public static final int EMPTY_DOCUMENT = 4;

    /** The document has been started. */
    public static final int NONEMPTY_DOCUMENT = 5;

    /** Scope value for CDATA. NOTE(review): no usage is visible alongside this class yet. */
    public static final int CDATA = 6;

    /** The reader has been closed. */
    public static final int CLOSED = 7;
}
|
|
@ -0,0 +1,11 @@
|
|||
package io.gitlab.jfronny.commons.serialize.xml.impl;
|
||||
|
||||
/** The kinds of tokens the XML reader reports. */
public enum XmlToken {
    /** The opening of an element. */
    BEGIN_TAG,

    /** The closing of an element. */
    END_TAG,

    /** The name of an attribute. */
    ATTRIBUTE_NAME,

    /** The value of an attribute. */
    ATTRIBUTE_VALUE,

    /** Character data. */
    TEXT,

    /** A CDATA section. */
    CDATA,

    /** The end of the input. */
    EOF
}
|
|
@ -0,0 +1,6 @@
|
|||
module io.gitlab.jfronny.commons.serialize.xml {
|
||||
requires io.gitlab.jfronny.commons;
|
||||
requires io.gitlab.jfronny.commons.serialize;
|
||||
requires static org.jetbrains.annotations;
|
||||
exports io.gitlab.jfronny.commons.serialize.xml;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -8,6 +8,7 @@ include("commons-http-server")
|
|||
include("commons-serialize")
|
||||
include("commons-serialize-dsl")
|
||||
include("commons-serialize-json")
|
||||
include("commons-serialize-xml")
|
||||
include("commons-serialize-databind")
|
||||
include("commons-serialize-databind-api")
|
||||
include("commons-serialize-databind-sql")
|
||||
|
|
Loading…
Reference in New Issue