2024-04-13 20:41:13 +02:00
|
|
|
package io.gitlab.jfronny.commons.serialize.xml;
|
2024-04-13 19:23:31 +02:00
|
|
|
|
|
|
|
import io.gitlab.jfronny.commons.serialize.MalformedDataException;
|
|
|
|
import io.gitlab.jfronny.commons.serialize.StringEscapeUtil;
|
2024-04-13 20:41:13 +02:00
|
|
|
import io.gitlab.jfronny.commons.serialize.xml.impl.XmlScope;
|
2024-04-13 19:23:31 +02:00
|
|
|
|
|
|
|
import java.io.Closeable;
|
|
|
|
import java.io.EOFException;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.Reader;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.Objects;
|
|
|
|
|
2024-04-13 20:41:13 +02:00
|
|
|
public class NativeXmlReader implements Closeable {
|
2024-04-13 19:23:31 +02:00
|
|
|
// Token kinds cached in 'peeked' by doPeek(). PEEKED_NONE means that no token is currently peeked.
private static final int PEEKED_NONE = 0;
private static final int PEEKED_BEGIN_TAG = 1;
private static final int PEEKED_END_TAG = 2;
/** The end of a self-closing tag ({@code />}); peek() reports it as an ordinary end tag. */
private static final int PEEKED_END_TAG_CONCISE = 3;
private static final int PEEKED_TEXT = 4;
private static final int PEEKED_CDATA = 5;
private static final int PEEKED_ATTRIBUTE_NAME = 6;
private static final int PEEKED_ATTRIBUTE_VALUE = 7;
private static final int PEEKED_EOF = 8;

/** The input XML. */
private final Reader in;

static final int BUFFER_SIZE = 1024;
/**
 * Use a manual buffer to easily read and unread upcoming characters, and also so we can create
 * strings without an intermediate StringBuilder whenever a token fits into the buffer.
 */
private final char[] buffer = new char[BUFFER_SIZE];

/** Index into {@link #buffer} of the next character to read. */
private int pos = 0;
/** End (exclusive) of the valid data in {@link #buffer}. */
private int limit = 0;

/** Zero-based number of the current line; incremented for every consumed {@code '\n'}. */
private int lineNumber = 0;
/** Buffer index at which the current line starts; shifted by fillBuffer when the buffer is compacted. */
private int lineStart = 0;

/** The kind of token identified by the last doPeek() that has not been consumed yet. */
int peeked = PEEKED_NONE;

/*
 * The nesting stack of XmlScope values. Using a manual array rather than an ArrayList saves 20%.
 */
private int[] stack = new int[32];
private int stackSize = 0;

{
    stack[stackSize++] = XmlScope.EMPTY_DOCUMENT;
}

/*
 * The path members. They are indexed like the stack:
 * - beginTag stores the name of the opened tag in the slot of the enclosing scope (the slot
 *   below the newly pushed one), which is where endTag looks for the expected name.
 * - nextAttributeName stores the attribute name in the slot of the current tag.
 * - endTag increments pathIndices for the enclosing scope; nothing in this class reads it.
 */
private String[] pathNames = new String[32];
private int[] pathIndices = new int[32];

private boolean lenient = false;
private boolean skipWhitespace = true;
|
2024-04-13 20:41:13 +02:00
|
|
|
/**
 * Creates a reader that takes its XML from the given character source.
 *
 * @param in the source to read from
 * @throws NullPointerException if {@code in} is null
 */
public NativeXmlReader(Reader in) {
    Objects.requireNonNull(in, "in == null");
    this.in = in;
}
|
|
|
|
|
2024-04-13 20:41:13 +02:00
|
|
|
public NativeXmlReader setLenient(boolean lenient) {
|
2024-04-13 19:23:31 +02:00
|
|
|
this.lenient = lenient;
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isLenient() {
|
|
|
|
return lenient;
|
|
|
|
}
|
|
|
|
|
2024-04-13 20:41:13 +02:00
|
|
|
public NativeXmlReader setSkipWhitespace(boolean skipWhitespace) {
|
2024-04-13 20:06:02 +02:00
|
|
|
this.skipWhitespace = skipWhitespace;
|
|
|
|
return this;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isSkipWhitespace() {
|
|
|
|
return skipWhitespace;
|
|
|
|
}
|
|
|
|
|
2024-04-13 19:23:31 +02:00
|
|
|
private void push(int newTop) {
|
|
|
|
if (stackSize == stack.length) {
|
|
|
|
int newLength = stackSize * 2;
|
|
|
|
stack = Arrays.copyOf(stack, newLength);
|
|
|
|
pathIndices = Arrays.copyOf(pathIndices, newLength);
|
|
|
|
pathNames = Arrays.copyOf(pathNames, newLength);
|
|
|
|
}
|
|
|
|
stack[stackSize++] = newTop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true once {@code limit - pos >= minimum}. If the data is exhausted before that many
|
|
|
|
* characters are available, this returns false.
|
|
|
|
*/
|
|
|
|
private boolean fillBuffer(int minimum) throws IOException {
|
|
|
|
char[] buffer = this.buffer;
|
|
|
|
lineStart -= pos;
|
|
|
|
if (limit != pos) {
|
|
|
|
limit -= pos;
|
|
|
|
System.arraycopy(buffer, pos, buffer, 0, limit);
|
|
|
|
} else {
|
|
|
|
limit = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
pos = 0;
|
|
|
|
int total;
|
|
|
|
while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
|
|
|
|
limit += total;
|
|
|
|
|
|
|
|
// if this is the first read, consume an optional byte order mark (BOM) if it exists
|
|
|
|
if (lineNumber == 0 && lineStart == 0 && limit > 0 && buffer[0] == '\ufeff') {
|
|
|
|
pos++;
|
|
|
|
lineStart++;
|
|
|
|
minimum++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (limit >= minimum) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String beginTag() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
if (p != PEEKED_BEGIN_TAG) {
|
|
|
|
throw unexpectedTokenError("BEGIN_TAG");
|
|
|
|
}
|
|
|
|
String name = nextName();
|
|
|
|
pathNames[stackSize - 1] = name;
|
|
|
|
push(XmlScope.TAG_HEAD);
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String endTag() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
String name;
|
|
|
|
if (p == PEEKED_END_TAG) {
|
|
|
|
name = nextName();
|
|
|
|
if (buffer[pos] != '>') {
|
|
|
|
throw syntaxError("Expected > but was " + buffer[pos]);
|
|
|
|
}
|
|
|
|
pos++;
|
|
|
|
} else if (p == PEEKED_END_TAG_CONCISE) {
|
2024-04-13 20:09:10 +02:00
|
|
|
name = pathNames[stackSize - 2];
|
2024-04-13 19:23:31 +02:00
|
|
|
} else throw unexpectedTokenError("END_TAG");
|
2024-04-13 20:09:10 +02:00
|
|
|
if (!name.equals(pathNames[stackSize - 2])) {
|
2024-04-13 19:23:31 +02:00
|
|
|
if (!lenient) throw syntaxError("Mismatched closing tag: Expected " + pathNames[stackSize - 1] + " but was " + name);
|
|
|
|
}
|
|
|
|
stackSize--;
|
|
|
|
pathNames[stackSize] = null; // Free the last path name so that it can be garbage collected!
|
|
|
|
pathIndices[stackSize - 1]++;
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean hasNext() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
2024-04-13 20:35:26 +02:00
|
|
|
return p != PEEKED_EOF && p != PEEKED_END_TAG && p != PEEKED_END_TAG_CONCISE;
|
2024-04-13 19:23:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public XmlToken peek() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
return switch (p) {
|
|
|
|
case PEEKED_BEGIN_TAG -> XmlToken.BEGIN_TAG;
|
2024-04-13 21:47:35 +02:00
|
|
|
case PEEKED_END_TAG, PEEKED_END_TAG_CONCISE -> XmlToken.END_TAG;
|
2024-04-13 19:23:31 +02:00
|
|
|
case PEEKED_TEXT -> XmlToken.TEXT;
|
|
|
|
case PEEKED_CDATA -> XmlToken.CDATA;
|
|
|
|
case PEEKED_ATTRIBUTE_NAME -> XmlToken.ATTRIBUTE_NAME;
|
|
|
|
case PEEKED_ATTRIBUTE_VALUE -> XmlToken.ATTRIBUTE_VALUE;
|
|
|
|
case PEEKED_EOF -> XmlToken.EOF;
|
|
|
|
default -> throw new AssertionError();
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
int doPeek() throws IOException {
|
|
|
|
int peekStack = stack[stackSize - 1];
|
|
|
|
if (peekStack == XmlScope.TAG_HEAD) {
|
|
|
|
stack[stackSize - 1] = XmlScope.DANGLING_NAME;
|
|
|
|
int c = nextNonWhitespace(true);
|
|
|
|
if (c == -1) {
|
|
|
|
throw syntaxError("Unterminated tag");
|
|
|
|
} if (c == '/') {
|
|
|
|
if (pos < limit || fillBuffer(1)) {
|
|
|
|
char chNext = buffer[pos++];
|
|
|
|
if (chNext == '>') {
|
|
|
|
stack[stackSize - 1] = XmlScope.TAG_BODY;
|
|
|
|
return peeked = PEEKED_END_TAG_CONCISE;
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Expected /> but was /" + chNext);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Unterminated tag at " + c);
|
|
|
|
}
|
2024-04-13 20:06:41 +02:00
|
|
|
} else if (c == '>') {
|
|
|
|
stack[stackSize - 1] = XmlScope.TAG_BODY;
|
|
|
|
// fall through
|
2024-04-13 19:23:31 +02:00
|
|
|
} else if (pos < limit || fillBuffer(1)) {
|
|
|
|
char chNext = buffer[pos + 1];
|
|
|
|
var check = isNameStart((char) c, chNext);
|
2024-04-13 20:32:54 +02:00
|
|
|
pos--;
|
2024-04-13 19:23:31 +02:00
|
|
|
if (check != NameCheck.NONE) {
|
|
|
|
return peeked = PEEKED_ATTRIBUTE_NAME;
|
|
|
|
} else {
|
|
|
|
throw unexpectedTokenError("attribute name");
|
|
|
|
}
|
|
|
|
} else throw syntaxError("Unterminated tag at " + c);
|
|
|
|
} else if (peekStack == XmlScope.DANGLING_NAME) {
|
|
|
|
stack[stackSize - 1] = XmlScope.TAG_HEAD;
|
|
|
|
// Look for an equals sign before the value
|
2024-04-13 20:06:02 +02:00
|
|
|
int c = lenient ? nextNonWhitespace(true) : buffer[pos++];
|
2024-04-13 19:23:31 +02:00
|
|
|
if (c == '=') {
|
2024-04-13 20:06:02 +02:00
|
|
|
c = lenient ? nextNonWhitespace(true) : buffer[pos++];
|
2024-04-13 20:32:54 +02:00
|
|
|
pos--;
|
2024-04-13 19:23:31 +02:00
|
|
|
if (c == '\'' || c == '"') {
|
|
|
|
return peeked = PEEKED_ATTRIBUTE_VALUE;
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Expected a value but was " + (char) c);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Expected '='");
|
|
|
|
}
|
|
|
|
} else if (peekStack == XmlScope.TAG_BODY) {
|
|
|
|
// fall through: a new element is starting
|
|
|
|
} else if (peekStack == XmlScope.EMPTY_DOCUMENT) {
|
|
|
|
stack[stackSize - 1] = XmlScope.NONEMPTY_DOCUMENT;
|
|
|
|
// fall through: a new element is starting
|
|
|
|
} else if (peekStack == XmlScope.NONEMPTY_DOCUMENT) {
|
2024-04-13 20:06:02 +02:00
|
|
|
int c = skipWhitespace ? nextNonWhitespace(false) : buffer[pos++];
|
2024-04-13 19:23:31 +02:00
|
|
|
if (c == -1) {
|
|
|
|
return peeked = PEEKED_EOF;
|
|
|
|
} else {
|
|
|
|
checkLenient();
|
|
|
|
pos--;
|
|
|
|
// fall through: a new element is starting
|
|
|
|
}
|
|
|
|
} else if (peekStack == XmlScope.CLOSED) {
|
|
|
|
throw new IllegalStateException("BaseXmlReader is closed");
|
|
|
|
}
|
2024-04-13 20:06:02 +02:00
|
|
|
int c = skipWhitespace ? nextNonWhitespace(true) : buffer[pos++];
|
2024-04-13 19:23:31 +02:00
|
|
|
if (c == -1) {
|
|
|
|
throw syntaxError("Unterminated tag");
|
|
|
|
} else if (c == '<') {
|
2024-04-13 19:32:27 +02:00
|
|
|
if (pos + 1 <= limit || fillBuffer(1)) {
|
|
|
|
char chNext = buffer[pos];
|
2024-04-13 19:23:31 +02:00
|
|
|
if (chNext == '/') {
|
|
|
|
pos++;
|
|
|
|
return peeked = PEEKED_END_TAG;
|
|
|
|
} else if (chNext == '!') {
|
2024-04-13 20:06:41 +02:00
|
|
|
if (pos + 8 <= limit || fillBuffer(8)) {
|
|
|
|
if (buffer[pos + 1] == '[' && buffer[pos + 2] == 'C' && buffer[pos + 3] == 'D' && buffer[pos + 4] == 'A' && buffer[pos + 5] == 'T' && buffer[pos + 6] == 'A' && buffer[pos + 7] == '[') {
|
|
|
|
pos += 8;
|
2024-04-13 19:23:31 +02:00
|
|
|
return peeked = PEEKED_CDATA;
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Expected <![CDATA[ but was <![" + new String(buffer, pos, 5));
|
|
|
|
}
|
|
|
|
}
|
2024-04-13 19:32:27 +02:00
|
|
|
} else if (pos + 2 <= limit || fillBuffer(2)) {
|
|
|
|
var check = isNameStart(chNext, buffer[pos + 1]);
|
2024-04-13 19:23:31 +02:00
|
|
|
if (check != NameCheck.NONE) {
|
|
|
|
return peeked = PEEKED_BEGIN_TAG;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-04-13 19:32:27 +02:00
|
|
|
throw syntaxError("Unterminated tag");
|
2024-04-13 19:23:31 +02:00
|
|
|
} else {
|
2024-04-13 20:06:41 +02:00
|
|
|
pos--;
|
2024-04-13 19:23:31 +02:00
|
|
|
return peeked = PEEKED_TEXT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private enum NameCheck { FIRST, BOTH, NONE }
|
|
|
|
private NameCheck isNameStart(char ch, char chNext) {
|
|
|
|
if ('A' <= ch && ch <= 'Z') return NameCheck.FIRST;
|
|
|
|
if ('a' <= ch && ch <= 'z') return NameCheck.FIRST;
|
|
|
|
return switch (ch) {
|
|
|
|
case ':', '_' -> NameCheck.FIRST;
|
|
|
|
case '\u2070' -> chNext == '\u218F' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
case '\u2C00' -> chNext == '\u2FEF' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
case '\u3001' -> chNext == '\uD7FF' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
case '\uF900' -> chNext == '\uFDCF' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
case '\uFDF0' -> chNext == '\uFFFD' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
default -> NameCheck.NONE;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
private NameCheck isName(char ch, char chNext) {
|
|
|
|
var nameStart = isNameStart(ch, chNext);
|
|
|
|
if (nameStart != NameCheck.NONE) return nameStart;
|
|
|
|
if ('0' <= ch && ch <= '9') return NameCheck.FIRST;
|
|
|
|
return switch (ch) {
|
|
|
|
case '-', '.', '\u00B7' -> NameCheck.FIRST;
|
|
|
|
case '\u0300' -> chNext == '\u036F' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
case '\u203F' -> chNext == '\u2040' ? NameCheck.BOTH : NameCheck.NONE;
|
|
|
|
default -> NameCheck.NONE;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
public String nextAttributeName() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
if (p != PEEKED_ATTRIBUTE_NAME) {
|
|
|
|
throw unexpectedTokenError("ATTRIBUTE_NAME");
|
|
|
|
}
|
|
|
|
String result = nextName();
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
pathNames[stackSize - 1] = result;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String nextAttributeValue() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
if (p != PEEKED_ATTRIBUTE_VALUE) {
|
|
|
|
throw unexpectedTokenError("ATTRIBUTE_VALUE");
|
|
|
|
}
|
|
|
|
char quote = buffer[pos++];
|
2024-04-15 10:22:33 +02:00
|
|
|
String result = readUntil((c, i) -> {
|
2024-04-13 20:32:54 +02:00
|
|
|
if (!lenient && c < 0x20 && c != 0x09) throw syntaxError("Control character in attribute value");
|
2024-04-13 19:23:31 +02:00
|
|
|
if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
|
|
|
|
return c == quote;
|
|
|
|
}, true);
|
2024-04-15 10:22:33 +02:00
|
|
|
pos++;
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
return result;
|
2024-04-13 19:23:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private String readReference() throws IOException {
|
|
|
|
if (pos == limit && !fillBuffer(1)) {
|
|
|
|
throw syntaxError("Unterminated escape sequence");
|
|
|
|
}
|
|
|
|
if (buffer[pos] == '#') {
|
|
|
|
// read the character reference
|
|
|
|
pos++;
|
|
|
|
if (pos == limit && !fillBuffer(1)) {
|
|
|
|
throw syntaxError("Unterminated escape sequence");
|
|
|
|
}
|
|
|
|
boolean isHex = buffer[pos] == 'x' || buffer[pos] == 'X';
|
|
|
|
if (isHex) pos++;
|
|
|
|
String result = readUntil((c, i) -> {
|
|
|
|
if (c == ';') return true;
|
|
|
|
if ('0' <= c && c <= '9') return false;
|
|
|
|
if (isHex && ('a' <= c && c <= 'f' || 'A' <= c && c <= 'F')) return false;
|
|
|
|
throw syntaxError("Malformed character reference");
|
|
|
|
}, false);
|
|
|
|
if (!result.endsWith(";")) throw syntaxError("Missing ';' in character reference");
|
|
|
|
result = result.substring(0, result.length() - 1);
|
|
|
|
return String.valueOf((char) Integer.parseInt(result, isHex ? 16 : 10));
|
|
|
|
} else {
|
|
|
|
// read the entity reference
|
|
|
|
// we don't support these, so just handle them like a normal string
|
|
|
|
String result = nextName();
|
|
|
|
if (buffer[pos] != ';') throw syntaxError("Missing ';' in entity reference");
|
|
|
|
pos++;
|
|
|
|
if (result.equals("apos")) return "'";
|
|
|
|
if (result.equals("quot")) return "\"";
|
|
|
|
if (result.equals("amp")) return "&";
|
|
|
|
if (result.equals("lt")) return "<";
|
|
|
|
if (result.equals("gt")) return ">";
|
|
|
|
return "&" + result + ";";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public String nextText() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
if (p != PEEKED_TEXT) {
|
|
|
|
throw unexpectedTokenError("TEXT");
|
|
|
|
}
|
2024-04-13 20:06:02 +02:00
|
|
|
String result = readUntil((c, i) -> c == '<', true);
|
|
|
|
if (skipWhitespace) {
|
|
|
|
result = result.trim();
|
|
|
|
}
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
return result;
|
2024-04-13 19:23:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public String nextCData() throws IOException {
|
|
|
|
int p = peeked;
|
|
|
|
if (p == PEEKED_NONE) {
|
|
|
|
p = doPeek();
|
|
|
|
}
|
|
|
|
if (p != PEEKED_CDATA) {
|
|
|
|
throw unexpectedTokenError("CDATA");
|
|
|
|
}
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
while (true) {
|
|
|
|
sb.append(readUntil((c, i) -> c == ']', false));
|
|
|
|
if (pos + 2 < limit || fillBuffer(3)) {
|
|
|
|
if (buffer[pos] == ']' && buffer[pos + 1] == ']' && buffer[pos + 2] == '>') {
|
|
|
|
pos += 3;
|
2024-04-13 20:06:41 +02:00
|
|
|
peeked = PEEKED_NONE;
|
2024-04-13 19:23:31 +02:00
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Unterminated CDATA");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private String nextName() throws IOException {
|
|
|
|
return readUntil((c, i) -> isName(c, pos + i + 1 < limit ? buffer[pos + i + 1] : '\0') == NameCheck.NONE, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
@FunctionalInterface
|
|
|
|
private interface EndPredicate {
|
|
|
|
boolean test(char c, int i) throws MalformedDataException;
|
|
|
|
}
|
|
|
|
|
|
|
|
private String readUntil(EndPredicate character, boolean handleReferences) throws IOException {
|
|
|
|
StringBuilder builder = null;
|
|
|
|
int i = 0;
|
|
|
|
findEnd:
|
|
|
|
while (true) {
|
|
|
|
for (; pos + i < limit; i++) {
|
|
|
|
char c = buffer[pos + i];
|
|
|
|
if (character.test(c, i)) {
|
|
|
|
break findEnd;
|
|
|
|
} else if (handleReferences && c == '&') {
|
|
|
|
if (builder == null) {
|
|
|
|
builder = new StringBuilder(Math.max(i, 16));
|
|
|
|
}
|
|
|
|
builder.append(buffer, pos, i);
|
|
|
|
pos += i;
|
|
|
|
i = 0;
|
|
|
|
builder.append(readReference());
|
|
|
|
} else if (c == '\n') {
|
|
|
|
lineNumber++;
|
|
|
|
lineStart = pos + i + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attempt to load the entire name into the buffer at once.
|
|
|
|
if (i < buffer.length) {
|
|
|
|
if (fillBuffer(i + 1)) {
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// use a StringBuilder when the name is too long.
|
|
|
|
if (builder == null) {
|
|
|
|
builder = new StringBuilder(Math.max(i, 16));
|
|
|
|
}
|
|
|
|
builder.append(buffer, pos, i);
|
|
|
|
pos += i;
|
|
|
|
i = 0;
|
|
|
|
if (!fillBuffer(1)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
String result = builder != null
|
|
|
|
? builder.append(buffer, pos, i).toString()
|
|
|
|
: new String(buffer, pos, i);
|
|
|
|
pos += i;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the next character in the stream that is neither whitespace nor a part of a comment.
|
|
|
|
* When this returns, the returned character is always at {@code buffer[pos-1]}; this means the
|
|
|
|
* caller can always push back the returned character by decrementing {@code pos}.
|
|
|
|
*/
|
|
|
|
private int nextNonWhitespace(boolean throwOnEof) throws IOException {
|
|
|
|
/*
|
|
|
|
* This code uses ugly local variables 'p' and 'l' representing the 'pos'
|
|
|
|
* and 'limit' fields respectively. Using locals rather than fields saves
|
|
|
|
* a few field reads for each whitespace character in a pretty-printed
|
|
|
|
* document, resulting in a 5% speedup. We need to flush 'p' to its field
|
|
|
|
* before any (potentially indirect) call to fillBuffer() and reread both
|
|
|
|
* 'p' and 'l' after any (potentially indirect) call to the same method.
|
|
|
|
*/
|
|
|
|
char[] buffer = this.buffer;
|
|
|
|
int p = pos;
|
|
|
|
int l = limit;
|
|
|
|
while (true) {
|
|
|
|
if (p == l) {
|
|
|
|
pos = p;
|
|
|
|
if (!fillBuffer(1)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
p = pos;
|
|
|
|
l = limit;
|
|
|
|
}
|
|
|
|
|
|
|
|
int c = buffer[p++];
|
|
|
|
if (c == '\n') {
|
|
|
|
lineNumber++;
|
|
|
|
lineStart = p;
|
|
|
|
continue;
|
|
|
|
} else if (c == ' ' || c == '\r' || c == '\t') {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
pos = p;
|
|
|
|
if (c == '<') {
|
|
|
|
if (p == l) {
|
|
|
|
pos--; // push back '/' so it's still in the buffer when this method returns
|
|
|
|
boolean charsLoaded = fillBuffer(4);
|
|
|
|
pos++; // consume the '/' again
|
|
|
|
if (!charsLoaded) {
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (buffer[pos] == '!' && buffer[pos + 1] == '-' && buffer[pos + 2] == '-') {
|
|
|
|
pos += 3;
|
|
|
|
if (!skipTo("-->")) {
|
|
|
|
throw syntaxError("Unterminated comment");
|
|
|
|
}
|
|
|
|
p = pos + 3;
|
|
|
|
l = limit;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
if (throwOnEof) {
|
|
|
|
throw new EOFException("End of input" + locationString());
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private void checkLenient() throws MalformedDataException {
|
|
|
|
if (!lenient) {
|
|
|
|
throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param toFind a string to search for. Must not contain a newline.
|
|
|
|
*/
|
|
|
|
private boolean skipTo(String toFind) throws IOException {
|
|
|
|
int length = toFind.length();
|
|
|
|
outer:
|
|
|
|
for (; pos + length <= limit || fillBuffer(length); pos++) {
|
|
|
|
if (buffer[pos] == '\n') {
|
|
|
|
lineNumber++;
|
|
|
|
lineStart = pos + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
for (int c = 0; c < length; c++) {
|
|
|
|
if (buffer[pos + c] != toFind.charAt(c)) {
|
|
|
|
continue outer;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected String locationString() {
|
|
|
|
int line = lineNumber + 1;
|
|
|
|
int column = pos - lineStart + 1;
|
|
|
|
String replacement = StringEscapeUtil.getReplacement(buffer[pos]);
|
|
|
|
if (replacement == null) {
|
|
|
|
replacement = String.valueOf(buffer[pos]);
|
|
|
|
}
|
|
|
|
String charInterjection = pos < buffer.length ? " (char '" + replacement + "')" : "";
|
|
|
|
return " at line " + line + " column " + column + charInterjection + " path " + getPath();
|
|
|
|
}
|
|
|
|
|
2024-04-13 20:32:54 +02:00
|
|
|
public String getPath() {
|
|
|
|
StringBuilder result = new StringBuilder();
|
|
|
|
boolean first = true;
|
2024-04-13 19:23:31 +02:00
|
|
|
for (int i = 0; i < stackSize; i++) {
|
|
|
|
int scope = stack[i];
|
|
|
|
switch (scope) {
|
2024-04-13 20:32:54 +02:00
|
|
|
case XmlScope.TAG_HEAD:
|
2024-04-13 19:23:31 +02:00
|
|
|
case XmlScope.TAG_BODY:
|
|
|
|
case XmlScope.DANGLING_NAME:
|
2024-04-13 20:32:54 +02:00
|
|
|
case XmlScope.NONEMPTY_DOCUMENT:
|
|
|
|
if (first) first = false;
|
|
|
|
else result.append('.');
|
2024-04-13 19:23:31 +02:00
|
|
|
if (pathNames[i] != null) {
|
|
|
|
result.append(pathNames[i]);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case XmlScope.EMPTY_DOCUMENT:
|
|
|
|
case XmlScope.CLOSED:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new AssertionError("Unknown scope value: " + scope);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Unescapes the character identified by the character or characters that immediately follow a
|
|
|
|
* backslash. The backslash '\' should have already been read. This supports both Unicode escapes
|
|
|
|
* "u000A" and two-character escapes "\n".
|
|
|
|
*
|
|
|
|
* @throws MalformedDataException if the escape sequence is malformed
|
|
|
|
*/
|
|
|
|
@SuppressWarnings("fallthrough")
|
|
|
|
private char readEscapeCharacter() throws IOException {
|
|
|
|
if (pos == limit && !fillBuffer(1)) {
|
|
|
|
throw syntaxError("Unterminated escape sequence");
|
|
|
|
}
|
|
|
|
|
|
|
|
char escaped = buffer[pos++];
|
|
|
|
switch (escaped) {
|
|
|
|
case 'u':
|
|
|
|
if (pos + 4 > limit && !fillBuffer(4)) {
|
|
|
|
throw syntaxError("Unterminated escape sequence");
|
|
|
|
}
|
|
|
|
// Equivalent to Integer.parseInt(stringPool.get(buffer, pos, 4), 16);
|
|
|
|
int result = 0;
|
|
|
|
for (int i = pos, end = i + 4; i < end; i++) {
|
|
|
|
char c = buffer[i];
|
|
|
|
result <<= 4;
|
|
|
|
if (c >= '0' && c <= '9') {
|
|
|
|
result += (c - '0');
|
|
|
|
} else if (c >= 'a' && c <= 'f') {
|
|
|
|
result += (c - 'a' + 10);
|
|
|
|
} else if (c >= 'A' && c <= 'F') {
|
|
|
|
result += (c - 'A' + 10);
|
|
|
|
} else {
|
|
|
|
throw syntaxError("Malformed Unicode escape \\u" + new String(buffer, pos, 4));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pos += 4;
|
|
|
|
return (char) result;
|
|
|
|
|
|
|
|
case 't':
|
|
|
|
return '\t';
|
|
|
|
|
|
|
|
case 'b':
|
|
|
|
return '\b';
|
|
|
|
|
|
|
|
case 'n':
|
|
|
|
return '\n';
|
|
|
|
|
|
|
|
case 'r':
|
|
|
|
return '\r';
|
|
|
|
|
|
|
|
case 'f':
|
|
|
|
return '\f';
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
if (!lenient) {
|
|
|
|
throw syntaxError("Cannot escape a newline character in strict mode");
|
|
|
|
}
|
|
|
|
lineNumber++;
|
|
|
|
lineStart = pos;
|
|
|
|
// fall-through
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
if (!lenient) {
|
|
|
|
throw syntaxError("Invalid escaped character \"'\" in strict mode");
|
|
|
|
}
|
|
|
|
case '"':
|
|
|
|
case '\\':
|
|
|
|
case '/':
|
|
|
|
return escaped;
|
|
|
|
default:
|
|
|
|
// throw error when none of the above cases are matched
|
|
|
|
throw syntaxError("Invalid escape sequence");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Throws a new {@link MalformedDataException} with the given message and information about the
|
|
|
|
* current location.
|
|
|
|
*/
|
|
|
|
private MalformedDataException syntaxError(String message) throws MalformedDataException {
|
|
|
|
throw new MalformedDataException(message + locationString());
|
|
|
|
}
|
|
|
|
|
|
|
|
private IllegalStateException unexpectedTokenError(String expected) throws IOException {
|
|
|
|
return new IllegalStateException("Expected " + expected + " but was " + peek() + locationString());
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Consumes the header if it exists. */
|
|
|
|
private void consumeHeader() throws IOException {
|
|
|
|
// fast-forward through the leading whitespace
|
|
|
|
int unused = nextNonWhitespace(true);
|
|
|
|
pos--;
|
|
|
|
|
|
|
|
if (pos + 5 > limit && !fillBuffer(5)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
int p = pos;
|
|
|
|
char[] buf = buffer;
|
|
|
|
if (buf[p] != '<'
|
|
|
|
|| buf[p + 1] != '?'
|
|
|
|
|| buf[p + 2] != 'x'
|
|
|
|
|| buf[p + 3] != 'm'
|
|
|
|
|| buf[p + 4] != 'l') {
|
|
|
|
return; // not a header!
|
|
|
|
}
|
|
|
|
|
|
|
|
// we found a header, consume it
|
|
|
|
pos += 5;
|
|
|
|
skipTo("?>");
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void close() throws IOException {
|
|
|
|
peeked = PEEKED_NONE;
|
|
|
|
stack[0] = XmlScope.CLOSED;
|
|
|
|
stackSize = 1;
|
|
|
|
in.close();
|
|
|
|
}
|
|
|
|
}
|