diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java
index b04fa3c..c6293d1 100644
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java
@@ -230,6 +230,7 @@ public class BaseXmlReader implements Closeable {
} else if (pos < limit || fillBuffer(1)) {
char chNext = buffer[pos + 1];
var check = isNameStart((char) c, chNext);
+ pos--;
if (check != NameCheck.NONE) {
return peeked = PEEKED_ATTRIBUTE_NAME;
} else {
@@ -242,6 +243,7 @@ public class BaseXmlReader implements Closeable {
int c = lenient ? nextNonWhitespace(true) : buffer[pos++];
if (c == '=') {
c = lenient ? nextNonWhitespace(true) : buffer[pos++];
+ pos--;
if (c == '\'' || c == '"') {
return peeked = PEEKED_ATTRIBUTE_VALUE;
} else {
@@ -350,60 +352,10 @@ public class BaseXmlReader implements Closeable {
}
char quote = buffer[pos++];
return readUntil((c, i) -> {
- if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c);
+ if (!lenient && c < 0x20 && c != 0x09) throw syntaxError("Control character in attribute value");
if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
return c == quote;
}, true);
-// StringBuilder builder = null;
-// while (true) {
-// p = pos; // repurpose 'p' to save a position since we no longer need it
-// int l = limit;
-// /* the index of the first character not yet appended to the builder. */
-// int start = p;
-// while (p < l) {
-// int c = buffer[p++];
-// if (!lenient && c < 0x20) {
-// throw syntaxError("Control character in attribute value: " + c);
-// } else if (c == quote) {
-// pos = p;
-// int len = p - start - 1;
-// peeked = PEEKED_NONE;
-// if (builder == null) {
-// return new String(buffer, start, len);
-// } else {
-// builder.append(buffer, start, len);
-// return builder.toString();
-// }
-// } else if (c == '&') {
-// pos = p;
-// int len = p - start - 1;
-// if (builder == null) {
-// int estimatedLength = (len + 1) * 2;
-// builder = new StringBuilder(Math.max(estimatedLength, 16));
-// }
-// builder.append(buffer, start, len);
-// builder.append(readReference());
-// p = pos;
-// l = limit;
-// start = p;
-// } else if (c == '\n') {
-// lineNumber++;
-// lineStart = p;
-// } else if (c == '<') {
-// throw syntaxError("Expected " + quote + " but was '<'");
-// }
-// }
-//
-// if (builder == null) {
-// int estimatedLength = (p - start) * 2;
-// builder = new StringBuilder(Math.max(estimatedLength, 16));
-// }
-// builder.append(buffer, start, p - start);
-// pos = p;
-// if (!fillBuffer(1)) {
-// throw syntaxError("Unterminated attribute value");
-// }
-// }
}
private String readReference() throws IOException {
@@ -613,23 +565,6 @@ public class BaseXmlReader implements Closeable {
}
}
- /**
- * Advances the position until after the next newline character. If the line is terminated by
- * "\r\n", the '\n' must be consumed as whitespace by the caller.
- */
- private void skipToEndOfLine() throws IOException {
- while (pos < limit || fillBuffer(1)) {
- char c = buffer[pos++];
- if (c == '\n') {
- lineNumber++;
- lineStart = pos;
- break;
- } else if (c == '\r') {
- break;
- }
- }
- }
-
/**
* @param toFind a string to search for. Must not contain a newline.
*/
@@ -663,20 +598,22 @@ public class BaseXmlReader implements Closeable {
return " at line " + line + " column " + column + charInterjection + " path " + getPath();
}
- private String getPath(boolean usePreviousPath) {
- StringBuilder result = new StringBuilder().append('$');
+ public String getPath() {
+ StringBuilder result = new StringBuilder();
+ boolean first = true;
for (int i = 0; i < stackSize; i++) {
int scope = stack[i];
switch (scope) {
+ case XmlScope.TAG_HEAD:
case XmlScope.TAG_BODY:
case XmlScope.DANGLING_NAME:
- case XmlScope.TAG_HEAD:
- result.append('.');
+ case XmlScope.NONEMPTY_DOCUMENT:
+ if (first) first = false;
+ else result.append('.');
if (pathNames[i] != null) {
result.append(pathNames[i]);
}
break;
- case XmlScope.NONEMPTY_DOCUMENT:
case XmlScope.EMPTY_DOCUMENT:
case XmlScope.CLOSED:
break;
@@ -687,14 +624,6 @@ public class BaseXmlReader implements Closeable {
return result.toString();
}
- public String getPath() {
- return getPath(false);
- }
-
- public String getPreviousPath() {
- return getPath(true);
- }
-
/**
* Unescapes the character identified by the character or characters that immediately follow a
* backslash. The backslash '\' should have already been read. This supports both Unicode escapes
diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java
index ad09d8d..42631ab 100644
--- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java
+++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java
@@ -6,6 +6,5 @@ public class XmlScope {
public static final int DANGLING_NAME = 3;
public static final int EMPTY_DOCUMENT = 4;
public static final int NONEMPTY_DOCUMENT = 5;
- public static final int CDATA = 6;
public static final int CLOSED = 7;
}
diff --git a/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java b/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java
index 1f95e8c..0edb7b9 100644
--- a/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java
+++ b/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java
@@ -46,13 +46,13 @@ public final class BaseXmlReaderTest {
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
assertThat(reader.peek()).isEqualTo(BEGIN_TAG);
- reader.beginTag();
+ assertThat(reader.beginTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(TEXT);
assertThat(reader.nextText()).isEqualTo("someText");
assertThat(reader.peek()).isEqualTo(CDATA);
assertThat(reader.nextCData()).isEqualTo("\"b\",\n \"c\"");
assertThat(reader.peek()).isEqualTo(END_TAG);
- reader.endTag();
+ assertThat(reader.endTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(EOF);
}
@@ -71,36 +71,28 @@ public final class BaseXmlReaderTest {
}
@Test
- public void testStrictModeFailsToParseUnescapedControlCharacter() {
- String json = "\0";
+ public void testStrictModeFailsToParseUnescapedControlCharacter() throws IOException {
+ String json = "<t a='\0'/>"; // NUL is the 7th character, inside attribute "a". NOTE(review): literal reconstructed from the assertions below - confirm
BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
- IOException expected = assertThrows(IOException.class, reader::nextText);
+ assertThat(reader.beginTag()).isEqualTo("t");
+ assertThat(reader.nextAttributeName()).isEqualTo("a");
+ IOException expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected)
.hasMessageThat()
- .startsWith(
- "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
+ .startsWith("Control character in attribute value at line 1 column 7 (char '\\0') path t.a");
- json = "\t";
+ json = "<t b='\u001F'/>"; // U+001F is the 7th character, inside attribute "b". NOTE(review): literal reconstructed from the assertions below - confirm
reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
- expected = assertThrows(IOException.class, reader::nextText);
+ assertThat(reader.beginTag()).isEqualTo("t");
+ assertThat(reader.nextAttributeName()).isEqualTo("b");
+ expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected)
.hasMessageThat()
- .startsWith(
- "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
-
- json = "\u001F";
- reader = new BaseXmlReader(reader(json));
- reader.setLenient(false);
-
- expected = assertThrows(IOException.class, reader::nextText);
- assertThat(expected)
- .hasMessageThat()
- .startsWith(
- "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
+ .startsWith("Control character in attribute value at line 1 column 7 (char '\\u001f') path t.b");
}
@Test
@@ -110,14 +102,14 @@ public final class BaseXmlReaderTest {
String json = "\"\u007F\u009F\"";
BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
- assertThat(reader.nextText()).isEqualTo("\u007F\u009F");
+ assertThat(reader.nextText()).isEqualTo("\"\u007F\u009F\"");
}
@Test
public void testNonStrictModeParsesUnescapedControlCharacter() throws IOException {
String json = "\"\t\"";
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
- assertThat(reader.nextText()).isEqualTo("\t");
+ assertThat(reader.nextText()).isEqualTo("\"\t\"");
}
@Test