diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java index b04fa3c..c6293d1 100644 --- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java +++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/BaseXmlReader.java @@ -230,6 +230,7 @@ public class BaseXmlReader implements Closeable { } else if (pos < limit || fillBuffer(1)) { char chNext = buffer[pos + 1]; var check = isNameStart((char) c, chNext); + pos--; if (check != NameCheck.NONE) { return peeked = PEEKED_ATTRIBUTE_NAME; } else { @@ -242,6 +243,7 @@ public class BaseXmlReader implements Closeable { int c = lenient ? nextNonWhitespace(true) : buffer[pos++]; if (c == '=') { c = lenient ? nextNonWhitespace(true) : buffer[pos++]; + pos--; if (c == '\'' || c == '"') { return peeked = PEEKED_ATTRIBUTE_VALUE; } else { @@ -350,60 +352,10 @@ public class BaseXmlReader implements Closeable { } char quote = buffer[pos++]; return readUntil((c, i) -> { - if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c); + if (!lenient && c < 0x20 && c != 0x09) throw syntaxError("Control character in attribute value"); if (c == '<') throw syntaxError("Expected " + quote + " but was '<'"); return c == quote; }, true); -// StringBuilder builder = null; -// while (true) { -// p = pos; // repurpose 'p' to save a position since we no longer need it -// int l = limit; -// /* the index of the first character not yet appended to the builder. */ -// int start = p; -// while (p < l) { -// int c = buffer[p++]; -// if (!lenient && c < 0x20) { -// throw syntaxError("Control character in attribute value: " + c); -// } else if (c == quote) { -// pos = p; -// int len = p - start - 1; -// peeked = PEEKED_NONE; -// if (builder == null) { -// return new String(buffer, start, len); -// } else { -// builder.append(buffer, start, len); -// return builder.toString(); -// } -// } else if (c == '&') { -// pos = p; -// int len = p - start - 1; -// if (builder == null) { -// int estimatedLength = (len + 1) * 2; -// builder = new StringBuilder(Math.max(estimatedLength, 16)); -// } -// builder.append(buffer, start, len); -// builder.append(readReference()); -// p = pos; -// l = limit; -// start = p; -// } else if (c == '\n') { -// lineNumber++; -// lineStart = p; -// } else if (c == '<') { -// throw syntaxError("Expected " + quote + " but was '<'"); -// } -// } -// -// if (builder == null) { -// int estimatedLength = (p - start) * 2; -// builder = new StringBuilder(Math.max(estimatedLength, 16)); -// } -// builder.append(buffer, start, p - start); -// pos = p; -// if (!fillBuffer(1)) { -// throw syntaxError("Unterminated attribute value"); -// } -// } } private String readReference() throws IOException { @@ -613,23 +565,6 @@ public class BaseXmlReader implements Closeable { } } - /** - * Advances the position until after the next newline character. If the line is terminated by - * "\r\n", the '\n' must be consumed as whitespace by the caller. - */ - private void skipToEndOfLine() throws IOException { - while (pos < limit || fillBuffer(1)) { - char c = buffer[pos++]; - if (c == '\n') { - lineNumber++; - lineStart = pos; - break; - } else if (c == '\r') { - break; - } - } - } - /** * @param toFind a string to search for. Must not contain a newline. */ @@ -663,20 +598,22 @@ public class BaseXmlReader implements Closeable { return " at line " + line + " column " + column + charInterjection + " path " + getPath(); } - private String getPath(boolean usePreviousPath) { - StringBuilder result = new StringBuilder().append('$'); + public String getPath() { + StringBuilder result = new StringBuilder(); + boolean first = true; for (int i = 0; i < stackSize; i++) { int scope = stack[i]; switch (scope) { + case XmlScope.TAG_HEAD: case XmlScope.TAG_BODY: case XmlScope.DANGLING_NAME: - case XmlScope.TAG_HEAD: - result.append('.'); + case XmlScope.NONEMPTY_DOCUMENT: + if (first) first = false; + else result.append('.'); if (pathNames[i] != null) { result.append(pathNames[i]); } break; - case XmlScope.NONEMPTY_DOCUMENT: case XmlScope.EMPTY_DOCUMENT: case XmlScope.CLOSED: break; @@ -687,14 +624,6 @@ public class BaseXmlReader implements Closeable { return result.toString(); } - public String getPath() { - return getPath(false); - } - - public String getPreviousPath() { - return getPath(true); - } - /** * Unescapes the character identified by the character or characters that immediately follow a * backslash. The backslash '\' should have already been read. This supports both Unicode escapes diff --git a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java index ad09d8d..42631ab 100644 --- a/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java +++ b/commons-serialize-xml/src/main/java/io/gitlab/jfronny/commons/serialize/xml/impl/XmlScope.java @@ -6,6 +6,5 @@ public class XmlScope { public static final int DANGLING_NAME = 3; public static final int EMPTY_DOCUMENT = 4; public static final int NONEMPTY_DOCUMENT = 5; - public static final int CDATA = 6; public static final int CLOSED = 7; } diff --git a/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java b/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java index 1f95e8c..0edb7b9 100644 --- a/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java +++ b/commons-serialize-xml/src/test/java/io/gitlab/jfronny/commons/serialize/xml/test/BaseXmlReaderTest.java @@ -46,13 +46,13 @@ public final class BaseXmlReaderTest { BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true); assertThat(reader.peek()).isEqualTo(BEGIN_TAG); - reader.beginTag(); + assertThat(reader.beginTag()).isEqualTo("tag"); assertThat(reader.peek()).isEqualTo(TEXT); assertThat(reader.nextText()).isEqualTo("someText"); assertThat(reader.peek()).isEqualTo(CDATA); assertThat(reader.nextCData()).isEqualTo("\"b\",\n \"c\""); assertThat(reader.peek()).isEqualTo(END_TAG); - reader.endTag(); + assertThat(reader.endTag()).isEqualTo("tag"); assertThat(reader.peek()).isEqualTo(EOF); } @@ -71,36 +71,28 @@ public final class BaseXmlReaderTest { } @Test - public void testStrictModeFailsToParseUnescapedControlCharacter() { - String json = "\0"; + public void testStrictModeFailsToParseUnescapedControlCharacter() throws IOException { + String json = ""; BaseXmlReader reader = new BaseXmlReader(reader(json)); reader.setLenient(false); - IOException expected = assertThrows(IOException.class, reader::nextText); + assertThat(reader.beginTag()).isEqualTo("t"); + assertThat(reader.nextAttributeName()).isEqualTo("a"); + IOException expected = assertThrows(IOException.class, reader::nextAttributeValue); assertThat(expected) .hasMessageThat() - .startsWith( - "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode"); + .startsWith("Control character in attribute value at line 1 column 7 (char '\\0') path t.a"); - json = "\t"; + json = ""; reader = new BaseXmlReader(reader(json)); reader.setLenient(false); - expected = assertThrows(IOException.class, reader::nextText); + assertThat(reader.beginTag()).isEqualTo("t"); + assertThat(reader.nextAttributeName()).isEqualTo("b"); + expected = assertThrows(IOException.class, reader::nextAttributeValue); assertThat(expected) .hasMessageThat() - .startsWith( - "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode"); - - json = "\u001F"; - reader = new BaseXmlReader(reader(json)); - reader.setLenient(false); - - expected = assertThrows(IOException.class, reader::nextText); - assertThat(expected) - .hasMessageThat() - .startsWith( - "Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode"); + .startsWith("Control character in attribute value at line 1 column 7 (char '\\u001f') path t.b"); } @Test @@ -110,14 +102,14 @@ public final class BaseXmlReaderTest { String json = "\"\u007F\u009F\""; BaseXmlReader reader = new BaseXmlReader(reader(json)); reader.setLenient(false); - assertThat(reader.nextText()).isEqualTo("\u007F\u009F"); + assertThat(reader.nextText()).isEqualTo("\"\u007F\u009F\""); } @Test public void testNonStrictModeParsesUnescapedControlCharacter() throws IOException { String json = "\"\t\""; BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true); - assertThat(reader.nextText()).isEqualTo("\t"); + assertThat(reader.nextText()).isEqualTo("\"\t\""); } @Test