fix(serialize-xml): first passing test run

This commit is contained in:
Johannes Frohnmeyer 2024-04-13 20:32:54 +02:00
parent dbf5d51343
commit 0507254c30
Signed by: Johannes
GPG Key ID: E76429612C2929F4
3 changed files with 25 additions and 105 deletions

View File

@ -230,6 +230,7 @@ public class BaseXmlReader implements Closeable {
} else if (pos < limit || fillBuffer(1)) { } else if (pos < limit || fillBuffer(1)) {
char chNext = buffer[pos + 1]; char chNext = buffer[pos + 1];
var check = isNameStart((char) c, chNext); var check = isNameStart((char) c, chNext);
pos--;
if (check != NameCheck.NONE) { if (check != NameCheck.NONE) {
return peeked = PEEKED_ATTRIBUTE_NAME; return peeked = PEEKED_ATTRIBUTE_NAME;
} else { } else {
@ -242,6 +243,7 @@ public class BaseXmlReader implements Closeable {
int c = lenient ? nextNonWhitespace(true) : buffer[pos++]; int c = lenient ? nextNonWhitespace(true) : buffer[pos++];
if (c == '=') { if (c == '=') {
c = lenient ? nextNonWhitespace(true) : buffer[pos++]; c = lenient ? nextNonWhitespace(true) : buffer[pos++];
pos--;
if (c == '\'' || c == '"') { if (c == '\'' || c == '"') {
return peeked = PEEKED_ATTRIBUTE_VALUE; return peeked = PEEKED_ATTRIBUTE_VALUE;
} else { } else {
@ -350,60 +352,10 @@ public class BaseXmlReader implements Closeable {
} }
char quote = buffer[pos++]; char quote = buffer[pos++];
return readUntil((c, i) -> { return readUntil((c, i) -> {
if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c); if (!lenient && c < 0x20 && c != 0x09) throw syntaxError("Control character in attribute value");
if (c == '<') throw syntaxError("Expected " + quote + " but was '<'"); if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
return c == quote; return c == quote;
}, true); }, true);
// StringBuilder builder = null;
// while (true) {
// p = pos; // repurpose 'p' to save a position since we no longer need it
// int l = limit;
// /* the index of the first character not yet appended to the builder. */
// int start = p;
// while (p < l) {
// int c = buffer[p++];
// if (!lenient && c < 0x20) {
// throw syntaxError("Control character in attribute value: " + c);
// } else if (c == quote) {
// pos = p;
// int len = p - start - 1;
// peeked = PEEKED_NONE;
// if (builder == null) {
// return new String(buffer, start, len);
// } else {
// builder.append(buffer, start, len);
// return builder.toString();
// }
// } else if (c == '&') {
// pos = p;
// int len = p - start - 1;
// if (builder == null) {
// int estimatedLength = (len + 1) * 2;
// builder = new StringBuilder(Math.max(estimatedLength, 16));
// }
// builder.append(buffer, start, len);
// builder.append(readReference());
// p = pos;
// l = limit;
// start = p;
// } else if (c == '\n') {
// lineNumber++;
// lineStart = p;
// } else if (c == '<') {
// throw syntaxError("Expected " + quote + " but was '<'");
// }
// }
//
// if (builder == null) {
// int estimatedLength = (p - start) * 2;
// builder = new StringBuilder(Math.max(estimatedLength, 16));
// }
// builder.append(buffer, start, p - start);
// pos = p;
// if (!fillBuffer(1)) {
// throw syntaxError("Unterminated attribute value");
// }
// }
} }
private String readReference() throws IOException { private String readReference() throws IOException {
@ -613,23 +565,6 @@ public class BaseXmlReader implements Closeable {
} }
} }
/**
* Advances the position until after the next newline character. If the line is terminated by
* "\r\n", the '\n' must be consumed as whitespace by the caller.
*/
private void skipToEndOfLine() throws IOException {
while (pos < limit || fillBuffer(1)) {
char c = buffer[pos++];
if (c == '\n') {
lineNumber++;
lineStart = pos;
break;
} else if (c == '\r') {
break;
}
}
}
/** /**
* @param toFind a string to search for. Must not contain a newline. * @param toFind a string to search for. Must not contain a newline.
*/ */
@ -663,20 +598,22 @@ public class BaseXmlReader implements Closeable {
return " at line " + line + " column " + column + charInterjection + " path " + getPath(); return " at line " + line + " column " + column + charInterjection + " path " + getPath();
} }
private String getPath(boolean usePreviousPath) { public String getPath() {
StringBuilder result = new StringBuilder().append('$'); StringBuilder result = new StringBuilder();
boolean first = true;
for (int i = 0; i < stackSize; i++) { for (int i = 0; i < stackSize; i++) {
int scope = stack[i]; int scope = stack[i];
switch (scope) { switch (scope) {
case XmlScope.TAG_HEAD:
case XmlScope.TAG_BODY: case XmlScope.TAG_BODY:
case XmlScope.DANGLING_NAME: case XmlScope.DANGLING_NAME:
case XmlScope.TAG_HEAD: case XmlScope.NONEMPTY_DOCUMENT:
result.append('.'); if (first) first = false;
else result.append('.');
if (pathNames[i] != null) { if (pathNames[i] != null) {
result.append(pathNames[i]); result.append(pathNames[i]);
} }
break; break;
case XmlScope.NONEMPTY_DOCUMENT:
case XmlScope.EMPTY_DOCUMENT: case XmlScope.EMPTY_DOCUMENT:
case XmlScope.CLOSED: case XmlScope.CLOSED:
break; break;
@ -687,14 +624,6 @@ public class BaseXmlReader implements Closeable {
return result.toString(); return result.toString();
} }
public String getPath() {
return getPath(false);
}
public String getPreviousPath() {
return getPath(true);
}
/** /**
* Unescapes the character identified by the character or characters that immediately follow a * Unescapes the character identified by the character or characters that immediately follow a
* backslash. The backslash '\' should have already been read. This supports both Unicode escapes * backslash. The backslash '\' should have already been read. This supports both Unicode escapes

View File

@ -6,6 +6,5 @@ public class XmlScope {
public static final int DANGLING_NAME = 3; public static final int DANGLING_NAME = 3;
public static final int EMPTY_DOCUMENT = 4; public static final int EMPTY_DOCUMENT = 4;
public static final int NONEMPTY_DOCUMENT = 5; public static final int NONEMPTY_DOCUMENT = 5;
public static final int CDATA = 6;
public static final int CLOSED = 7; public static final int CLOSED = 7;
} }

View File

@ -46,13 +46,13 @@ public final class BaseXmlReaderTest {
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true); BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
assertThat(reader.peek()).isEqualTo(BEGIN_TAG); assertThat(reader.peek()).isEqualTo(BEGIN_TAG);
reader.beginTag(); assertThat(reader.beginTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(TEXT); assertThat(reader.peek()).isEqualTo(TEXT);
assertThat(reader.nextText()).isEqualTo("someText"); assertThat(reader.nextText()).isEqualTo("someText");
assertThat(reader.peek()).isEqualTo(CDATA); assertThat(reader.peek()).isEqualTo(CDATA);
assertThat(reader.nextCData()).isEqualTo("\"b\",\n \"c\""); assertThat(reader.nextCData()).isEqualTo("\"b\",\n \"c\"");
assertThat(reader.peek()).isEqualTo(END_TAG); assertThat(reader.peek()).isEqualTo(END_TAG);
reader.endTag(); assertThat(reader.endTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(EOF); assertThat(reader.peek()).isEqualTo(EOF);
} }
@ -71,36 +71,28 @@ public final class BaseXmlReaderTest {
} }
@Test @Test
public void testStrictModeFailsToParseUnescapedControlCharacter() { public void testStrictModeFailsToParseUnescapedControlCharacter() throws IOException {
String json = "\0"; String json = "<t a='\0'/>";
BaseXmlReader reader = new BaseXmlReader(reader(json)); BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false); reader.setLenient(false);
IOException expected = assertThrows(IOException.class, reader::nextText); assertThat(reader.beginTag()).isEqualTo("t");
assertThat(reader.nextAttributeName()).isEqualTo("a");
IOException expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected) assertThat(expected)
.hasMessageThat() .hasMessageThat()
.startsWith( .startsWith("Control character in attribute value at line 1 column 7 (char '\\0') path t.a");
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
json = "\t"; json = "<t b='\u001F'/>";
reader = new BaseXmlReader(reader(json)); reader = new BaseXmlReader(reader(json));
reader.setLenient(false); reader.setLenient(false);
expected = assertThrows(IOException.class, reader::nextText); assertThat(reader.beginTag()).isEqualTo("t");
assertThat(reader.nextAttributeName()).isEqualTo("b");
expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected) assertThat(expected)
.hasMessageThat() .hasMessageThat()
.startsWith( .startsWith("Control character in attribute value at line 1 column 7 (char '\\u001f') path t.b");
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
json = "\u001F";
reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
expected = assertThrows(IOException.class, reader::nextText);
assertThat(expected)
.hasMessageThat()
.startsWith(
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
} }
@Test @Test
@ -110,14 +102,14 @@ public final class BaseXmlReaderTest {
String json = "\"\u007F\u009F\""; String json = "\"\u007F\u009F\"";
BaseXmlReader reader = new BaseXmlReader(reader(json)); BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false); reader.setLenient(false);
assertThat(reader.nextText()).isEqualTo("\u007F\u009F"); assertThat(reader.nextText()).isEqualTo("\"\u007F\u009F\"");
} }
@Test @Test
public void testNonStrictModeParsesUnescapedControlCharacter() throws IOException { public void testNonStrictModeParsesUnescapedControlCharacter() throws IOException {
String json = "\"\t\""; String json = "\"\t\"";
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true); BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
assertThat(reader.nextText()).isEqualTo("\t"); assertThat(reader.nextText()).isEqualTo("\"\t\"");
} }
@Test @Test