fix(serialize-xml): first passing test run

This commit is contained in:
Johannes Frohnmeyer 2024-04-13 20:32:54 +02:00
parent dbf5d51343
commit 0507254c30
Signed by: Johannes
GPG Key ID: E76429612C2929F4
3 changed files with 25 additions and 105 deletions

View File

@ -230,6 +230,7 @@ public class BaseXmlReader implements Closeable {
} else if (pos < limit || fillBuffer(1)) {
char chNext = buffer[pos + 1];
var check = isNameStart((char) c, chNext);
pos--;
if (check != NameCheck.NONE) {
return peeked = PEEKED_ATTRIBUTE_NAME;
} else {
@ -242,6 +243,7 @@ public class BaseXmlReader implements Closeable {
int c = lenient ? nextNonWhitespace(true) : buffer[pos++];
if (c == '=') {
c = lenient ? nextNonWhitespace(true) : buffer[pos++];
pos--;
if (c == '\'' || c == '"') {
return peeked = PEEKED_ATTRIBUTE_VALUE;
} else {
@ -350,60 +352,10 @@ public class BaseXmlReader implements Closeable {
}
char quote = buffer[pos++];
return readUntil((c, i) -> {
if (!lenient && c < 0x20) throw syntaxError("Control character in attribute value: " + c);
if (!lenient && c < 0x20 && c != 0x09) throw syntaxError("Control character in attribute value");
if (c == '<') throw syntaxError("Expected " + quote + " but was '<'");
return c == quote;
}, true);
// StringBuilder builder = null;
// while (true) {
// p = pos; // repurpose 'p' to save a position since we no longer need it
// int l = limit;
// /* the index of the first character not yet appended to the builder. */
// int start = p;
// while (p < l) {
// int c = buffer[p++];
// if (!lenient && c < 0x20) {
// throw syntaxError("Control character in attribute value: " + c);
// } else if (c == quote) {
// pos = p;
// int len = p - start - 1;
// peeked = PEEKED_NONE;
// if (builder == null) {
// return new String(buffer, start, len);
// } else {
// builder.append(buffer, start, len);
// return builder.toString();
// }
// } else if (c == '&') {
// pos = p;
// int len = p - start - 1;
// if (builder == null) {
// int estimatedLength = (len + 1) * 2;
// builder = new StringBuilder(Math.max(estimatedLength, 16));
// }
// builder.append(buffer, start, len);
// builder.append(readReference());
// p = pos;
// l = limit;
// start = p;
// } else if (c == '\n') {
// lineNumber++;
// lineStart = p;
// } else if (c == '<') {
// throw syntaxError("Expected " + quote + " but was '<'");
// }
// }
//
// if (builder == null) {
// int estimatedLength = (p - start) * 2;
// builder = new StringBuilder(Math.max(estimatedLength, 16));
// }
// builder.append(buffer, start, p - start);
// pos = p;
// if (!fillBuffer(1)) {
// throw syntaxError("Unterminated attribute value");
// }
// }
}
private String readReference() throws IOException {
@ -613,23 +565,6 @@ public class BaseXmlReader implements Closeable {
}
}
/**
 * Advances the position until after the next newline character. If the line is terminated by
 * "\r\n", the '\n' must be consumed as whitespace by the caller.
 *
 * <p>If the loop condition fails before any line terminator is seen (no buffered characters left
 * and {@code fillBuffer(1)} returns false), the method simply returns without signalling an error.
 *
 * @throws IOException declared on the signature; presumably propagated from {@code fillBuffer}
 *     (not visible here; confirm)
 */
private void skipToEndOfLine() throws IOException {
// Continue while unread characters remain in the buffer, or fillBuffer(1) returns true
// (presumably meaning at least one more character was loaded; confirm against fillBuffer).
while (pos < limit || fillBuffer(1)) {
char c = buffer[pos++];
if (c == '\n') {
// The '\n' has been consumed: bump the line counter and record the position right after it
// as the start of the next line.
lineNumber++;
lineStart = pos;
break;
} else if (c == '\r') {
// Stop directly after '\r' without updating the line bookkeeping; a following '\n', if any,
// is left unread for the caller.
break;
}
}
}
/**
* @param toFind a string to search for. Must not contain a newline.
*/
@ -663,20 +598,22 @@ public class BaseXmlReader implements Closeable {
return " at line " + line + " column " + column + charInterjection + " path " + getPath();
}
private String getPath(boolean usePreviousPath) {
StringBuilder result = new StringBuilder().append('$');
public String getPath() {
StringBuilder result = new StringBuilder();
boolean first = true;
for (int i = 0; i < stackSize; i++) {
int scope = stack[i];
switch (scope) {
case XmlScope.TAG_HEAD:
case XmlScope.TAG_BODY:
case XmlScope.DANGLING_NAME:
case XmlScope.TAG_HEAD:
result.append('.');
case XmlScope.NONEMPTY_DOCUMENT:
if (first) first = false;
else result.append('.');
if (pathNames[i] != null) {
result.append(pathNames[i]);
}
break;
case XmlScope.NONEMPTY_DOCUMENT:
case XmlScope.EMPTY_DOCUMENT:
case XmlScope.CLOSED:
break;
@ -687,14 +624,6 @@ public class BaseXmlReader implements Closeable {
return result.toString();
}
/**
 * Returns the result of {@code getPath(false)}, i.e. the path string built with the
 * {@code usePreviousPath} flag turned off.
 */
public String getPath() {
return getPath(false);
}
/**
 * Returns the result of {@code getPath(true)}, i.e. the path string built with the
 * {@code usePreviousPath} flag turned on.
 */
public String getPreviousPath() {
return getPath(true);
}
/**
* Unescapes the character identified by the character or characters that immediately follow a
* backslash. The backslash '\' should have already been read. This supports both Unicode escapes

View File

@ -6,6 +6,5 @@ public class XmlScope {
public static final int DANGLING_NAME = 3;
public static final int EMPTY_DOCUMENT = 4;
public static final int NONEMPTY_DOCUMENT = 5;
public static final int CDATA = 6;
public static final int CLOSED = 7;
}

View File

@ -46,13 +46,13 @@ public final class BaseXmlReaderTest {
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
assertThat(reader.peek()).isEqualTo(BEGIN_TAG);
reader.beginTag();
assertThat(reader.beginTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(TEXT);
assertThat(reader.nextText()).isEqualTo("someText");
assertThat(reader.peek()).isEqualTo(CDATA);
assertThat(reader.nextCData()).isEqualTo("\"b\",\n \"c\"");
assertThat(reader.peek()).isEqualTo(END_TAG);
reader.endTag();
assertThat(reader.endTag()).isEqualTo("tag");
assertThat(reader.peek()).isEqualTo(EOF);
}
@ -71,36 +71,28 @@ public final class BaseXmlReaderTest {
}
@Test
public void testStrictModeFailsToParseUnescapedControlCharacter() {
String json = "\0";
public void testStrictModeFailsToParseUnescapedControlCharacter() throws IOException {
String json = "<t a='\0'/>";
BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
IOException expected = assertThrows(IOException.class, reader::nextText);
assertThat(reader.beginTag()).isEqualTo("t");
assertThat(reader.nextAttributeName()).isEqualTo("a");
IOException expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected)
.hasMessageThat()
.startsWith(
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
.startsWith("Control character in attribute value at line 1 column 7 (char '\\0') path t.a");
json = "\t";
json = "<t b='\u001F'/>";
reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
expected = assertThrows(IOException.class, reader::nextText);
assertThat(reader.beginTag()).isEqualTo("t");
assertThat(reader.nextAttributeName()).isEqualTo("b");
expected = assertThrows(IOException.class, reader::nextAttributeValue);
assertThat(expected)
.hasMessageThat()
.startsWith(
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
json = "\u001F";
reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
expected = assertThrows(IOException.class, reader::nextText);
assertThat(expected)
.hasMessageThat()
.startsWith(
"Unescaped control characters (\\u0000-\\u001F) are not allowed in strict mode");
.startsWith("Control character in attribute value at line 1 column 7 (char '\\u001f') path t.b");
}
@Test
@ -110,14 +102,14 @@ public final class BaseXmlReaderTest {
String json = "\"\u007F\u009F\"";
BaseXmlReader reader = new BaseXmlReader(reader(json));
reader.setLenient(false);
assertThat(reader.nextText()).isEqualTo("\u007F\u009F");
assertThat(reader.nextText()).isEqualTo("\"\u007F\u009F\"");
}
@Test
public void testNonStrictModeParsesUnescapedControlCharacter() throws IOException {
String json = "\"\t\"";
BaseXmlReader reader = new BaseXmlReader(reader(json)).setLenient(true);
assertThat(reader.nextText()).isEqualTo("\t");
assertThat(reader.nextText()).isEqualTo("\"\t\"");
}
@Test