feat(muscript): Abstract lexer away from parser through interface to allow different data sources
ci/woodpecker/push/woodpecker Pipeline was successful Details

This commit is contained in:
Johannes Frohnmeyer 2023-11-14 18:55:17 +01:00
parent 7ae4cd55b6
commit b926bf4495
Signed by: Johannes
GPG Key ID: E76429612C2929F4
5 changed files with 141 additions and 69 deletions

View File

@ -4,7 +4,11 @@ import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
// Heavily inspired by starscript
/**
* The lexer for muScript, heavily inspired by starscript
* @deprecated To be moved into the lexer module and refactored
*/
@Deprecated(forRemoval = true)
public class Lexer extends VersionedComponent {
public final String file;

View File

@ -10,6 +10,8 @@ import io.gitlab.jfronny.muscript.ast.dynamic.assign.DynamicAssign;
import io.gitlab.jfronny.muscript.ast.literal.DynamicLiteral;
import io.gitlab.jfronny.muscript.ast.math.*;
import io.gitlab.jfronny.muscript.ast.string.Concatenate;
import io.gitlab.jfronny.muscript.compiler.lexer.LegacyLexer;
import io.gitlab.jfronny.muscript.compiler.lexer.Lexer;
import io.gitlab.jfronny.muscript.data.Script;
import io.gitlab.jfronny.muscript.data.dynamic.additional.DFinal;
import io.gitlab.jfronny.muscript.error.*;
@ -20,15 +22,14 @@ import java.util.*;
public class Parser extends VersionedComponent {
private final Lexer lexer;
private final TokenData previous = new TokenData();
private final TokenData current = new TokenData();
private Lexer.Token previous = null;
/**
 * Parses {@code source} as a single expression without associating it with a file name.
 *
 * @param version the muScript version to parse under
 * @param source  the source text to parse
 * @return whatever the three-argument {@code parse} overload returns for a {@code null} file
 */
public static Expr<?> parse(MuScriptVersion version, String source) {
    // No file name is known for this entry point, so hand null to the full overload.
    final String unnamedFile = null;
    return parse(version, source, unnamedFile);
}
public static Expr<?> parse(MuScriptVersion version, String source, String file) {
return new Parser(new Lexer(version, source, file)).parse().optimize();
return new Parser(new LegacyLexer(version, source, file)).parse().optimize();
}
public static Script parseScript(MuScriptVersion version, String source) {
@ -36,7 +37,7 @@ public class Parser extends VersionedComponent {
}
public static Script parseScript(MuScriptVersion version, String source, String file) {
return new Parser(new Lexer(version, source, file)).parseScript().optimize();
return new Parser(new LegacyLexer(version, source, file)).parseScript().optimize();
}
public static Script parseMultiScript(MuScriptVersion version, String startFile, SourceFS filesystem) {
@ -76,8 +77,12 @@ public class Parser extends VersionedComponent {
return includes;
}
/**
 * Creates a parser on top of an old-style {@code io.gitlab.jfronny.muscript.compiler.Lexer}
 * by wrapping it in a {@link LegacyLexer} and delegating to the interface-based constructor.
 *
 * @param lexer the old-style lexer to wrap
 */
public Parser(io.gitlab.jfronny.muscript.compiler.Lexer lexer) {
this(new LegacyLexer(lexer));
}
public Parser(Lexer lexer) {
super(lexer.version);
super(lexer.version());
this.lexer = lexer;
}
@ -107,7 +112,7 @@ public class Parser extends VersionedComponent {
while (!isAtEnd()) {
expressions.add(expression());
// Consume semicolon if present
if (!lexer.passedNewline & !match(Token.Semicolon) & !isAtEnd() & version.contains(MuScriptVersion.V3)) {
if (!lexer.wasNewlinePassed() & !match(Token.Semicolon) & !isAtEnd() & version.contains(MuScriptVersion.V3)) {
throw error("Either a semicolon or a new line must separate expressions in scripts");
}
}
@ -169,7 +174,7 @@ public class Parser extends VersionedComponent {
Expr<?> expr = concat();
while (match(Token.EqualEqual, Token.BangEqual)) {
Token op = previous.token;
Token op = previous.token();
CodeLocation location = previous.location();
Expr<?> right = concat();
BoolExpr e = new Equal(location, expr, right);
@ -196,7 +201,7 @@ public class Parser extends VersionedComponent {
Expr<?> expr = term();
while (match(Token.Greater, Token.GreaterEqual, Token.Less, Token.LessEqual)) {
Token op = previous.token;
Token op = previous.token();
CodeLocation location = previous.location();
NumberExpr right = asNumber(term());
expr = switch (op) {
@ -215,7 +220,7 @@ public class Parser extends VersionedComponent {
Expr<?> expr = factor();
while (match(Token.Plus, Token.Minus)) {
Token op = previous.token;
Token op = previous.token();
CodeLocation location = previous.location();
NumberExpr right = asNumber(factor());
expr = switch (op) {
@ -232,7 +237,7 @@ public class Parser extends VersionedComponent {
Expr<?> expr = exp();
while (match(Token.Star, Token.Slash, Token.Percentage)) {
Token op = previous.token;
Token op = previous.token();
CodeLocation location = previous.location();
NumberExpr right = asNumber(exp());
expr = switch (op) {
@ -260,7 +265,7 @@ public class Parser extends VersionedComponent {
private Expr<?> unary() {
if (match(Token.Bang, Token.Minus)) {
Token op = previous.token;
Token op = previous.token();
CodeLocation location = previous.location();
Expr<?> right = unary();
return switch (op) {
@ -278,16 +283,16 @@ public class Parser extends VersionedComponent {
while (match(Token.LeftParen, Token.Dot, Token.LeftBracket, Token.DoubleColon)) {
CodeLocation location = previous.location();
expr = switch (previous.token) {
expr = switch (previous.token()) {
case LeftParen -> finishCall(location, expr);
case Dot -> {
TokenData name = consume(Token.Identifier, "Expected field name after '.'");
yield new Get(location, asDynamic(expr), Expr.literal(name.location(), name.lexeme));
Lexer.Token name = consume(Token.Identifier, "Expected field name after '.'");
yield new Get(location, asDynamic(expr), Expr.literal(name.location(), name.lexeme()));
}
case DoubleColon -> {
DynamicExpr callable;
if (match(Token.Identifier)) {
callable = new Variable(previous.location(), previous.lexeme);
callable = new Variable(previous.location(), previous.lexeme());
} else if (match(Token.LeftParen)) {
callable = expression().asDynamicExpr();
consume(Token.RightParen, "Expected ')' after expression");
@ -321,12 +326,12 @@ public class Parser extends VersionedComponent {
private Expr<?> primary() {
if (match(Token.Null)) return Expr.literalNull(previous.location());
if (match(Token.String)) return Expr.literal(previous.location(), previous.lexeme);
if (match(Token.True, Token.False)) return Expr.literal(previous.location(), previous.lexeme.equals("true"));
if (match(Token.Number)) return Expr.literal(previous.location(), Double.parseDouble(previous.lexeme));
if (match(Token.String)) return Expr.literal(previous.location(), previous.lexeme());
if (match(Token.True, Token.False)) return Expr.literal(previous.location(), previous.lexeme().equals("true"));
if (match(Token.Number)) return Expr.literal(previous.location(), Double.parseDouble(previous.lexeme()));
if (match(Token.Identifier)) {
CodeLocation location = previous.location();
String name = previous.lexeme;
String name = previous.lexeme();
if (match(Token.Assign)) return new DynamicAssign(location, name, expression().asDynamicExpr());
else return new Variable(location, name);
}
@ -338,11 +343,11 @@ public class Parser extends VersionedComponent {
}
if (match(Token.LeftBrace)) {
int start = previous.start;
int start = previous.start();
if (match(Token.Arrow)) return finishClosure(start, null, false);
if (match(Token.RightBrace)) return new DynamicLiteral<>(location(start, previous.start), DFinal.of(Map.of()));
if (match(Token.RightBrace)) return new DynamicLiteral<>(location(start, previous.start()), DFinal.of(Map.of()));
consume(Token.Identifier, "Expected arrow or identifier as first element in closure or object");
String first = previous.lexeme;
String first = previous.lexeme();
if (check(Token.Arrow)) return finishClosure(start, first, false);
if (match(Token.Ellipsis)) return finishClosure(start, first, true);
if (check(Token.Comma)) return finishClosure(start, first, false);
@ -367,7 +372,7 @@ public class Parser extends VersionedComponent {
while (!match(Token.Arrow)) {
consume(Token.Comma, "Closure parameters MUST be comma-seperated");
consume(Token.Identifier, "Closure arguments MUST be identifiers");
boundArgs.add(previous.lexeme);
boundArgs.add(previous.lexeme());
if (match(Token.Ellipsis)) {
variadic = true;
consume(Token.Arrow, "Variadic argument MUST be the last argument");
@ -380,12 +385,12 @@ public class Parser extends VersionedComponent {
while (!match(Token.RightBrace)) {
expressions.add(expression());
// Consume semicolon if present
if (!lexer.passedNewline & !match(Token.Semicolon) & version.contains(MuScriptVersion.V3)) {
if (!lexer.wasNewlinePassed() & !match(Token.Semicolon) & version.contains(MuScriptVersion.V3)) {
if (match(Token.RightBrace)) break;
throw error("Either a semicolon or a new line must separate expressions in closures");
}
}
int end = previous.start;
int end = previous.start();
return new Closure(location(start, end), boundArgs, expressions, variadic);
}
@ -394,12 +399,12 @@ public class Parser extends VersionedComponent {
content.put(firstArg, firstValue);
while (match(Token.Comma)) {
consume(Token.Identifier, "Object element MUST start with an identifier");
String name = previous.lexeme;
String name = previous.lexeme();
consume(Token.Assign, "Object element name and value MUST be seperated with '='");
content.put(name, expression().asDynamicExpr());
}
consume(Token.RightBrace, "Expected end of object");
return new ObjectLiteral(location(start, previous.start), content);
return new ObjectLiteral(location(start, previous.start()), content);
}
// Type conversion
@ -437,11 +442,11 @@ public class Parser extends VersionedComponent {
// Helpers
private CodeLocation location(int chStart, int chEnd) {
return new CodeLocation(chStart, chEnd, lexer.source, lexer.file);
return new CodeLocation(chStart, chEnd, lexer.getSource(), lexer.getFile());
}
private ParseException error(String message) {
int loc = current.current - 1;
int loc = lexer.getPrevious().current() - 1;
return new ParseException(PrettyPrintError.builder(location(loc, loc)).setMessage(message).build());
}
@ -449,9 +454,9 @@ public class Parser extends VersionedComponent {
return new ParseException(PrettyPrintError.builder(expr.location).setMessage(message).build());
}
private TokenData consume(Token token, String message) {
private Lexer.Token consume(Token token, String message) {
if (check(token)) return advance();
throw error(message + " but got " + current.token);
throw error(message + " but got " + lexer.getPrevious().token());
}
private boolean match(Token... tokens) {
@ -467,53 +472,23 @@ public class Parser extends VersionedComponent {
private boolean check(Token token) {
if (isAtEnd()) return false;
return current.token == token;
return lexer.getPrevious().token() == token;
}
private TokenData advance() {
previous.set(current);
private Lexer.Token advance() {
previous = lexer.getPrevious();
lexer.next();
current.set(lexer.token, lexer.lexeme, lexer.start, lexer.current, lexer.ch);
lexer.advance();
if (current.token == Token.Error) {
throw error(current.lexeme);
if (lexer.getPrevious().token() == Token.Error) {
throw error(lexer.getPrevious().lexeme());
}
return previous;
}
private boolean isAtEnd() {
return current.token == Token.EOF;
}
// Token data
private class TokenData {
public Token token;
public String lexeme;
public int start, current;
public char ch;
public void set(Token token, String lexeme, int start, int current, char ch) {
this.token = token;
this.lexeme = lexeme;
this.start = start;
this.current = current;
this.ch = ch;
}
public void set(TokenData data) {
set(data.token, data.lexeme, data.start, data.current, data.ch);
}
@Override
public String toString() {
return String.format("%s '%s'", token, lexeme);
}
public CodeLocation location() {
return new CodeLocation(start, current - 1, lexer.source, lexer.file);
}
return lexer.getPrevious().token() == Token.EOF;
}
// Parse Exception

View File

@ -8,4 +8,8 @@ public abstract class VersionedComponent {
/**
 * Stores the version this component operates under.
 *
 * @param version the muScript version; must not be {@code null}
 * @throws NullPointerException if {@code version} is {@code null}
 */
public VersionedComponent(MuScriptVersion version) {
    // Reject null up front, then keep the validated reference.
    Objects.requireNonNull(version);
    this.version = version;
}
/**
 * Returns the version stored in this component's {@code version} field.
 *
 * @return the muScript version of this component
 */
public MuScriptVersion getComponentVersion() {
    return this.version;
}
}

View File

@ -0,0 +1,67 @@
package io.gitlab.jfronny.muscript.compiler.lexer;
import io.gitlab.jfronny.muscript.compiler.*;
import org.jetbrains.annotations.Nullable;
/**
 * Wraps the old Lexer implementation in the new {@link Lexer} interface for compatibility.
 *
 * <p>Each call to {@link #advance()} steps the wrapped lexer once and copies its public
 * fields into a {@link Lexer.Token}. That token is then also available through
 * {@link #getPrevious()} until the next call to {@link #advance()}.
 */
public class LegacyLexer implements Lexer {
    /** The wrapped old-style lexer that performs the actual scanning. */
    private final io.gitlab.jfronny.muscript.compiler.Lexer backend;
    /** Token created by the most recent {@link #advance()}; {@code null} before the first call. */
    private Lexer.Token previous = null;

    /**
     * Creates a legacy lexer over {@code source} using the old lexer's two-argument constructor.
     *
     * @param version the muScript version to lex under
     * @param source  the source text
     */
    public LegacyLexer(MuScriptVersion version, String source) {
        this(new io.gitlab.jfronny.muscript.compiler.Lexer(version, source));
    }

    /**
     * Creates a legacy lexer over {@code source} using the old lexer's three-argument constructor.
     *
     * @param version the muScript version to lex under
     * @param source  the source text
     * @param file    the file name passed on to the old lexer
     */
    public LegacyLexer(MuScriptVersion version, String source, String file) {
        this(new io.gitlab.jfronny.muscript.compiler.Lexer(version, source, file));
    }

    /**
     * Wraps an existing old-style lexer.
     *
     * @param backend the lexer to delegate to; must not be {@code null}
     * @throws NullPointerException if {@code backend} is {@code null}
     */
    public LegacyLexer(io.gitlab.jfronny.muscript.compiler.Lexer backend) {
        // Fail at construction instead of at the first delegated call.
        this.backend = java.util.Objects.requireNonNull(backend, "backend");
    }

    /** Delegates to the wrapped lexer's {@code location()}. */
    @Override
    public CodeLocation location() {
        return backend.location();
    }

    /** Returns the wrapped lexer's component version. */
    @Override
    public MuScriptVersion version() {
        return backend.getComponentVersion();
    }

    /** Returns the wrapped lexer's current {@code passedNewline} flag. */
    @Override
    public boolean wasNewlinePassed() {
        return backend.passedNewline;
    }

    /**
     * Returns the token created by the most recent {@link #advance()}.
     *
     * @return that token, or {@code null} if {@link #advance()} has not been called yet
     */
    @Override
    public Token getPrevious() {
        return previous;
    }

    /**
     * Steps the wrapped lexer to its next token and snapshots its state.
     *
     * @return the newly created token, which is also remembered for {@link #getPrevious()}
     */
    @Override
    public Token advance() {
        // The backend must be stepped before its fields are read: they describe the token it just lexed.
        backend.next();
        return previous = new Token(
                backend.lexeme,
                backend.token,
                backend.start,
                backend.current,
                backend.ch,
                // End index is current - 1, the same span the parser's removed TokenData.location() used.
                new CodeLocation(backend.start, backend.current - 1, backend.source, backend.file)
        );
    }

    /** Returns the wrapped lexer's {@code source} field. */
    @Override
    public @Nullable String getSource() {
        return backend.source;
    }

    /** Returns the wrapped lexer's {@code file} field. */
    @Override
    public @Nullable String getFile() {
        return backend.file;
    }
}

View File

@ -0,0 +1,22 @@
package io.gitlab.jfronny.muscript.compiler.lexer;
import io.gitlab.jfronny.muscript.compiler.*;
import org.jetbrains.annotations.Nullable;
/**
 * Source of tokens for the parser, independent of where the tokens come from.
 */
public interface Lexer {
    /**
     * Returns a code location reported by this lexer.
     * NOTE(review): the exact span this covers is up to the implementation — confirm.
     */
    CodeLocation location();

    /** Returns the muScript version this lexer works with. */
    MuScriptVersion version();

    /**
     * Reports whether a newline was passed.
     * NOTE(review): presumably while scanning up to the latest token — confirm against implementations.
     */
    boolean wasNewlinePassed();

    /**
     * Returns the token most recently produced by {@link #advance()}.
     * NOTE(review): {@code LegacyLexer} returns {@code null} here before the first
     * {@link #advance()} — confirm whether that is part of the contract.
     */
    Token getPrevious();

    /** Moves on to the next token and returns it. */
    Token advance();

    /** Returns the source text being lexed, if available. */
    @Nullable String getSource();

    /** Returns the name of the file being lexed, if available. */
    @Nullable String getFile();

    /**
     * A single lexed token.
     *
     * @param lexeme   the token's text
     * @param token    the token type
     * @param start    the start index of the token
     * @param current  the lexer's {@code current} index captured with the token
     * @param ch       the lexer's {@code ch} value captured with the token
     * @param location the code location of the token
     */
    record Token(String lexeme, io.gitlab.jfronny.muscript.compiler.Token token, int start, int current, char ch, CodeLocation location) {
        /** Renders the token as its type followed by the lexeme in single quotes. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(token);
            text.append(" '");
            text.append(lexeme);
            text.append("'");
            return text.toString();
        }
    }
}