feat(muscript): Abstract lexer away from parser through interface to allow different data sources
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
7ae4cd55b6
commit
b926bf4495
|
@ -4,7 +4,11 @@ import java.util.Objects;
|
|||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
// Heavily inspired by starscript
|
||||
/**
|
||||
* The lexer for muScript, heavily inspired by starscript
|
||||
* @deprecated To be moved into the lexer module and refactored
|
||||
*/
|
||||
@Deprecated(forRemoval = true)
|
||||
public class Lexer extends VersionedComponent {
|
||||
public final String file;
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@ import io.gitlab.jfronny.muscript.ast.dynamic.assign.DynamicAssign;
|
|||
import io.gitlab.jfronny.muscript.ast.literal.DynamicLiteral;
|
||||
import io.gitlab.jfronny.muscript.ast.math.*;
|
||||
import io.gitlab.jfronny.muscript.ast.string.Concatenate;
|
||||
import io.gitlab.jfronny.muscript.compiler.lexer.LegacyLexer;
|
||||
import io.gitlab.jfronny.muscript.compiler.lexer.Lexer;
|
||||
import io.gitlab.jfronny.muscript.data.Script;
|
||||
import io.gitlab.jfronny.muscript.data.dynamic.additional.DFinal;
|
||||
import io.gitlab.jfronny.muscript.error.*;
|
||||
|
@ -20,15 +22,14 @@ import java.util.*;
|
|||
public class Parser extends VersionedComponent {
|
||||
private final Lexer lexer;
|
||||
|
||||
private final TokenData previous = new TokenData();
|
||||
private final TokenData current = new TokenData();
|
||||
private Lexer.Token previous = null;
|
||||
|
||||
/**
 * Parses {@code source} without an associated file name.
 * Delegates to {@link #parse(MuScriptVersion, String, String)}, passing {@code null} as the file.
 */
public static Expr<?> parse(MuScriptVersion version, String source) {
|
||||
return parse(version, source, null);
|
||||
}
|
||||
|
||||
public static Expr<?> parse(MuScriptVersion version, String source, String file) {
|
||||
return new Parser(new Lexer(version, source, file)).parse().optimize();
|
||||
return new Parser(new LegacyLexer(version, source, file)).parse().optimize();
|
||||
}
|
||||
|
||||
public static Script parseScript(MuScriptVersion version, String source) {
|
||||
|
@ -36,7 +37,7 @@ public class Parser extends VersionedComponent {
|
|||
}
|
||||
|
||||
public static Script parseScript(MuScriptVersion version, String source, String file) {
|
||||
return new Parser(new Lexer(version, source, file)).parseScript().optimize();
|
||||
return new Parser(new LegacyLexer(version, source, file)).parseScript().optimize();
|
||||
}
|
||||
|
||||
public static Script parseMultiScript(MuScriptVersion version, String startFile, SourceFS filesystem) {
|
||||
|
@ -76,8 +77,12 @@ public class Parser extends VersionedComponent {
|
|||
return includes;
|
||||
}
|
||||
|
||||
/**
 * Creates a parser from the class-based {@code io.gitlab.jfronny.muscript.compiler.Lexer}
 * by wrapping it in a {@link LegacyLexer} and delegating to the interface-based constructor.
 */
public Parser(io.gitlab.jfronny.muscript.compiler.Lexer lexer) {
|
||||
this(new LegacyLexer(lexer));
|
||||
}
|
||||
|
||||
public Parser(Lexer lexer) {
|
||||
super(lexer.version);
|
||||
super(lexer.version());
|
||||
this.lexer = lexer;
|
||||
}
|
||||
|
||||
|
@ -107,7 +112,7 @@ public class Parser extends VersionedComponent {
|
|||
while (!isAtEnd()) {
|
||||
expressions.add(expression());
|
||||
// Consume semicolon if present
|
||||
if (!lexer.passedNewline & !match(Token.Semicolon) & !isAtEnd() & version.contains(MuScriptVersion.V3)) {
|
||||
if (!lexer.wasNewlinePassed() & !match(Token.Semicolon) & !isAtEnd() & version.contains(MuScriptVersion.V3)) {
|
||||
throw error("Either a semicolon or a new line must separate expressions in scripts");
|
||||
}
|
||||
}
|
||||
|
@ -169,7 +174,7 @@ public class Parser extends VersionedComponent {
|
|||
Expr<?> expr = concat();
|
||||
|
||||
while (match(Token.EqualEqual, Token.BangEqual)) {
|
||||
Token op = previous.token;
|
||||
Token op = previous.token();
|
||||
CodeLocation location = previous.location();
|
||||
Expr<?> right = concat();
|
||||
BoolExpr e = new Equal(location, expr, right);
|
||||
|
@ -196,7 +201,7 @@ public class Parser extends VersionedComponent {
|
|||
Expr<?> expr = term();
|
||||
|
||||
while (match(Token.Greater, Token.GreaterEqual, Token.Less, Token.LessEqual)) {
|
||||
Token op = previous.token;
|
||||
Token op = previous.token();
|
||||
CodeLocation location = previous.location();
|
||||
NumberExpr right = asNumber(term());
|
||||
expr = switch (op) {
|
||||
|
@ -215,7 +220,7 @@ public class Parser extends VersionedComponent {
|
|||
Expr<?> expr = factor();
|
||||
|
||||
while (match(Token.Plus, Token.Minus)) {
|
||||
Token op = previous.token;
|
||||
Token op = previous.token();
|
||||
CodeLocation location = previous.location();
|
||||
NumberExpr right = asNumber(factor());
|
||||
expr = switch (op) {
|
||||
|
@ -232,7 +237,7 @@ public class Parser extends VersionedComponent {
|
|||
Expr<?> expr = exp();
|
||||
|
||||
while (match(Token.Star, Token.Slash, Token.Percentage)) {
|
||||
Token op = previous.token;
|
||||
Token op = previous.token();
|
||||
CodeLocation location = previous.location();
|
||||
NumberExpr right = asNumber(exp());
|
||||
expr = switch (op) {
|
||||
|
@ -260,7 +265,7 @@ public class Parser extends VersionedComponent {
|
|||
|
||||
private Expr<?> unary() {
|
||||
if (match(Token.Bang, Token.Minus)) {
|
||||
Token op = previous.token;
|
||||
Token op = previous.token();
|
||||
CodeLocation location = previous.location();
|
||||
Expr<?> right = unary();
|
||||
return switch (op) {
|
||||
|
@ -278,16 +283,16 @@ public class Parser extends VersionedComponent {
|
|||
|
||||
while (match(Token.LeftParen, Token.Dot, Token.LeftBracket, Token.DoubleColon)) {
|
||||
CodeLocation location = previous.location();
|
||||
expr = switch (previous.token) {
|
||||
expr = switch (previous.token()) {
|
||||
case LeftParen -> finishCall(location, expr);
|
||||
case Dot -> {
|
||||
TokenData name = consume(Token.Identifier, "Expected field name after '.'");
|
||||
yield new Get(location, asDynamic(expr), Expr.literal(name.location(), name.lexeme));
|
||||
Lexer.Token name = consume(Token.Identifier, "Expected field name after '.'");
|
||||
yield new Get(location, asDynamic(expr), Expr.literal(name.location(), name.lexeme()));
|
||||
}
|
||||
case DoubleColon -> {
|
||||
DynamicExpr callable;
|
||||
if (match(Token.Identifier)) {
|
||||
callable = new Variable(previous.location(), previous.lexeme);
|
||||
callable = new Variable(previous.location(), previous.lexeme());
|
||||
} else if (match(Token.LeftParen)) {
|
||||
callable = expression().asDynamicExpr();
|
||||
consume(Token.RightParen, "Expected ')' after expression");
|
||||
|
@ -321,12 +326,12 @@ public class Parser extends VersionedComponent {
|
|||
|
||||
private Expr<?> primary() {
|
||||
if (match(Token.Null)) return Expr.literalNull(previous.location());
|
||||
if (match(Token.String)) return Expr.literal(previous.location(), previous.lexeme);
|
||||
if (match(Token.True, Token.False)) return Expr.literal(previous.location(), previous.lexeme.equals("true"));
|
||||
if (match(Token.Number)) return Expr.literal(previous.location(), Double.parseDouble(previous.lexeme));
|
||||
if (match(Token.String)) return Expr.literal(previous.location(), previous.lexeme());
|
||||
if (match(Token.True, Token.False)) return Expr.literal(previous.location(), previous.lexeme().equals("true"));
|
||||
if (match(Token.Number)) return Expr.literal(previous.location(), Double.parseDouble(previous.lexeme()));
|
||||
if (match(Token.Identifier)) {
|
||||
CodeLocation location = previous.location();
|
||||
String name = previous.lexeme;
|
||||
String name = previous.lexeme();
|
||||
if (match(Token.Assign)) return new DynamicAssign(location, name, expression().asDynamicExpr());
|
||||
else return new Variable(location, name);
|
||||
}
|
||||
|
@ -338,11 +343,11 @@ public class Parser extends VersionedComponent {
|
|||
}
|
||||
|
||||
if (match(Token.LeftBrace)) {
|
||||
int start = previous.start;
|
||||
int start = previous.start();
|
||||
if (match(Token.Arrow)) return finishClosure(start, null, false);
|
||||
if (match(Token.RightBrace)) return new DynamicLiteral<>(location(start, previous.start), DFinal.of(Map.of()));
|
||||
if (match(Token.RightBrace)) return new DynamicLiteral<>(location(start, previous.start()), DFinal.of(Map.of()));
|
||||
consume(Token.Identifier, "Expected arrow or identifier as first element in closure or object");
|
||||
String first = previous.lexeme;
|
||||
String first = previous.lexeme();
|
||||
if (check(Token.Arrow)) return finishClosure(start, first, false);
|
||||
if (match(Token.Ellipsis)) return finishClosure(start, first, true);
|
||||
if (check(Token.Comma)) return finishClosure(start, first, false);
|
||||
|
@ -367,7 +372,7 @@ public class Parser extends VersionedComponent {
|
|||
while (!match(Token.Arrow)) {
|
||||
consume(Token.Comma, "Closure parameters MUST be comma-seperated");
|
||||
consume(Token.Identifier, "Closure arguments MUST be identifiers");
|
||||
boundArgs.add(previous.lexeme);
|
||||
boundArgs.add(previous.lexeme());
|
||||
if (match(Token.Ellipsis)) {
|
||||
variadic = true;
|
||||
consume(Token.Arrow, "Variadic argument MUST be the last argument");
|
||||
|
@ -380,12 +385,12 @@ public class Parser extends VersionedComponent {
|
|||
while (!match(Token.RightBrace)) {
|
||||
expressions.add(expression());
|
||||
// Consume semicolon if present
|
||||
if (!lexer.passedNewline & !match(Token.Semicolon) & version.contains(MuScriptVersion.V3)) {
|
||||
if (!lexer.wasNewlinePassed() & !match(Token.Semicolon) & version.contains(MuScriptVersion.V3)) {
|
||||
if (match(Token.RightBrace)) break;
|
||||
throw error("Either a semicolon or a new line must separate expressions in closures");
|
||||
}
|
||||
}
|
||||
int end = previous.start;
|
||||
int end = previous.start();
|
||||
return new Closure(location(start, end), boundArgs, expressions, variadic);
|
||||
}
|
||||
|
||||
|
@ -394,12 +399,12 @@ public class Parser extends VersionedComponent {
|
|||
content.put(firstArg, firstValue);
|
||||
while (match(Token.Comma)) {
|
||||
consume(Token.Identifier, "Object element MUST start with an identifier");
|
||||
String name = previous.lexeme;
|
||||
String name = previous.lexeme();
|
||||
consume(Token.Assign, "Object element name and value MUST be seperated with '='");
|
||||
content.put(name, expression().asDynamicExpr());
|
||||
}
|
||||
consume(Token.RightBrace, "Expected end of object");
|
||||
return new ObjectLiteral(location(start, previous.start), content);
|
||||
return new ObjectLiteral(location(start, previous.start()), content);
|
||||
}
|
||||
|
||||
// Type conversion
|
||||
|
@ -437,11 +442,11 @@ public class Parser extends VersionedComponent {
|
|||
|
||||
// Helpers
|
||||
private CodeLocation location(int chStart, int chEnd) {
|
||||
return new CodeLocation(chStart, chEnd, lexer.source, lexer.file);
|
||||
return new CodeLocation(chStart, chEnd, lexer.getSource(), lexer.getFile());
|
||||
}
|
||||
|
||||
private ParseException error(String message) {
|
||||
int loc = current.current - 1;
|
||||
int loc = lexer.getPrevious().current() - 1;
|
||||
return new ParseException(PrettyPrintError.builder(location(loc, loc)).setMessage(message).build());
|
||||
}
|
||||
|
||||
|
@ -449,9 +454,9 @@ public class Parser extends VersionedComponent {
|
|||
return new ParseException(PrettyPrintError.builder(expr.location).setMessage(message).build());
|
||||
}
|
||||
|
||||
private TokenData consume(Token token, String message) {
|
||||
private Lexer.Token consume(Token token, String message) {
|
||||
if (check(token)) return advance();
|
||||
throw error(message + " but got " + current.token);
|
||||
throw error(message + " but got " + lexer.getPrevious().token());
|
||||
}
|
||||
|
||||
private boolean match(Token... tokens) {
|
||||
|
@ -467,53 +472,23 @@ public class Parser extends VersionedComponent {
|
|||
|
||||
private boolean check(Token token) {
|
||||
if (isAtEnd()) return false;
|
||||
return current.token == token;
|
||||
return lexer.getPrevious().token() == token;
|
||||
}
|
||||
|
||||
private TokenData advance() {
|
||||
previous.set(current);
|
||||
private Lexer.Token advance() {
|
||||
previous = lexer.getPrevious();
|
||||
|
||||
lexer.next();
|
||||
current.set(lexer.token, lexer.lexeme, lexer.start, lexer.current, lexer.ch);
|
||||
lexer.advance();
|
||||
|
||||
if (current.token == Token.Error) {
|
||||
throw error(current.lexeme);
|
||||
if (lexer.getPrevious().token() == Token.Error) {
|
||||
throw error(lexer.getPrevious().lexeme());
|
||||
}
|
||||
|
||||
return previous;
|
||||
}
|
||||
|
||||
private boolean isAtEnd() {
|
||||
return current.token == Token.EOF;
|
||||
}
|
||||
|
||||
// Token data
|
||||
private class TokenData {
|
||||
public Token token;
|
||||
public String lexeme;
|
||||
public int start, current;
|
||||
public char ch;
|
||||
|
||||
public void set(Token token, String lexeme, int start, int current, char ch) {
|
||||
this.token = token;
|
||||
this.lexeme = lexeme;
|
||||
this.start = start;
|
||||
this.current = current;
|
||||
this.ch = ch;
|
||||
}
|
||||
|
||||
public void set(TokenData data) {
|
||||
set(data.token, data.lexeme, data.start, data.current, data.ch);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s '%s'", token, lexeme);
|
||||
}
|
||||
|
||||
public CodeLocation location() {
|
||||
return new CodeLocation(start, current - 1, lexer.source, lexer.file);
|
||||
}
|
||||
return lexer.getPrevious().token() == Token.EOF;
|
||||
}
|
||||
|
||||
// Parse Exception
|
||||
|
|
|
@ -8,4 +8,8 @@ public abstract class VersionedComponent {
|
|||
public VersionedComponent(MuScriptVersion version) {
|
||||
this.version = Objects.requireNonNull(version);
|
||||
}
|
||||
|
||||
/** Returns the {@link MuScriptVersion} held in this component's {@code version} field. */
public MuScriptVersion getComponentVersion() {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
package io.gitlab.jfronny.muscript.compiler.lexer;
|
||||
|
||||
import io.gitlab.jfronny.muscript.compiler.*;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
/**
|
||||
* Wraps the old Lexer implementation in the new Lexer interface for compatibility
|
||||
*/
|
||||
public class LegacyLexer implements Lexer {
|
||||
private final io.gitlab.jfronny.muscript.compiler.Lexer backend;
|
||||
private Lexer.Token previous = null;
|
||||
|
||||
public LegacyLexer(MuScriptVersion version, String source) {
|
||||
this(new io.gitlab.jfronny.muscript.compiler.Lexer(version, source));
|
||||
}
|
||||
|
||||
public LegacyLexer(MuScriptVersion version, String source, String file) {
|
||||
this(new io.gitlab.jfronny.muscript.compiler.Lexer(version, source, file));
|
||||
}
|
||||
|
||||
public LegacyLexer(io.gitlab.jfronny.muscript.compiler.Lexer backend) {
|
||||
this.backend = backend;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CodeLocation location() {
|
||||
return backend.location();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MuScriptVersion version() {
|
||||
return backend.getComponentVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean wasNewlinePassed() {
|
||||
return backend.passedNewline;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token getPrevious() {
|
||||
return previous;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token advance() {
|
||||
backend.next();
|
||||
return previous = new Token(
|
||||
backend.lexeme,
|
||||
backend.token,
|
||||
backend.start,
|
||||
backend.current,
|
||||
backend.ch,
|
||||
new CodeLocation(backend.start, backend.current - 1, backend.source, backend.file)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public @Nullable String getSource() {
|
||||
return backend.source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public @Nullable String getFile() {
|
||||
return backend.file;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package io.gitlab.jfronny.muscript.compiler.lexer;
|
||||
|
||||
import io.gitlab.jfronny.muscript.compiler.*;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public interface Lexer {
|
||||
CodeLocation location();
|
||||
MuScriptVersion version();
|
||||
boolean wasNewlinePassed();
|
||||
Token getPrevious();
|
||||
Token advance();
|
||||
|
||||
@Nullable String getSource();
|
||||
@Nullable String getFile();
|
||||
|
||||
record Token(String lexeme, io.gitlab.jfronny.muscript.compiler.Token token, int start, int current, char ch, CodeLocation location) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return token + " '" + lexeme + "'";
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user