java-commons/muscript/src/main/java/io/gitlab/jfronny/muscript/compiler/Lexer.java

233 lines
6.9 KiB
Java

package io.gitlab.jfronny.muscript.compiler;
import java.util.Set;
import java.util.regex.Pattern;
// Heavily inspired by starscript
public class Lexer {
/**
* The type of the token
*/
public Token token;
/**
* The string representation of the token
*/
public String lexeme;
public char ch;
public final String source;
public int start, current;
public Lexer(String source) {
this.source = source;
}
/**
* Scans for the next token storing it in {@link Lexer#token} and {@link Lexer#lexeme}. Produces {@link Token#EOF} if the end of source code has been reached and {@link Token#Error} if there has been an error
*/
public void next() {
start = current;
if (isAtEnd()) {
createToken(Token.EOF);
return;
}
// Scan expression
skipWhitespaceAndComments();
if (isAtEnd()) {
createToken(Token.EOF);
return;
}
char c = advance();
if (isDigit(c)) number();
else if (isIdentifier(c)) identifier();
else {
switch (c) {
case '\'', '"' -> string(c);
case '=' -> {
if (match('=')) createToken(Token.EqualEqual);
else createToken(Token.Assign);
}
case '!' -> createToken(match('=') ? Token.BangEqual : Token.Bang);
case '+' -> createToken(Token.Plus);
case '-' -> {
if (match('>')) createToken(Token.Arrow);
else createToken(Token.Minus);
}
case '*' -> createToken(Token.Star);
case '/' -> createToken(Token.Slash);
case '%' -> createToken(Token.Percentage);
case '>' -> createToken(match('=') ? Token.GreaterEqual : Token.Greater);
case '<' -> createToken(match('=') ? Token.LessEqual : Token.Less);
case '&' -> createToken(Token.And);
case '|' -> createToken(match('|') ? Token.Concat : Token.Or);
case '^' -> createToken(Token.UpArrow);
case '.' -> {
if (match('.')) {
if (match('.')) createToken(Token.Ellipsis);
else createToken(Token.Error, "Unexpected '..', did you mean '...'?");
} else createToken(Token.Dot);
}
case ',' -> createToken(Token.Comma);
case '?' -> createToken(Token.QuestionMark);
case ':' -> createToken(match(':') ? Token.DoubleColon : Token.Colon);
case '(' -> createToken(Token.LeftParen);
case ')' -> createToken(Token.RightParen);
case '[' -> createToken(Token.LeftBracket);
case ']' -> createToken(Token.RightBracket);
case '{' -> createToken(Token.LeftBrace);
case '}' -> createToken(Token.RightBrace);
case ';' -> createToken(Token.Semicolon);
default -> unexpected();
}
}
}
private void string(char stringChar) {
while (!isAtEnd() && peek() != stringChar) {
advance();
}
if (isAtEnd()) {
createToken(Token.Error, "Unterminated expression.");
} else {
advance();
createToken(Token.String, source.substring(start + 1, current - 1));
}
}
private void number() {
while (isDigit(peek())) advance();
if (peek() == '.' && isDigit(peekNext())) {
advance();
while (isDigit(peek())) advance();
}
createToken(Token.Number);
}
private void identifier() {
while (!isAtEnd()) {
char c = peek();
if (isIdentifier(c) || isDigit(c)) {
advance();
} else break;
}
createToken(Token.Identifier);
switch (lexeme) {
case "null" -> token = Token.Null;
case "true" -> token = Token.True;
case "false" -> token = Token.False;
}
}
private void skipWhitespaceAndComments() {
while (true) {
if (isAtEnd()) return;
switch (peek()) {
case ' ', '\r', '\t', '\n' -> advance();
case '/' -> {
switch (peekNext()) {
case '/' -> {
while (!isAtEnd() && peek() != '\r' && peek() != '\n') advance();
}
case '*' -> {
advance();
advance();
while (!isAtEnd() && (peek() != '*' || peekNext() != '/')) advance();
if (!isAtEnd()) {
advance();
advance();
}
}
default -> {
start = current;
return;
}
}
}
default -> {
start = current;
return;
}
}
}
}
// Helpers
private void unexpected() {
createToken(Token.Error, "Unexpected character");
}
private void createToken(Token token, String lexeme) {
this.token = token;
this.lexeme = lexeme;
}
private void createToken(Token token) {
createToken(token, source.substring(start, current));
}
private boolean match(char expected) {
if (isAtEnd()) return false;
if (source.charAt(current) != expected) return false;
advance();
return true;
}
private char advance() {
return ch = source.charAt(current++);
}
private char peek() {
if (isAtEnd()) return '\0';
return source.charAt(current);
}
private char peekNext() {
if (current + 1 >= source.length()) return '\0';
return source.charAt(current + 1);
}
private boolean isAtEnd() {
return current >= source.length();
}
private boolean isDigit(char c) {
return c >= '0' && c <= '9';
}
private boolean isIdentifier(char c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$';
}
public static boolean isValidId(String id) {
return IDENTIFIER.matcher(id).matches() && !RESERVED_IDS.contains(id);
}
public static final Set<String> RESERVED_IDS = Set.of("null", "true", "false");
public static final Pattern IDENTIFIER = Pattern.compile("[a-zA-Z_$][a-zA-Z_$0-9]*");
// Visualization
@Override
public String toString() {
return source.substring(0, start) + '[' + source.substring(start, current) + ']' + source.substring(current);
}
}