233 lines
6.9 KiB
Java
233 lines
6.9 KiB
Java
package io.gitlab.jfronny.muscript.compiler;
|
|
|
|
import java.util.Set;
|
|
import java.util.regex.Pattern;
|
|
|
|
// Heavily inspired by starscript
|
|
public class Lexer {
|
|
/**
|
|
* The type of the token
|
|
*/
|
|
public Token token;
|
|
/**
|
|
* The string representation of the token
|
|
*/
|
|
public String lexeme;
|
|
|
|
public char ch;
|
|
|
|
public final String source;
|
|
public int start, current;
|
|
|
|
public Lexer(String source) {
|
|
this.source = source;
|
|
}
|
|
|
|
/**
|
|
* Scans for the next token storing it in {@link Lexer#token} and {@link Lexer#lexeme}. Produces {@link Token#EOF} if the end of source code has been reached and {@link Token#Error} if there has been an error
|
|
*/
|
|
public void next() {
|
|
start = current;
|
|
|
|
if (isAtEnd()) {
|
|
createToken(Token.EOF);
|
|
return;
|
|
}
|
|
|
|
// Scan expression
|
|
skipWhitespaceAndComments();
|
|
if (isAtEnd()) {
|
|
createToken(Token.EOF);
|
|
return;
|
|
}
|
|
|
|
char c = advance();
|
|
|
|
if (isDigit(c)) number();
|
|
else if (isIdentifier(c)) identifier();
|
|
else {
|
|
switch (c) {
|
|
case '\'', '"' -> string(c);
|
|
|
|
case '=' -> {
|
|
if (match('=')) createToken(Token.EqualEqual);
|
|
else createToken(Token.Assign);
|
|
}
|
|
case '!' -> createToken(match('=') ? Token.BangEqual : Token.Bang);
|
|
|
|
case '+' -> createToken(Token.Plus);
|
|
case '-' -> {
|
|
if (match('>')) createToken(Token.Arrow);
|
|
else createToken(Token.Minus);
|
|
}
|
|
case '*' -> createToken(Token.Star);
|
|
case '/' -> createToken(Token.Slash);
|
|
case '%' -> createToken(Token.Percentage);
|
|
case '>' -> createToken(match('=') ? Token.GreaterEqual : Token.Greater);
|
|
case '<' -> createToken(match('=') ? Token.LessEqual : Token.Less);
|
|
|
|
case '&' -> createToken(Token.And);
|
|
case '|' -> createToken(match('|') ? Token.Concat : Token.Or);
|
|
case '^' -> createToken(Token.UpArrow);
|
|
|
|
case '.' -> {
|
|
if (match('.')) {
|
|
if (match('.')) createToken(Token.Ellipsis);
|
|
else createToken(Token.Error, "Unexpected '..', did you mean '...'?");
|
|
} else createToken(Token.Dot);
|
|
}
|
|
case ',' -> createToken(Token.Comma);
|
|
case '?' -> createToken(Token.QuestionMark);
|
|
case ':' -> createToken(match(':') ? Token.DoubleColon : Token.Colon);
|
|
case '(' -> createToken(Token.LeftParen);
|
|
case ')' -> createToken(Token.RightParen);
|
|
case '[' -> createToken(Token.LeftBracket);
|
|
case ']' -> createToken(Token.RightBracket);
|
|
case '{' -> createToken(Token.LeftBrace);
|
|
case '}' -> createToken(Token.RightBrace);
|
|
|
|
case ';' -> createToken(Token.Semicolon);
|
|
|
|
default -> unexpected();
|
|
}
|
|
}
|
|
}
|
|
|
|
private void string(char stringChar) {
|
|
while (!isAtEnd() && peek() != stringChar) {
|
|
advance();
|
|
}
|
|
|
|
if (isAtEnd()) {
|
|
createToken(Token.Error, "Unterminated expression.");
|
|
} else {
|
|
advance();
|
|
createToken(Token.String, source.substring(start + 1, current - 1));
|
|
}
|
|
}
|
|
|
|
private void number() {
|
|
while (isDigit(peek())) advance();
|
|
|
|
if (peek() == '.' && isDigit(peekNext())) {
|
|
advance();
|
|
|
|
while (isDigit(peek())) advance();
|
|
}
|
|
|
|
createToken(Token.Number);
|
|
}
|
|
|
|
private void identifier() {
|
|
while (!isAtEnd()) {
|
|
char c = peek();
|
|
if (isIdentifier(c) || isDigit(c)) {
|
|
advance();
|
|
} else break;
|
|
}
|
|
|
|
createToken(Token.Identifier);
|
|
|
|
switch (lexeme) {
|
|
case "null" -> token = Token.Null;
|
|
case "true" -> token = Token.True;
|
|
case "false" -> token = Token.False;
|
|
}
|
|
}
|
|
|
|
private void skipWhitespaceAndComments() {
|
|
while (true) {
|
|
if (isAtEnd()) return;
|
|
|
|
switch (peek()) {
|
|
case ' ', '\r', '\t', '\n' -> advance();
|
|
case '/' -> {
|
|
switch (peekNext()) {
|
|
case '/' -> {
|
|
while (!isAtEnd() && peek() != '\r' && peek() != '\n') advance();
|
|
}
|
|
case '*' -> {
|
|
advance();
|
|
advance();
|
|
while (!isAtEnd() && (peek() != '*' || peekNext() != '/')) advance();
|
|
if (!isAtEnd()) {
|
|
advance();
|
|
advance();
|
|
}
|
|
}
|
|
default -> {
|
|
start = current;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
default -> {
|
|
start = current;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Helpers
|
|
|
|
private void unexpected() {
|
|
createToken(Token.Error, "Unexpected character");
|
|
}
|
|
|
|
private void createToken(Token token, String lexeme) {
|
|
this.token = token;
|
|
this.lexeme = lexeme;
|
|
}
|
|
|
|
private void createToken(Token token) {
|
|
createToken(token, source.substring(start, current));
|
|
}
|
|
|
|
private boolean match(char expected) {
|
|
if (isAtEnd()) return false;
|
|
if (source.charAt(current) != expected) return false;
|
|
|
|
advance();
|
|
return true;
|
|
}
|
|
|
|
private char advance() {
|
|
return ch = source.charAt(current++);
|
|
}
|
|
|
|
private char peek() {
|
|
if (isAtEnd()) return '\0';
|
|
return source.charAt(current);
|
|
}
|
|
|
|
private char peekNext() {
|
|
if (current + 1 >= source.length()) return '\0';
|
|
return source.charAt(current + 1);
|
|
}
|
|
|
|
private boolean isAtEnd() {
|
|
return current >= source.length();
|
|
}
|
|
|
|
private boolean isDigit(char c) {
|
|
return c >= '0' && c <= '9';
|
|
}
|
|
|
|
private boolean isIdentifier(char c) {
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$';
|
|
}
|
|
|
|
public static boolean isValidId(String id) {
|
|
return IDENTIFIER.matcher(id).matches() && !RESERVED_IDS.contains(id);
|
|
}
|
|
public static final Set<String> RESERVED_IDS = Set.of("null", "true", "false");
|
|
public static final Pattern IDENTIFIER = Pattern.compile("[a-zA-Z_$][a-zA-Z_$0-9]*");
|
|
|
|
// Visualization
|
|
@Override
|
|
public String toString() {
|
|
return source.substring(0, start) + '[' + source.substring(start, current) + ']' + source.substring(current);
|
|
}
|
|
}
|