2022-06-03 19:54:31 +02:00
|
|
|
package io.gitlab.jfronny.muscript.compiler;
|
|
|
|
|
|
|
|
// Heavily inspired by starscript
|
|
|
|
public class Lexer {
|
2022-11-24 19:05:51 +01:00
|
|
|
/**
|
|
|
|
* The type of the token
|
|
|
|
*/
|
2022-06-03 19:54:31 +02:00
|
|
|
public Token token;
|
2022-11-24 19:05:51 +01:00
|
|
|
/**
|
|
|
|
* The string representation of the token
|
|
|
|
*/
|
2022-06-03 19:54:31 +02:00
|
|
|
public String lexeme;
|
|
|
|
|
|
|
|
public char ch;
|
|
|
|
|
2022-06-13 10:44:59 +02:00
|
|
|
public final String source;
|
|
|
|
public int start, current;
|
2022-06-03 19:54:31 +02:00
|
|
|
|
|
|
|
public Lexer(String source) {
|
|
|
|
this.source = source;
|
|
|
|
}
|
|
|
|
|
2022-11-24 19:05:51 +01:00
|
|
|
/**
|
|
|
|
* Scans for the next token storing it in {@link Lexer#token} and {@link Lexer#lexeme}. Produces {@link Token#EOF} if the end of source code has been reached and {@link Token#Error} if there has been an error
|
|
|
|
*/
|
2022-06-03 19:54:31 +02:00
|
|
|
public void next() {
|
|
|
|
start = current;
|
|
|
|
|
|
|
|
if (isAtEnd()) {
|
|
|
|
createToken(Token.EOF);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Scan expression
|
|
|
|
skipWhitespace();
|
|
|
|
if (isAtEnd()) {
|
|
|
|
createToken(Token.EOF);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
char c = advance();
|
|
|
|
|
2022-06-13 10:44:59 +02:00
|
|
|
if (isDigit(c)) number();
|
|
|
|
else if (isIdentifier(c)) identifier();
|
2022-06-03 19:54:31 +02:00
|
|
|
else {
|
|
|
|
switch (c) {
|
2022-06-04 19:29:38 +02:00
|
|
|
case '\'', '"' -> string(c);
|
2022-06-03 19:54:31 +02:00
|
|
|
|
2022-11-24 19:05:51 +01:00
|
|
|
case '=' -> {
|
|
|
|
if (match('=')) createToken(Token.EqualEqual);
|
|
|
|
else unexpected();
|
|
|
|
}
|
2022-06-03 19:54:31 +02:00
|
|
|
case '!' -> createToken(match('=') ? Token.BangEqual : Token.Bang);
|
|
|
|
|
|
|
|
case '+' -> createToken(Token.Plus);
|
|
|
|
case '-' -> createToken(Token.Minus);
|
|
|
|
case '*' -> createToken(Token.Star);
|
|
|
|
case '/' -> createToken(Token.Slash);
|
|
|
|
case '%' -> createToken(Token.Percentage);
|
|
|
|
case '>' -> createToken(match('=') ? Token.GreaterEqual : Token.Greater);
|
|
|
|
case '<' -> createToken(match('=') ? Token.LessEqual : Token.Less);
|
|
|
|
|
|
|
|
case '&' -> createToken(Token.And);
|
|
|
|
case '|' -> createToken(match('|') ? Token.Concat : Token.Or);
|
|
|
|
case '^' -> createToken(Token.UpArrow);
|
|
|
|
|
|
|
|
case '.' -> createToken(Token.Dot);
|
|
|
|
case ',' -> createToken(Token.Comma);
|
|
|
|
case '?' -> createToken(Token.QuestionMark);
|
|
|
|
case ':' -> createToken(Token.Colon);
|
|
|
|
case '(' -> createToken(Token.LeftParen);
|
|
|
|
case ')' -> createToken(Token.RightParen);
|
|
|
|
case '[' -> createToken(Token.LeftBracket);
|
|
|
|
case ']' -> createToken(Token.RightBracket);
|
|
|
|
|
|
|
|
default -> unexpected();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-04 19:29:38 +02:00
|
|
|
private void string(char stringChar) {
|
|
|
|
while (!isAtEnd() && peek() != stringChar) {
|
2022-06-03 19:54:31 +02:00
|
|
|
advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (isAtEnd()) {
|
|
|
|
createToken(Token.Error, "Unterminated expression.");
|
2022-11-24 19:05:51 +01:00
|
|
|
} else {
|
2022-06-03 19:54:31 +02:00
|
|
|
advance();
|
|
|
|
createToken(Token.String, source.substring(start + 1, current - 1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private void number() {
|
|
|
|
while (isDigit(peek())) advance();
|
|
|
|
|
|
|
|
if (peek() == '.' && isDigit(peekNext())) {
|
|
|
|
advance();
|
|
|
|
|
|
|
|
while (isDigit(peek())) advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
createToken(Token.Number);
|
|
|
|
}
|
|
|
|
|
|
|
|
private void identifier() {
|
|
|
|
while (!isAtEnd()) {
|
|
|
|
char c = peek();
|
2022-06-13 10:44:59 +02:00
|
|
|
if (isIdentifier(c) || isDigit(c)) {
|
2022-06-03 19:54:31 +02:00
|
|
|
advance();
|
|
|
|
} else if (c == ':' && peekNext() == ':') {
|
|
|
|
advance();
|
|
|
|
advance();
|
|
|
|
} else break;
|
|
|
|
}
|
|
|
|
|
|
|
|
createToken(Token.Identifier);
|
|
|
|
|
|
|
|
switch (lexeme) {
|
|
|
|
case "null" -> token = Token.Null;
|
|
|
|
case "true" -> token = Token.True;
|
|
|
|
case "false" -> token = Token.False;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private void skipWhitespace() {
|
|
|
|
while (true) {
|
|
|
|
if (isAtEnd()) return;
|
|
|
|
char c = peek();
|
|
|
|
|
|
|
|
switch (c) {
|
2022-06-13 10:44:59 +02:00
|
|
|
case ' ', '\r', '\t', '\n' -> advance();
|
2022-06-03 19:54:31 +02:00
|
|
|
default -> {
|
|
|
|
start = current;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Helpers
|
|
|
|
|
|
|
|
private void unexpected() {
|
2022-06-13 10:44:59 +02:00
|
|
|
createToken(Token.Error, "Unexpected character");
|
2022-06-03 19:54:31 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private void createToken(Token token, String lexeme) {
|
|
|
|
this.token = token;
|
|
|
|
this.lexeme = lexeme;
|
|
|
|
}
|
|
|
|
|
|
|
|
private void createToken(Token token) {
|
|
|
|
createToken(token, source.substring(start, current));
|
|
|
|
}
|
|
|
|
|
|
|
|
private boolean match(char expected) {
|
|
|
|
if (isAtEnd()) return false;
|
|
|
|
if (source.charAt(current) != expected) return false;
|
|
|
|
|
|
|
|
advance();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
private char advance() {
|
|
|
|
return ch = source.charAt(current++);
|
|
|
|
}
|
|
|
|
|
|
|
|
private char peek() {
|
|
|
|
if (isAtEnd()) return '\0';
|
|
|
|
return source.charAt(current);
|
|
|
|
}
|
|
|
|
|
|
|
|
private char peekNext() {
|
|
|
|
if (current + 1 >= source.length()) return '\0';
|
|
|
|
return source.charAt(current + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
private boolean isAtEnd() {
|
|
|
|
return current >= source.length();
|
|
|
|
}
|
|
|
|
|
|
|
|
private boolean isDigit(char c) {
|
|
|
|
return c >= '0' && c <= '9';
|
|
|
|
}
|
|
|
|
|
2022-06-13 10:44:59 +02:00
|
|
|
private boolean isIdentifier(char c) {
|
2022-06-03 19:54:31 +02:00
|
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$';
|
|
|
|
}
|
2023-01-20 17:47:41 +01:00
|
|
|
|
|
|
|
// Visualization
|
|
|
|
@Override
|
|
|
|
public String toString() {
|
|
|
|
return source.substring(0, start) + '[' + source.substring(start, current) + ']' + source.substring(current);
|
|
|
|
}
|
2022-06-03 19:54:31 +02:00
|
|
|
}
|