package io.gitlab.jfronny.muscript.compiler; import java.util.Set; import java.util.regex.Pattern; // Heavily inspired by starscript public class Lexer { /** * The type of the token */ public Token token; /** * The string representation of the token */ public String lexeme; public char ch; public final String source; public int start, current; public Lexer(String source) { this.source = source; } /** * Scans for the next token storing it in {@link Lexer#token} and {@link Lexer#lexeme}. Produces {@link Token#EOF} if the end of source code has been reached and {@link Token#Error} if there has been an error */ public void next() { start = current; if (isAtEnd()) { createToken(Token.EOF); return; } // Scan expression skipWhitespaceAndComments(); if (isAtEnd()) { createToken(Token.EOF); return; } char c = advance(); if (isDigit(c)) number(); else if (isIdentifier(c)) identifier(); else { switch (c) { case '\'', '"' -> string(c); case '=' -> { if (match('=')) createToken(Token.EqualEqual); else createToken(Token.Assign); } case '!' -> createToken(match('=') ? Token.BangEqual : Token.Bang); case '+' -> createToken(Token.Plus); case '-' -> { if (match('>')) createToken(Token.Arrow); else createToken(Token.Minus); } case '*' -> createToken(Token.Star); case '/' -> createToken(Token.Slash); case '%' -> createToken(Token.Percentage); case '>' -> createToken(match('=') ? Token.GreaterEqual : Token.Greater); case '<' -> createToken(match('=') ? Token.LessEqual : Token.Less); case '&' -> createToken(Token.And); case '|' -> createToken(match('|') ? Token.Concat : Token.Or); case '^' -> createToken(Token.UpArrow); case '.' -> { if (match('.')) { if (match('.')) createToken(Token.Ellipsis); else createToken(Token.Error, "Unexpected '..', did you mean '...'?"); } else createToken(Token.Dot); } case ',' -> createToken(Token.Comma); case '?' -> createToken(Token.QuestionMark); case ':' -> createToken(match(':') ? Token.DoubleColon : Token.Colon); case '(' -> createToken(Token.LeftParen); case ')' -> createToken(Token.RightParen); case '[' -> createToken(Token.LeftBracket); case ']' -> createToken(Token.RightBracket); case '{' -> createToken(Token.LeftBrace); case '}' -> createToken(Token.RightBrace); case ';' -> createToken(Token.Semicolon); default -> unexpected(); } } } private void string(char stringChar) { while (!isAtEnd() && peek() != stringChar) { advance(); } if (isAtEnd()) { createToken(Token.Error, "Unterminated expression."); } else { advance(); createToken(Token.String, source.substring(start + 1, current - 1)); } } private void number() { while (isDigit(peek())) advance(); if (peek() == '.' && isDigit(peekNext())) { advance(); while (isDigit(peek())) advance(); } createToken(Token.Number); } private void identifier() { while (!isAtEnd()) { char c = peek(); if (isIdentifier(c) || isDigit(c)) { advance(); } else break; } createToken(Token.Identifier); switch (lexeme) { case "null" -> token = Token.Null; case "true" -> token = Token.True; case "false" -> token = Token.False; } } private void skipWhitespaceAndComments() { while (true) { if (isAtEnd()) return; switch (peek()) { case ' ', '\r', '\t', '\n' -> advance(); case '/' -> { switch (peekNext()) { case '/' -> { while (!isAtEnd() && peek() != '\r' && peek() != '\n') advance(); } case '*' -> { advance(); advance(); while (!isAtEnd() && (peek() != '*' || peekNext() != '/')) advance(); if (!isAtEnd()) { advance(); advance(); } } default -> { start = current; return; } } } default -> { start = current; return; } } } } // Helpers private void unexpected() { createToken(Token.Error, "Unexpected character"); } private void createToken(Token token, String lexeme) { this.token = token; this.lexeme = lexeme; } private void createToken(Token token) { createToken(token, source.substring(start, current)); } private boolean match(char expected) { if (isAtEnd()) return false; if (source.charAt(current) != expected) return false; advance(); return true; } private char advance() { return ch = source.charAt(current++); } private char peek() { if (isAtEnd()) return '\0'; return source.charAt(current); } private char peekNext() { if (current + 1 >= source.length()) return '\0'; return source.charAt(current + 1); } private boolean isAtEnd() { return current >= source.length(); } private boolean isDigit(char c) { return c >= '0' && c <= '9'; } private boolean isIdentifier(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$'; } public static boolean isValidId(String id) { return IDENTIFIER.matcher(id).matches() && !RESERVED_IDS.contains(id); } public static final Set RESERVED_IDS = Set.of("null", "true", "false"); public static final Pattern IDENTIFIER = Pattern.compile("[a-zA-Z_$][a-zA-Z_$0-9]*"); // Visualization @Override public String toString() { return source.substring(0, start) + '[' + source.substring(start, current) + ']' + source.substring(current); } }