// 2022-06-03 19:54:31 +02:00
package io.gitlab.jfronny.muscript.compiler ;
// Heavily inspired by starscript
public class Lexer {
/** The type of the token */
public Token token ;
/** The string representation of the token */
public String lexeme ;
public char ch ;
2022-06-13 10:44:59 +02:00
public final String source ;
public int start , current ;
2022-06-03 19:54:31 +02:00
public Lexer ( String source ) {
this . source = source ;
}
/** Scans for the next token storing it in {@link Lexer#token} and {@link Lexer#lexeme}. Produces {@link Token#EOF} if the end of source code has been reached and {@link Token#Error} if there has been an error */
public void next ( ) {
start = current ;
if ( isAtEnd ( ) ) {
createToken ( Token . EOF ) ;
return ;
}
// Scan expression
skipWhitespace ( ) ;
if ( isAtEnd ( ) ) {
createToken ( Token . EOF ) ;
return ;
}
char c = advance ( ) ;
2022-06-13 10:44:59 +02:00
if ( isDigit ( c ) ) number ( ) ;
else if ( isIdentifier ( c ) ) identifier ( ) ;
2022-06-03 19:54:31 +02:00
else {
switch ( c ) {
2022-06-04 19:29:38 +02:00
case '\'' , '"' - > string ( c ) ;
2022-06-03 19:54:31 +02:00
case '=' - > { if ( match ( '=' ) ) createToken ( Token . EqualEqual ) ; else unexpected ( ) ; }
case '!' - > createToken ( match ( '=' ) ? Token . BangEqual : Token . Bang ) ;
case '+' - > createToken ( Token . Plus ) ;
case '-' - > createToken ( Token . Minus ) ;
case '*' - > createToken ( Token . Star ) ;
case '/' - > createToken ( Token . Slash ) ;
case '%' - > createToken ( Token . Percentage ) ;
case '>' - > createToken ( match ( '=' ) ? Token . GreaterEqual : Token . Greater ) ;
case '<' - > createToken ( match ( '=' ) ? Token . LessEqual : Token . Less ) ;
case '&' - > createToken ( Token . And ) ;
case '|' - > createToken ( match ( '|' ) ? Token . Concat : Token . Or ) ;
case '^' - > createToken ( Token . UpArrow ) ;
case '.' - > createToken ( Token . Dot ) ;
case ',' - > createToken ( Token . Comma ) ;
case '?' - > createToken ( Token . QuestionMark ) ;
case ':' - > createToken ( Token . Colon ) ;
case '(' - > createToken ( Token . LeftParen ) ;
case ')' - > createToken ( Token . RightParen ) ;
case '[' - > createToken ( Token . LeftBracket ) ;
case ']' - > createToken ( Token . RightBracket ) ;
default - > unexpected ( ) ;
}
}
}
2022-06-04 19:29:38 +02:00
private void string ( char stringChar ) {
while ( ! isAtEnd ( ) & & peek ( ) ! = stringChar ) {
2022-06-03 19:54:31 +02:00
advance ( ) ;
}
if ( isAtEnd ( ) ) {
createToken ( Token . Error , " Unterminated expression. " ) ;
}
else {
advance ( ) ;
createToken ( Token . String , source . substring ( start + 1 , current - 1 ) ) ;
}
}
private void number ( ) {
while ( isDigit ( peek ( ) ) ) advance ( ) ;
if ( peek ( ) = = '.' & & isDigit ( peekNext ( ) ) ) {
advance ( ) ;
while ( isDigit ( peek ( ) ) ) advance ( ) ;
}
createToken ( Token . Number ) ;
}
private void identifier ( ) {
while ( ! isAtEnd ( ) ) {
char c = peek ( ) ;
2022-06-13 10:44:59 +02:00
if ( isIdentifier ( c ) | | isDigit ( c ) ) {
2022-06-03 19:54:31 +02:00
advance ( ) ;
} else if ( c = = ':' & & peekNext ( ) = = ':' ) {
advance ( ) ;
advance ( ) ;
} else break ;
}
createToken ( Token . Identifier ) ;
switch ( lexeme ) {
case " null " - > token = Token . Null ;
case " true " - > token = Token . True ;
case " false " - > token = Token . False ;
}
}
private void skipWhitespace ( ) {
while ( true ) {
if ( isAtEnd ( ) ) return ;
char c = peek ( ) ;
switch ( c ) {
2022-06-13 10:44:59 +02:00
case ' ' , '\r' , '\t' , '\n' - > advance ( ) ;
2022-06-03 19:54:31 +02:00
default - > {
start = current ;
return ;
}
}
}
}
// Helpers
private void unexpected ( ) {
2022-06-13 10:44:59 +02:00
createToken ( Token . Error , " Unexpected character " ) ;
2022-06-03 19:54:31 +02:00
}
private void createToken ( Token token , String lexeme ) {
this . token = token ;
this . lexeme = lexeme ;
}
private void createToken ( Token token ) {
createToken ( token , source . substring ( start , current ) ) ;
}
private boolean match ( char expected ) {
if ( isAtEnd ( ) ) return false ;
if ( source . charAt ( current ) ! = expected ) return false ;
advance ( ) ;
return true ;
}
private char advance ( ) {
return ch = source . charAt ( current + + ) ;
}
private char peek ( ) {
if ( isAtEnd ( ) ) return '\0' ;
return source . charAt ( current ) ;
}
private char peekNext ( ) {
if ( current + 1 > = source . length ( ) ) return '\0' ;
return source . charAt ( current + 1 ) ;
}
private boolean isAtEnd ( ) {
return current > = source . length ( ) ;
}
private boolean isDigit ( char c ) {
return c > = '0' & & c < = '9' ;
}
2022-06-13 10:44:59 +02:00
private boolean isIdentifier ( char c ) {
2022-06-03 19:54:31 +02:00
return ( c > = 'a' & & c < = 'z' ) | | ( c > = 'A' & & c < = 'Z' ) | | c = = '_' | | c = = '$' ;
}
}