Implement my own ArgumentTokenizer

This commit is contained in:
JFronny 2021-11-24 18:06:29 +01:00
parent a7f5931730
commit 85cb5cce71
No known key found for this signature in database
GPG Key ID: BEC5ACBBD4EE17E5
2 changed files with 50 additions and 172 deletions

View File

@ -32,7 +32,7 @@ public class BatchCommand extends Command {
}
try {
for (String line : Files.readAllLines(p)) {
argsSet.add(new InvokedCommandDescription(ArgumentTokenizer.tokenize(line).toArray(String[]::new)));
argsSet.add(new InvokedCommandDescription(ArgumentTokenizer.tokenize(line)));
}
} catch (Exception e) {
Inceptum.LOGGER.error("Could not read file", e);

View File

@ -1,183 +1,61 @@
package io.gitlab.jfronny.inceptum.util;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits a command-line style string into individual arguments.
 *
 * <p>Rules implemented by {@link #tokenize(String)}:
 * <ul>
 *   <li>Unquoted spaces ({@code ' '}) separate arguments; runs of spaces count as one separator.</li>
 *   <li>Single and double quotes group text (including spaces) into one argument. The other
 *       kind of quote is taken literally inside a quoted section.</li>
 *   <li>A backslash escapes the next character in every state. {@code \b}, {@code \f},
 *       {@code \n}, {@code \r} and {@code \t} are translated to the matching control
 *       character; any other escaped character is taken literally.</li>
 * </ul>
 */
public class ArgumentTokenizer {
    /**
     * Tokenizes the given string into arguments.
     *
     * <p>An explicitly quoted empty string ({@code ""} or {@code ''}) yields an empty argument.
     * A quote that is never closed simply runs to the end of the input.
     *
     * @param toProcess the raw line to split; must not be {@code null}
     * @return the parsed arguments in order of appearance, never {@code null}
     * @throws NullPointerException if {@code toProcess} is {@code null}
     */
    public static String[] tokenize(String toProcess) {
        List<String> tokens = new ArrayList<>();
        StringBuilder currentToken = new StringBuilder();
        // Tracks whether an argument has begun. The buffer alone cannot tell "no argument yet"
        // apart from an explicitly quoted empty argument, so a separate flag is needed.
        boolean tokenStarted = false;
        State state = State.None;
        char[] chars = toProcess.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            switch (c) {
                case '\'' -> {
                    switch (state) {
                        case None -> {
                            state = State.QuoteSingle;
                            tokenStarted = true;
                        }
                        case QuoteSingle -> state = State.None;
                        // Inside double quotes a single quote is ordinary text.
                        case QuoteDouble -> currentToken.append(c);
                    }
                }
                case '"' -> {
                    switch (state) {
                        case None -> {
                            state = State.QuoteDouble;
                            tokenStarted = true;
                        }
                        case QuoteDouble -> state = State.None;
                        // Inside single quotes a double quote is ordinary text.
                        case QuoteSingle -> currentToken.append(c);
                    }
                }
                case '\\' -> {
                    tokenStarted = true;
                    // Pre-increment: advance to the escaped character first, then bounds-check it.
                    if (++i < chars.length) {
                        currentToken.append(unescape(chars[i]));
                    } else {
                        // Trailing backslash with nothing to escape: keep it literally.
                        currentToken.append('\\');
                    }
                }
                case ' ' -> {
                    if (state != State.None) {
                        currentToken.append(c);
                    } else if (tokenStarted) {
                        tokens.add(currentToken.toString());
                        currentToken.setLength(0);
                        tokenStarted = false;
                    }
                    // Otherwise: leading or repeated separator, nothing to emit.
                }
                default -> {
                    currentToken.append(c);
                    tokenStarted = true;
                }
            }
        }
        if (tokenStarted) {
            tokens.add(currentToken.toString());
        }
        return tokens.toArray(String[]::new);
    }

    /** Maps the character following a backslash to the character it stands for. */
    private static char unescape(char escaped) {
        return switch (escaped) {
            case 'b' -> '\b';
            case 'f' -> '\f';
            case 'n' -> '\n';
            case 'r' -> '\r';
            case 't' -> '\t';
            default -> escaped;
        };
    }

    /** Quoting state of the tokenizer while scanning the input. */
    enum State {
        None, QuoteSingle, QuoteDouble
    }
}