Implement my own ArgumentTokenizer
This commit is contained in:
parent
a7f5931730
commit
85cb5cce71
|
@ -32,7 +32,7 @@ public class BatchCommand extends Command {
|
|||
}
|
||||
try {
|
||||
for (String line : Files.readAllLines(p)) {
|
||||
argsSet.add(new InvokedCommandDescription(ArgumentTokenizer.tokenize(line).toArray(String[]::new)));
|
||||
argsSet.add(new InvokedCommandDescription(ArgumentTokenizer.tokenize(line)));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Inceptum.LOGGER.error("Could not read file", e);
|
||||
|
|
|
@ -1,183 +1,61 @@
|
|||
package io.gitlab.jfronny.inceptum.util;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ArgumentTokenizer {
|
||||
private static final int NO_TOKEN_STATE = 0;
|
||||
private static final int NORMAL_TOKEN_STATE = 1;
|
||||
private static final int SINGLE_QUOTE_STATE = 2;
|
||||
private static final int DOUBLE_QUOTE_STATE = 3;
|
||||
|
||||
/** Tokenizes the given String into String tokens
|
||||
* @param arguments A String containing one or more command-line style arguments to be tokenized.
|
||||
* @return A list of parsed and properly escaped arguments.
|
||||
*/
|
||||
public static List<String> tokenize(String arguments) {
|
||||
return tokenize(arguments, false);
|
||||
}
|
||||
|
||||
/** Tokenizes the given String into String tokens.
|
||||
* @param arguments A String containing one or more command-line style arguments to be tokenized.
|
||||
* @param stringify whether to include escape special characters
|
||||
* @return A list of parsed and properly escaped arguments.
|
||||
*/
|
||||
public static List<String> tokenize(String arguments, boolean stringify) {
|
||||
List<String> argList = new LinkedList<>();
|
||||
StringBuilder currArg = new StringBuilder();
|
||||
boolean escaped = false;
|
||||
int state = NO_TOKEN_STATE; // start in the NO_TOKEN_STATE
|
||||
int len = arguments.length();
|
||||
|
||||
// Loop over each character in the string
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = arguments.charAt(i);
|
||||
if (escaped) {
|
||||
// Escaped state: just append the next character to the current arg.
|
||||
escaped = false;
|
||||
currArg.append(c);
|
||||
}
|
||||
else {
|
||||
switch(state) {
|
||||
case SINGLE_QUOTE_STATE:
|
||||
if (c == '\'') {
|
||||
// Seen the close quote; continue this arg until whitespace is seen
|
||||
state = NORMAL_TOKEN_STATE;
|
||||
}
|
||||
else {
|
||||
currArg.append(c);
|
||||
}
|
||||
break;
|
||||
case DOUBLE_QUOTE_STATE:
|
||||
if (c == '"') {
|
||||
// Seen the close quote; continue this arg until whitespace is seen
|
||||
state = NORMAL_TOKEN_STATE;
|
||||
}
|
||||
else if (c == '\\') {
|
||||
// Look ahead, and only escape quotes or backslashes
|
||||
i++;
|
||||
char next = arguments.charAt(i);
|
||||
if (next == '"' || next == '\\') {
|
||||
currArg.append(next);
|
||||
}
|
||||
else {
|
||||
currArg.append(c);
|
||||
currArg.append(next);
|
||||
}
|
||||
}
|
||||
else {
|
||||
currArg.append(c);
|
||||
}
|
||||
break;
|
||||
// case NORMAL_TOKEN_STATE:
|
||||
// if (Character.isWhitespace(c)) {
|
||||
// // Whitespace ends the token; start a new one
|
||||
// argList.add(currArg.toString());
|
||||
// currArg = new StringBuffer();
|
||||
// state = NO_TOKEN_STATE;
|
||||
// }
|
||||
// else if (c == '\\') {
|
||||
// // Backslash in a normal token: escape the next character
|
||||
// escaped = true;
|
||||
// }
|
||||
// else if (c == '\'') {
|
||||
// state = SINGLE_QUOTE_STATE;
|
||||
// }
|
||||
// else if (c == '"') {
|
||||
// state = DOUBLE_QUOTE_STATE;
|
||||
// }
|
||||
// else {
|
||||
// currArg.append(c);
|
||||
// }
|
||||
// break;
|
||||
case NO_TOKEN_STATE:
|
||||
case NORMAL_TOKEN_STATE:
|
||||
switch(c) {
|
||||
case '\\':
|
||||
escaped = true;
|
||||
state = NORMAL_TOKEN_STATE;
|
||||
break;
|
||||
case '\'':
|
||||
state = SINGLE_QUOTE_STATE;
|
||||
break;
|
||||
case '"':
|
||||
state = DOUBLE_QUOTE_STATE;
|
||||
break;
|
||||
default:
|
||||
if (!Character.isWhitespace(c)) {
|
||||
currArg.append(c);
|
||||
state = NORMAL_TOKEN_STATE;
|
||||
}
|
||||
else if (state == NORMAL_TOKEN_STATE) {
|
||||
// Whitespace ends the token; start a new one
|
||||
argList.add(currArg.toString());
|
||||
currArg = new StringBuilder();
|
||||
state = NO_TOKEN_STATE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("ArgumentTokenizer state " + state + " is invalid!");
|
||||
public static String[] tokenize(String toProcess) {
|
||||
List<String> tokens = new ArrayList<>();
|
||||
StringBuilder currentToken = new StringBuilder();
|
||||
State state = State.None;
|
||||
char[] chars = toProcess.toCharArray();
|
||||
for (int i = 0; i < chars.length; i++) {
|
||||
switch (chars[i]) {
|
||||
case '\'' -> state = switch (state) {
|
||||
case None -> State.QuoteSingle;
|
||||
case QuoteSingle -> State.None;
|
||||
case QuoteDouble -> {
|
||||
currentToken.append('"');
|
||||
yield State.QuoteDouble;
|
||||
}
|
||||
};
|
||||
case '"' -> state = switch (state) {
|
||||
case None -> State.QuoteDouble;
|
||||
case QuoteSingle -> {
|
||||
currentToken.append('"');
|
||||
yield State.QuoteSingle;
|
||||
}
|
||||
case QuoteDouble -> State.None;
|
||||
};
|
||||
case '\\' -> {
|
||||
if (i++ < chars.length) {
|
||||
currentToken.append(switch (chars[i]) {
|
||||
case 'b' -> '\b';
|
||||
case 'f' -> '\f';
|
||||
case 'n' -> '\n';
|
||||
case 'r' -> '\r';
|
||||
case 't' -> '\t';
|
||||
default -> chars[i];
|
||||
});
|
||||
} else currentToken.append('\\');
|
||||
}
|
||||
case ' ' -> {
|
||||
if (state == State.None) {
|
||||
tokens.add(currentToken.toString());
|
||||
currentToken = new StringBuilder();
|
||||
}
|
||||
else {
|
||||
currentToken.append(' ');
|
||||
}
|
||||
}
|
||||
default -> currentToken.append(chars[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// If we're still escaped, put in the backslash
|
||||
if (escaped) {
|
||||
currArg.append('\\');
|
||||
argList.add(currArg.toString());
|
||||
}
|
||||
// Close the last argument if we haven't yet
|
||||
else if (state != NO_TOKEN_STATE) {
|
||||
argList.add(currArg.toString());
|
||||
}
|
||||
// Format each argument if we've been told to stringify them
|
||||
if (stringify) {
|
||||
for (int i = 0; i < argList.size(); i++) {
|
||||
argList.set(i, "\"" + _escapeQuotesAndBackslashes(argList.get(i)) + "\"");
|
||||
}
|
||||
}
|
||||
return argList;
|
||||
if (!currentToken.isEmpty()) tokens.add(currentToken.toString());
|
||||
return tokens.toArray(String[]::new);
|
||||
}
|
||||
|
||||
/** Inserts backslashes before any occurrences of a backslash or
|
||||
* quote in the given string. Also converts any special characters
|
||||
* appropriately.
|
||||
*/
|
||||
protected static String _escapeQuotesAndBackslashes(String s) {
|
||||
final StringBuilder buf = new StringBuilder(s);
|
||||
|
||||
// Walk backwards, looking for quotes or backslashes.
|
||||
// If we see any, insert an extra backslash into the buffer at
|
||||
// the same index. (By walking backwards, the index into the buffer
|
||||
// will remain correct as we change the buffer.)
|
||||
for (int i = s.length()-1; i >= 0; i--) {
|
||||
char c = s.charAt(i);
|
||||
if ((c == '\\') || (c == '"')) {
|
||||
buf.insert(i, '\\');
|
||||
}
|
||||
// Replace any special characters with escaped versions
|
||||
else if (c == '\n') {
|
||||
buf.deleteCharAt(i);
|
||||
buf.insert(i, "\\n");
|
||||
}
|
||||
else if (c == '\t') {
|
||||
buf.deleteCharAt(i);
|
||||
buf.insert(i, "\\t");
|
||||
}
|
||||
else if (c == '\r') {
|
||||
buf.deleteCharAt(i);
|
||||
buf.insert(i, "\\r");
|
||||
}
|
||||
else if (c == '\b') {
|
||||
buf.deleteCharAt(i);
|
||||
buf.insert(i, "\\b");
|
||||
}
|
||||
else if (c == '\f') {
|
||||
buf.deleteCharAt(i);
|
||||
buf.insert(i, "\\f");
|
||||
}
|
||||
}
|
||||
return buf.toString();
|
||||
enum State {
|
||||
None, QuoteSingle, QuoteDouble
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue