commons: tree-based String2ObjectMap with utils for string substitution
ci/woodpecker/push/woodpecker Pipeline failed
Details
ci/woodpecker/push/woodpecker Pipeline failed
Details
This commit is contained in:
parent
66fe880575
commit
b02eb3142f
|
@ -0,0 +1,759 @@
|
|||
package io.gitlab.jfronny.commons.data;
|
||||
|
||||
import io.gitlab.jfronny.commons.data.impl.node.Node;
|
||||
import io.gitlab.jfronny.commons.data.impl.util.*;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.function.UnaryOperator;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import static io.gitlab.jfronny.commons.data.String2ObjectMap.SearchResult.Classification;
|
||||
|
||||
public class String2ObjectMap<V> extends AbstractMap<String, V> implements Serializable, Iterable<Map.Entry<String, V>> {
|
||||
protected volatile Node<V> root;
|
||||
|
||||
private final Lock lock = new ReentrantLock();
|
||||
|
||||
/**
 * Creates an empty map whose tree consists solely of a root node with an empty incoming edge
 * and no children.
 */
public String2ObjectMap() {
    // The trailing flag appears to mark the node as the root (cf. copyWithChildren(..., node == root)).
    root = Node.of("", Collections.emptyList(), true);
}
|
||||
|
||||
protected void acquireWriteLock() {
|
||||
lock.lock();
|
||||
}
|
||||
|
||||
protected void releaseWriteLock() {
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
@Override
|
||||
public V put(String key, V value) {
|
||||
return putInternal(key, value, true); // putInternal acquires write lock
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public V putIfAbsent(String key, V value) {
|
||||
return putInternal(key, value, false); // putInternal acquires write lock
|
||||
}
|
||||
|
||||
@Override
|
||||
public V get(Object key) {
|
||||
if (!(key instanceof CharSequence seq)) return null;
|
||||
SearchResult<V> searchResult = searchTree(seq);
|
||||
if (searchResult.classification.equals(Classification.EXACT_MATCH)) {
|
||||
return searchResult.nodeFound.hasValue()
|
||||
? searchResult.nodeFound.getValue()
|
||||
: null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Iterable<CharSequence> getKeysStartingWith(CharSequence prefix) {
|
||||
return getForKeysStartingWith(prefix, this::getDescendantKeys, Collections::emptySet);
|
||||
}
|
||||
|
||||
public Iterable<V> getValuesForKeysStartingWith(CharSequence prefix) {
|
||||
return getForKeysStartingWith(prefix, this::getDescendantValues, Collections::emptySet);
|
||||
}
|
||||
|
||||
public Iterable<? extends Entry<String, V>> getKeyValuePairsForKeysStartingWith(CharSequence prefix) {
|
||||
return getForKeysStartingWith(prefix, this::getDescendantKeyValuePairs, Collections::emptySet);
|
||||
}
|
||||
|
||||
interface GetForKeysStartingWith<V, T> {
|
||||
T transform(CharSequence prefix, Node<V> startNode);
|
||||
}
|
||||
|
||||
private <T> T getForKeysStartingWith(CharSequence prefix, GetForKeysStartingWith<V, T> transform, Supplier<T> def) {
|
||||
SearchResult<V> searchResult = searchTree(prefix);
|
||||
Classification classification = searchResult.classification;
|
||||
switch (classification) {
|
||||
case EXACT_MATCH -> {
|
||||
return transform.transform(prefix, searchResult.nodeFound);
|
||||
}
|
||||
case KEY_ENDS_MID_EDGE -> {
|
||||
// Append the remaining characters of the edge to the key.
|
||||
// For example if we searched for CO, but first matching node was COFFEE,
|
||||
// the key associated with the first node should be COFFEE...
|
||||
CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound);
|
||||
prefix = CharSequences.concatenate(prefix, edgeSuffix);
|
||||
return transform.transform(prefix, searchResult.nodeFound);
|
||||
}
|
||||
default -> {
|
||||
// Incomplete match means key is not a prefix of any node...
|
||||
return def.get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public V remove(Object key) {
|
||||
if (!(key instanceof CharSequence csq)) throw new ClassCastException();
|
||||
acquireWriteLock();
|
||||
try {
|
||||
SearchResult<V> searchResult = searchTree(csq);
|
||||
SearchResult.Classification classification = searchResult.classification;
|
||||
if (Objects.requireNonNull(classification) != Classification.EXACT_MATCH) {
|
||||
return null;
|
||||
}
|
||||
if (!searchResult.nodeFound.hasValue()) {
|
||||
// This node was created automatically as a split between two branches (implicit node).
|
||||
// No need to remove it...
|
||||
return null;
|
||||
}
|
||||
|
||||
// Proceed with deleting the node...
|
||||
List<Node<V>> childEdges = searchResult.nodeFound.getOutgoingEdges();
|
||||
if (childEdges.size() > 1) {
|
||||
// This node has more than one child, so if we delete the value from this node, we still need
|
||||
// to leave a similar node in place to act as the split between the child edges.
|
||||
// Just delete the value associated with this node.
|
||||
// -> Clone this node without its value, preserving its child nodes...
|
||||
Node<V> cloned = searchResult.nodeFound.copyWithoutValue(false);
|
||||
// Re-add the replacement node to the parent...
|
||||
searchResult.parentNode.updateOutgoingEdge(cloned);
|
||||
} else if (childEdges.size() == 1) {
|
||||
// Node has one child edge.
|
||||
// Create a new node which is the concatenation of the edges from this node and its child,
|
||||
// and which has the outgoing edges of the child and the value from the child.
|
||||
Node<V> child = childEdges.get(0);
|
||||
CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.nodeFound.getIncomingEdge(), child.getIncomingEdge());
|
||||
Node<V> mergedNode = child.copyWithEdgeCharacters(concatenatedEdges, false);
|
||||
// Re-add the merged node to the parent...
|
||||
searchResult.parentNode.updateOutgoingEdge(mergedNode);
|
||||
} else {
|
||||
// Node has no children. Delete this node from its parent,
|
||||
// which involves re-creating the parent rather than simply updating its child edge
|
||||
// (this is why we need parentNodesParent).
|
||||
// However if this would leave the parent with only one remaining child edge,
|
||||
// and the parent itself has no value (is a split node), and the parent is not the root node
|
||||
// (a special case which we never merge), then we also need to merge the parent with its
|
||||
// remaining child.
|
||||
|
||||
List<Node<V>> currentEdgesFromParent = searchResult.parentNode.getOutgoingEdges();
|
||||
// Create a list of the outgoing edges of the parent which will remain
|
||||
// if we remove this child...
|
||||
// Use a non-resizable list, as a sanity check to force ArrayIndexOutOfBounds...
|
||||
List<Node<V>> newEdgesOfParent = Arrays.asList(new Node[searchResult.parentNode.getOutgoingEdges().size() - 1]);
|
||||
for (int i = 0, added = 0, numParentEdges = currentEdgesFromParent.size(); i < numParentEdges; i++) {
|
||||
Node<V> node = currentEdgesFromParent.get(i);
|
||||
if (node != searchResult.nodeFound) {
|
||||
newEdgesOfParent.set(added++, node);
|
||||
}
|
||||
}
|
||||
|
||||
// Note the parent might actually be the root node (which we should never merge)...
|
||||
boolean parentIsRoot = (searchResult.parentNode == root);
|
||||
Node<V> newParent;
|
||||
if (newEdgesOfParent.size() == 1 && !searchResult.parentNode.hasValue() && !parentIsRoot) {
|
||||
// Parent is a non-root split node with only one remaining child, which can now be merged.
|
||||
Node<V> parentsRemainingChild = newEdgesOfParent.get(0);
|
||||
// Merge the parent with its only remaining child...
|
||||
CharSequence concatenatedEdges = CharSequences.concatenate(searchResult.parentNode.getIncomingEdge(), parentsRemainingChild.getIncomingEdge());
|
||||
newParent = parentsRemainingChild.copyWithEdgeCharacters(concatenatedEdges, false);
|
||||
} else {
|
||||
// Parent is a node which either has a value of its own, has more than one remaining
|
||||
// child, or is actually the root node (we never merge the root node).
|
||||
// Create new parent node which is the same as is currently just without the edge to the
|
||||
// node being deleted...
|
||||
newParent = searchResult.parentNode.copyWithChildren(newEdgesOfParent, parentIsRoot);
|
||||
}
|
||||
// Re-add the parent node to its parent...
|
||||
if (parentIsRoot) {
|
||||
// Replace the root node...
|
||||
this.root = newParent;
|
||||
} else {
|
||||
// Re-add the parent node to its parent...
|
||||
searchResult.parentNodesParent.updateOutgoingEdge(newParent);
|
||||
}
|
||||
}
|
||||
return searchResult.nodeFound.getValue();
|
||||
}
|
||||
finally {
|
||||
releaseWriteLock();
|
||||
}
|
||||
}
|
||||
|
||||
public Iterable<CharSequence> getClosestKeys(CharSequence candidate) {
|
||||
return getForClosestKeys(candidate, this::getDescendantKeys);
|
||||
}
|
||||
|
||||
public Iterable<V> getValuesForClosestKeys(CharSequence candidate) {
|
||||
return getForClosestKeys(candidate, this::getDescendantValues);
|
||||
}
|
||||
|
||||
public Iterable<? extends Entry<String, V>> getKeyValuePairsForClosestKeys(CharSequence candidate) {
|
||||
return getForClosestKeys(candidate, this::getDescendantKeyValuePairs);
|
||||
}
|
||||
|
||||
interface GetForClosestKeys<V, T> {
|
||||
T transform(CharSequence candidate, Node<V> startNode);
|
||||
}
|
||||
|
||||
private <T> Iterable<T> getForClosestKeys(CharSequence candidate, GetForClosestKeys<V, Iterable<T>> transform) {
|
||||
SearchResult<V> searchResult = searchTree(candidate);
|
||||
Classification classification = searchResult.classification;
|
||||
return switch (classification) {
|
||||
case EXACT_MATCH -> transform.transform(candidate, searchResult.nodeFound);
|
||||
case KEY_ENDS_MID_EDGE -> {
|
||||
// Append the remaining characters of the edge to the key.
|
||||
// For example if we searched for CO, but first matching node was COFFEE,
|
||||
// the key associated with the first node should be COFFEE...
|
||||
CharSequence edgeSuffix = CharSequences.getSuffix(searchResult.nodeFound.getIncomingEdge(), searchResult.charsMatchedInNodeFound);
|
||||
candidate = CharSequences.concatenate(candidate, edgeSuffix);
|
||||
yield transform.transform(candidate, searchResult.nodeFound);
|
||||
}
|
||||
case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE -> {
|
||||
// Example: if we searched for CX, but deepest matching node was CO,
|
||||
// the results should include node CO and its descendants...
|
||||
CharSequence keyOfParentNode = CharSequences.getPrefix(candidate, searchResult.charsMatched - searchResult.charsMatchedInNodeFound);
|
||||
CharSequence keyOfNodeFound = CharSequences.concatenate(keyOfParentNode, searchResult.nodeFound.getIncomingEdge());
|
||||
yield transform.transform(keyOfNodeFound, searchResult.nodeFound);
|
||||
}
|
||||
case INCOMPLETE_MATCH_TO_END_OF_EDGE -> {
|
||||
if (searchResult.charsMatched == 0) {
|
||||
// Closest match is the root node, we don't consider this a match for anything...
|
||||
yield Collections.emptySet();
|
||||
}
|
||||
// Example: if we searched for COFFEE, but deepest matching node was CO,
|
||||
// the results should include node CO and its descendants...
|
||||
CharSequence keyOfNodeFound = CharSequences.getPrefix(candidate, searchResult.charsMatched);
|
||||
yield transform.transform(keyOfNodeFound, searchResult.nodeFound);
|
||||
}
|
||||
default -> Collections.emptySet();
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
Deque<Node<V>> stack = new LinkedList<>();
|
||||
stack.push(this.root);
|
||||
int count = 0;
|
||||
while (true) {
|
||||
if (stack.isEmpty()) {
|
||||
return count;
|
||||
}
|
||||
Node<V> current = stack.pop();
|
||||
stack.addAll(current.getOutgoingEdges());
|
||||
if (current.hasValue()) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically adds the given value to the tree, creating a node for the value as necessary. If the value is already
|
||||
* stored for the same key, either overwrites the existing value, or simply returns the existing value, depending
|
||||
* on the given value of the <code>overwrite</code> flag.
|
||||
*
|
||||
* @param key The key against which the value should be stored
|
||||
* @param value The value to store against the key
|
||||
* @param overwrite If true, should replace any existing value, if false should not replace any existing value
|
||||
* @return The existing value for this key, if there was one, otherwise null
|
||||
*/
|
||||
V putInternal(CharSequence key, V value, boolean overwrite) {
|
||||
if (key == null) {
|
||||
throw new IllegalArgumentException("The key argument was null");
|
||||
}
|
||||
if (key.length() == 0) {
|
||||
throw new IllegalArgumentException("The key argument was zero-length");
|
||||
}
|
||||
if (value == null) {
|
||||
throw new IllegalArgumentException("The value argument was null");
|
||||
}
|
||||
acquireWriteLock();
|
||||
try {
|
||||
// Note we search the tree here after we have acquired the write lock...
|
||||
SearchResult<V> searchResult = searchTree(key);
|
||||
SearchResult.Classification classification = searchResult.classification;
|
||||
|
||||
switch (classification) {
|
||||
case EXACT_MATCH -> {
|
||||
// Search found an exact match for all edges leading to this node.
|
||||
// -> Add or update the value in the node found, by replacing
|
||||
// the existing node with a new node containing the value...
|
||||
|
||||
// First check if existing node has a value, and if we are allowed to overwrite it.
|
||||
// Return early without overwriting if necessary...
|
||||
V existingValue = searchResult.nodeFound.hasValue() ? searchResult.nodeFound.getValue() : null;
|
||||
if (!overwrite && searchResult.nodeFound.hasValue()) {
|
||||
return existingValue;
|
||||
}
|
||||
// Create a replacement for the existing node containing the new value...
|
||||
Node<V> replacementNode = Node.of(searchResult.nodeFound.getIncomingEdge(), value, searchResult.nodeFound.getOutgoingEdges(), false);
|
||||
searchResult.parentNode.updateOutgoingEdge(replacementNode);
|
||||
// Return the existing value...
|
||||
return existingValue;
|
||||
}
|
||||
case KEY_ENDS_MID_EDGE -> {
|
||||
// Search ran out of characters from the key while in the middle of an edge in the node.
|
||||
// -> Split the node in two: Create a new parent node storing the new value,
|
||||
// and a new child node holding the original value and edges from the existing node...
|
||||
CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length());
|
||||
CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge());
|
||||
CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix);
|
||||
|
||||
// Create new nodes...
|
||||
Node<V> newChild = searchResult.nodeFound.copyWithEdgeCharacters(suffixFromExistingEdge, false);
|
||||
Node<V> newParent = Node.of(commonPrefix, value, Arrays.asList(newChild), false);
|
||||
|
||||
// Add the new parent to the parent of the node being replaced (replacing the existing node)...
|
||||
searchResult.parentNode.updateOutgoingEdge(newParent);
|
||||
|
||||
// Return null for the existing value...
|
||||
return null;
|
||||
}
|
||||
case INCOMPLETE_MATCH_TO_END_OF_EDGE -> {
|
||||
// Search found a difference in characters between the key and the start of all child edges leaving the
|
||||
// node, the key still has trailing unmatched characters.
|
||||
// -> Add a new child to the node, containing the trailing characters from the key.
|
||||
|
||||
// NOTE: this is the only branch which allows an edge to be added to the root.
|
||||
// (Root node's own edge is "" empty string, so is considered a prefixing edge of every key)
|
||||
|
||||
// Create a new child node containing the trailing characters...
|
||||
CharSequence keySuffix = key.subSequence(searchResult.charsMatched, key.length());
|
||||
Node<V> newChild = Node.of(keySuffix, value, Collections.emptyList(), false);
|
||||
|
||||
// Clone the current node adding the new child...
|
||||
List<Node<V>> edges = new ArrayList<>(searchResult.nodeFound.getOutgoingEdges().size() + 1);
|
||||
edges.addAll(searchResult.nodeFound.getOutgoingEdges());
|
||||
edges.add(newChild);
|
||||
Node<V> clonedNode = searchResult.nodeFound.copyWithChildren(edges, searchResult.nodeFound == root);
|
||||
|
||||
// Re-add the cloned node to its parent node...
|
||||
if (searchResult.nodeFound == root) {
|
||||
this.root = clonedNode;
|
||||
} else {
|
||||
searchResult.parentNode.updateOutgoingEdge(clonedNode);
|
||||
}
|
||||
|
||||
// Return null for the existing value...
|
||||
return null;
|
||||
}
|
||||
case INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE -> {
|
||||
// Search found a difference in characters between the key and the characters in the middle of the
|
||||
// edge in the current node, and the key still has trailing unmatched characters.
|
||||
// -> Split the node in three:
|
||||
// Let's call node found: NF
|
||||
// (1) Create a new node N1 containing the unmatched characters from the rest of the key, and the
|
||||
// value supplied to this method
|
||||
// (2) Create a new node N2 containing the unmatched characters from the rest of the edge in NF, and
|
||||
// copy the original edges and the value from NF unmodified into N2
|
||||
// (3) Create a new node N3, which will be the split node, containing the matched characters from
|
||||
// the key and the edge, and add N1 and N2 as child nodes of N3
|
||||
// (4) Re-add N3 to the parent node of NF, effectively replacing NF in the tree
|
||||
|
||||
CharSequence keyCharsFromStartOfNodeFound = key.subSequence(searchResult.charsMatched - searchResult.charsMatchedInNodeFound, key.length());
|
||||
CharSequence commonPrefix = CharSequences.getCommonPrefix(keyCharsFromStartOfNodeFound, searchResult.nodeFound.getIncomingEdge());
|
||||
CharSequence suffixFromExistingEdge = CharSequences.subtractPrefix(searchResult.nodeFound.getIncomingEdge(), commonPrefix);
|
||||
CharSequence suffixFromKey = key.subSequence(searchResult.charsMatched, key.length());
|
||||
|
||||
// Create new nodes...
|
||||
Node<V> n1 = Node.of(suffixFromKey, value, Collections.emptyList(), false);
|
||||
Node<V> n2 = searchResult.nodeFound.copyWithEdgeCharacters(suffixFromExistingEdge, false);
|
||||
Node<V> n3 = Node.of(commonPrefix, Arrays.asList(n1, n2), false);
|
||||
|
||||
searchResult.parentNode.updateOutgoingEdge(n3);
|
||||
|
||||
// Return null for the existing value...
|
||||
return null;
|
||||
}
|
||||
default -> {
|
||||
// This is a safeguard against a new enum constant being added in future.
|
||||
throw new IllegalStateException("Unexpected classification for search result: " + searchResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
finally {
|
||||
releaseWriteLock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a lazy iterable which will return {@link CharSequence} keys for which the given key is a prefix.
|
||||
* The results inherently will not contain duplicates (duplicate keys cannot exist in the tree).
|
||||
* <p/>
|
||||
* Note that this method internally converts {@link CharSequence}s to {@link String}s, to avoid set equality issues,
|
||||
* because equals() and hashCode() are not specified by the CharSequence API contract.
|
||||
*/
|
||||
@SuppressWarnings({"JavaDoc"})
|
||||
Iterable<CharSequence> getDescendantKeys(final CharSequence startKey, final Node<V> startNode) {
|
||||
return getDescendantThing(startKey, startNode, (keyString, value) -> keyString);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a lazy iterable which will return values which are associated with keys in the tree for which
|
||||
* the given key is a prefix.
|
||||
*/
|
||||
@SuppressWarnings({"JavaDoc"})
|
||||
Iterable<V> getDescendantValues(final CharSequence startKey, final Node<V> startNode) {
|
||||
return getDescendantThing(startKey, startNode, (keyString, value) -> value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a lazy iterable which will return {@link KeyValuePair} objects each containing a key and a value,
|
||||
* for which the given key is a prefix of the key in the {@link KeyValuePair}. These results inherently will not
|
||||
* contain duplicates (duplicate keys cannot exist in the tree).
|
||||
* <p/>
|
||||
* Note that this method internally converts {@link CharSequence}s to {@link String}s, to avoid set equality issues,
|
||||
* because equals() and hashCode() are not specified by the CharSequence API contract.
|
||||
*/
|
||||
@SuppressWarnings({"JavaDoc"})
|
||||
Iterable<KeyValuePair<String, V>> getDescendantKeyValuePairs(final CharSequence startKey, final Node<V> startNode) {
|
||||
return getDescendantThing(startKey, startNode, (keyString, value) -> new KeyValuePair<>(keyString, value, this));
|
||||
}
|
||||
|
||||
/**
 * Maps a key and the value stored for it to the element type produced by a descendant
 * traversal (the key, the value, or a pair of both).
 *
 * @param <V> value type of the tree
 * @param <T> element type produced by the traversal
 */
@FunctionalInterface
interface GetDescendantThing<V, T> {
    /**
     * @param keyString the key of the visited node, as a {@link String}
     * @param value     the value stored in the visited node
     * @return the element to hand out for this node
     */
    T transform(String keyString, V value);
}
|
||||
|
||||
private <T> Iterable<T> getDescendantThing(final CharSequence startKey, final Node<V> startNode, final GetDescendantThing<V, T> transform) {
|
||||
return LazyIterator.iterable(() -> {
|
||||
Iterator<NodeKeyPair<V>> descendantNodes = lazyTraverseDescendants(startKey, startNode).iterator();
|
||||
return scope -> {
|
||||
// Traverse to the next matching node in the tree and return its key and value...
|
||||
while (descendantNodes.hasNext()) {
|
||||
NodeKeyPair<V> nodeKeyPair = descendantNodes.next();
|
||||
if (nodeKeyPair.node.hasValue()) {
|
||||
// Dealing with a node explicitly added to tree (rather than an automatically-added split node).
|
||||
|
||||
// -> Convert the CharSequence to a String before returning, to avoid set equality issues,
|
||||
// because equals() and hashCode() is not specified by the CharSequence API contract...
|
||||
String keyString = String.valueOf(nodeKeyPair.key);
|
||||
return transform.transform(keyString, nodeKeyPair.node.getValue());
|
||||
}
|
||||
}
|
||||
// Finished traversing the tree, no more matching nodes to return...
|
||||
return scope.endOfData();
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the tree using depth-first, preordered traversal, starting at the given node, using lazy evaluation
|
||||
* such that the next node is only determined when next() is called on the iterator returned.
|
||||
* The traversal algorithm uses iteration instead of recursion to allow deep trees to be traversed without
|
||||
* requiring large JVM stack sizes.
|
||||
* <p/>
|
||||
* Each node that is encountered is returned from the iterator along with a key associated with that node,
|
||||
* in a NodeKeyPair object. The key will be prefixed by the given start key, and will be generated by appending
|
||||
* to the start key the edges traversed along the path to that node from the start node.
|
||||
*
|
||||
* @param startKey The key which matches the given start node
|
||||
* @param startNode The start node
|
||||
* @return An iterator which when iterated traverses the tree using depth-first, preordered traversal,
|
||||
* starting at the given start node
|
||||
*/
|
||||
protected Iterable<NodeKeyPair<V>> lazyTraverseDescendants(final CharSequence startKey, final Node<V> startNode) {
|
||||
return LazyIterator.iterable(() -> {
|
||||
Deque<NodeKeyPair<V>> stack = new LinkedList<>();
|
||||
stack.push(new NodeKeyPair<>(startNode, startKey));
|
||||
return scope -> {
|
||||
if (stack.isEmpty()) {
|
||||
return scope.endOfData();
|
||||
}
|
||||
NodeKeyPair<V> current = stack.pop();
|
||||
List<Node<V>> childNodes = current.node.getOutgoingEdges();
|
||||
|
||||
// -> Iterate child nodes in reverse order and so push them onto the stack in reverse order,
|
||||
// to counteract that pushing them onto the stack alone would otherwise reverse their processing order.
|
||||
// This ensures that we actually process nodes in ascending alphabetical order.
|
||||
for (int i = childNodes.size(); i > 0; i--) {
|
||||
Node<V> child = childNodes.get(i - 1);
|
||||
stack.push(new NodeKeyPair<>(child, CharSequences.concatenate(current.key, child.getIncomingEdge())));
|
||||
}
|
||||
return current;
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates a node and its associated key. Used internally by {@link #lazyTraverseDescendants}.
|
||||
*/
|
||||
protected static class NodeKeyPair<V> {
|
||||
public final Node<V> node;
|
||||
public final CharSequence key;
|
||||
|
||||
public NodeKeyPair(Node<V> node, CharSequence key) {
|
||||
this.node = node;
|
||||
this.key = key;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverses the tree and finds the node which matches the longest prefix of the given key.
|
||||
* <p/>
|
||||
* The node returned might be an <u>exact match</u> for the key, in which case {@link SearchResult#charsMatched}
|
||||
* will equal the length of the key.
|
||||
* <p/>
|
||||
* The node returned might be an <u>inexact match</u> for the key, in which case {@link SearchResult#charsMatched}
|
||||
* will be less than the length of the key.
|
||||
* <p/>
|
||||
* There are two types of inexact match:
|
||||
* <ul>
|
||||
* <li>
|
||||
* An inexact match which ends evenly at the boundary between a node and its children (the rest of the key
|
||||
* not matching any children at all). In this case if we we wanted to add nodes to the tree to represent the
|
||||
* rest of the key, we could simply add child nodes to the node found.
|
||||
* </li>
|
||||
* <li>
|
||||
* An inexact match which ends in the middle of a the characters for an edge stored in a node (the key
|
||||
* matching only the first few characters of the edge). In this case if we we wanted to add nodes to the
|
||||
* tree to represent the rest of the key, we would have to split the node (let's call this node found: NF):
|
||||
* <ol>
|
||||
* <li>
|
||||
* Create a new node (N1) which will be the split node, containing the matched characters from the
|
||||
* start of the edge in NF
|
||||
* </li>
|
||||
* <li>
|
||||
* Create a new node (N2) which will contain the unmatched characters from the rest of the edge
|
||||
* in NF, and copy the original edges from NF unmodified into N2
|
||||
* </li>
|
||||
* <li>
|
||||
* Create a new node (N3) which will be the new branch, containing the unmatched characters from
|
||||
* the rest of the key
|
||||
* </li>
|
||||
* <li>
|
||||
* Add N2 as a child of N1
|
||||
* </li>
|
||||
* <li>
|
||||
* Add N3 as a child of N1
|
||||
* </li>
|
||||
* <li>
|
||||
* In the <b>parent node of NF</b>, replace the edge pointing to NF with an edge pointing instead
|
||||
* to N1. If we do this step atomically, reading threads are guaranteed to never see "invalid"
|
||||
* data, only either the old data or the new data
|
||||
* </li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* </ul>
|
||||
* The {@link SearchResult#classification} is an enum value based on its classification of the
|
||||
* match according to the descriptions above.
|
||||
*
|
||||
* @param key a key for which the node matching the longest prefix of the key is required
|
||||
* @return A {@link SearchResult} object which contains the node matching the longest prefix of the key, its
|
||||
* parent node, the number of characters of the key which were matched in total and within the edge of the
|
||||
* matched node, and a {@link SearchResult#classification} of the match as described above
|
||||
*/
|
||||
public SearchResult<V> searchTree(CharSequence key) {
|
||||
Node<V> parentNodesParent = null;
|
||||
Node<V> parentNode = null;
|
||||
Node<V> currentNode = root;
|
||||
int charsMatched = 0, charsMatchedInNodeFound = 0;
|
||||
|
||||
final int keyLength = key.length();
|
||||
outer_loop: while (charsMatched < keyLength) {
|
||||
Node<V> nextNode = currentNode.getOutgoingEdge(key.charAt(charsMatched));
|
||||
if (nextNode == null) {
|
||||
// Next node is a dead end...
|
||||
//noinspection UnnecessaryLabelOnBreakStatement
|
||||
break outer_loop;
|
||||
}
|
||||
|
||||
parentNodesParent = parentNode;
|
||||
parentNode = currentNode;
|
||||
currentNode = nextNode;
|
||||
charsMatchedInNodeFound = 0;
|
||||
CharSequence currentNodeEdgeCharacters = currentNode.getIncomingEdge();
|
||||
for (int i = 0, numEdgeChars = currentNodeEdgeCharacters.length(); i < numEdgeChars && charsMatched < keyLength; i++) {
|
||||
if (currentNodeEdgeCharacters.charAt(i) != key.charAt(charsMatched)) {
|
||||
// Found a difference in chars between character in key and a character in current node.
|
||||
// Current node is the deepest match (inexact match)....
|
||||
break outer_loop;
|
||||
}
|
||||
charsMatched++;
|
||||
charsMatchedInNodeFound++;
|
||||
}
|
||||
}
|
||||
return new SearchResult<>(key, currentNode, charsMatched, charsMatchedInNodeFound, parentNode, parentNodesParent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the entry whose key matches the largest substring from the start of the provided key
|
||||
*/
|
||||
public @Nullable Entry<String, V> searchTreeForLongestSubstring(CharSequence key) {
|
||||
String sKey = key.toString();
|
||||
|
||||
Node<V> currentNode = root;
|
||||
KeyValuePair<String, V> lastWithValue = null;
|
||||
int charsMatched = 0;
|
||||
|
||||
final int keyLength = sKey.length();
|
||||
outer_loop: while (charsMatched < keyLength) {
|
||||
Node<V> nextNode = currentNode.getOutgoingEdge(sKey.charAt(charsMatched));
|
||||
if (nextNode == null) {
|
||||
// Next node is a dead end...
|
||||
//noinspection UnnecessaryLabelOnBreakStatement
|
||||
break outer_loop;
|
||||
}
|
||||
|
||||
currentNode = nextNode;
|
||||
int charsMatchedInNodeFound = 0;
|
||||
CharSequence currentNodeEdgeCharacters = currentNode.getIncomingEdge();
|
||||
for (int j = 0, numEdgeChars = currentNodeEdgeCharacters.length(); j < numEdgeChars && charsMatched < keyLength; j++) {
|
||||
if (currentNodeEdgeCharacters.charAt(j) != sKey.charAt(charsMatched)) {
|
||||
// Found a difference in chars between character in key and a character in current node.
|
||||
// Current node is the deepest match (inexact match)....
|
||||
break outer_loop;
|
||||
}
|
||||
charsMatched++;
|
||||
charsMatchedInNodeFound++;
|
||||
}
|
||||
if (charsMatchedInNodeFound == currentNodeEdgeCharacters.length() && currentNode.hasValue()) {
|
||||
lastWithValue = new KeyValuePair<>(sKey.substring(0, charsMatched), currentNode.getValue(), String2ObjectMap.this);
|
||||
}
|
||||
}
|
||||
|
||||
return lastWithValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates results of searching the tree for a node for which a given key is a prefix. Encapsulates the node
|
||||
* found, its parent node, its parent's parent node, and the number of characters matched in the current node and
|
||||
* in total.
|
||||
* <p/>
|
||||
* Also classifies the search result so that algorithms in methods which use this SearchResult, when adding nodes
|
||||
* and removing nodes from the tree, can select appropriate strategies based on the classification.
|
||||
*/
|
||||
public static class SearchResult<V> {
|
||||
final CharSequence key;
|
||||
final Node<V> nodeFound;
|
||||
final int charsMatched;
|
||||
final int charsMatchedInNodeFound;
|
||||
final Node<V> parentNode;
|
||||
final Node<V> parentNodesParent;
|
||||
final Classification classification;
|
||||
|
||||
public enum Classification {
|
||||
EXACT_MATCH,
|
||||
INCOMPLETE_MATCH_TO_END_OF_EDGE,
|
||||
INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE,
|
||||
KEY_ENDS_MID_EDGE
|
||||
}
|
||||
|
||||
SearchResult(CharSequence key, Node<V> nodeFound, int charsMatched, int charsMatchedInNodeFound, Node<V> parentNode, Node<V> parentNodesParent) {
|
||||
this.key = key;
|
||||
this.nodeFound = nodeFound;
|
||||
this.charsMatched = charsMatched;
|
||||
this.charsMatchedInNodeFound = charsMatchedInNodeFound;
|
||||
this.parentNode = parentNode;
|
||||
this.parentNodesParent = parentNodesParent;
|
||||
|
||||
// Classify this search result...
|
||||
this.classification = classify(key, nodeFound, charsMatched, charsMatchedInNodeFound);
|
||||
}
|
||||
|
||||
protected Classification classify(CharSequence key, Node<V> nodeFound, int charsMatched, int charsMatchedInNodeFound) {
|
||||
if (charsMatched == key.length()) {
|
||||
if (charsMatchedInNodeFound == nodeFound.getIncomingEdge().length()) {
|
||||
return Classification.EXACT_MATCH;
|
||||
}
|
||||
else if (charsMatchedInNodeFound < nodeFound.getIncomingEdge().length()) {
|
||||
return Classification.KEY_ENDS_MID_EDGE;
|
||||
}
|
||||
}
|
||||
else if (charsMatched < key.length()) {
|
||||
if (charsMatchedInNodeFound == nodeFound.getIncomingEdge().length()) {
|
||||
return Classification.INCOMPLETE_MATCH_TO_END_OF_EDGE;
|
||||
}
|
||||
else if (charsMatchedInNodeFound < nodeFound.getIncomingEdge().length()) {
|
||||
return Classification.INCOMPLETE_MATCH_TO_MIDDLE_OF_EDGE;
|
||||
}
|
||||
}
|
||||
throw new IllegalStateException("Unexpected failure to classify SearchResult: " + this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SearchResult{" +
|
||||
"key=" + key +
|
||||
", nodeFound=" + nodeFound +
|
||||
", charsMatched=" + charsMatched +
|
||||
", charsMatchedInNodeFound=" + charsMatchedInNodeFound +
|
||||
", parentNode=" + parentNode +
|
||||
", parentNodesParent=" + parentNodesParent +
|
||||
", classification=" + classification +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
@NotNull
|
||||
@Override
|
||||
public Set<Entry<String, V>> entrySet() {
|
||||
return new AbstractSet<>() {
|
||||
@Override
|
||||
public Iterator<Entry<String, V>> iterator() {
|
||||
return String2ObjectMap.this.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return String2ObjectMap.this.size();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public Iterator<Entry<String, V>> iterator() {
|
||||
Deque<KeyValuePair<String, V>> result = StreamSupport
|
||||
.stream(getDescendantKeyValuePairs("", root).spliterator(), false)
|
||||
.collect(Collectors.toCollection(LinkedList::new));
|
||||
|
||||
return new Iterator<>() {
|
||||
Entry<String, V> previous = null;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return !result.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Entry<String, V> next() {
|
||||
if (result.isEmpty()) throw new NoSuchElementException();
|
||||
return previous = result.pop();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
if (previous == null) throw new IllegalStateException();
|
||||
String2ObjectMap.this.remove(previous.getKey());
|
||||
previous = null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public String prettyPrint() {
|
||||
return root.prettyPrint();
|
||||
}
|
||||
|
||||
/**
|
||||
* A unary operator that replaces all occurrences of keys of this map in the string with their values (via String.valueOf)
|
||||
* The largest possible replacement will be used.
|
||||
* The results of a replacement are not subject to further modification.
|
||||
*/
|
||||
public UnaryOperator<String> asSubstitution() {
|
||||
return origin -> {
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < origin.length();) {
|
||||
Entry<String, V> lastWithValue = searchTreeForLongestSubstring(origin.substring(i));
|
||||
|
||||
if (lastWithValue == null) {
|
||||
result.append(origin.charAt(i));
|
||||
i++;
|
||||
} else {
|
||||
result.append(lastWithValue.getValue());
|
||||
i += lastWithValue.getKey().length();
|
||||
}
|
||||
}
|
||||
return result.toString();
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class CharSequenceNodeLeafVoidValue<T> extends LeafNode<T> {
|
||||
public CharSequenceNodeLeafVoidValue(CharSequence edgeCharSequence) {
|
||||
super(edgeCharSequence);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T getValue() {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getValueString() {
|
||||
return "-";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class CharSequenceNodeLeafWithValue<T> extends LeafNode<T> {
|
||||
private final T value;
|
||||
|
||||
public CharSequenceNodeLeafWithValue(CharSequence edgeCharSequence, T value) {
|
||||
super(edgeCharSequence);
|
||||
this.value = Objects.requireNonNull(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getValueString() {
|
||||
return String.valueOf(value);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
public class CharSequenceNodeNonLeafVoidValue<T> extends NonLeafNode<T> {
|
||||
public CharSequenceNodeNonLeafVoidValue(CharSequence edgeCharSequence, List<Node<T>> outgoingEdges) {
|
||||
super(edgeCharSequence, outgoingEdges);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T getValue() {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getValueString() {
|
||||
return "-";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class CharSequenceNodeNonLeafWithValue<T> extends NonLeafNode<T> {
|
||||
private final T value;
|
||||
|
||||
public CharSequenceNodeNonLeafWithValue(CharSequence edgeCharSequence, T value, List<Node<T>> outgoingEdges) {
|
||||
super(edgeCharSequence, outgoingEdges);
|
||||
this.value = Objects.requireNonNull(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public T getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasValue() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getValueString() {
|
||||
return String.valueOf(value);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class LeafNode<T> extends Node<T> {
|
||||
public LeafNode(CharSequence edgeCharSequence) {
|
||||
super(edgeCharSequence);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Node<T>> getOutgoingEdges() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
@Override
|
||||
public Node<T> getOutgoingEdge(Character edgeFirstCharacter) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateOutgoingEdge(Node<T> childNode) {
|
||||
throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import io.gitlab.jfronny.commons.data.impl.util.NodeUtil;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public abstract class Node<T> implements Serializable, Comparable<Node<T>> {
|
||||
public static <T> Node<T> of(CharSequence edgeCharacters, T value, List<Node<T>> children, boolean isRoot) {
|
||||
NodeUtil.precheckCreation(edgeCharacters, children, isRoot);
|
||||
Objects.requireNonNull(value, "Use the other factory method for nodes without values!");
|
||||
if (children.isEmpty()) {
|
||||
return new CharSequenceNodeLeafWithValue<>(edgeCharacters, value);
|
||||
}
|
||||
else {
|
||||
return new CharSequenceNodeNonLeafWithValue<>(edgeCharacters, value, children);
|
||||
}
|
||||
}
|
||||
|
||||
public static <T> Node<T> of(CharSequence edgeCharacters, List<Node<T>> children, boolean isRoot) {
|
||||
NodeUtil.precheckCreation(edgeCharacters, children, isRoot);
|
||||
if (children.isEmpty()) {
|
||||
return new CharSequenceNodeLeafVoidValue<>(edgeCharacters);
|
||||
}
|
||||
else {
|
||||
return new CharSequenceNodeNonLeafVoidValue<>(edgeCharacters, children);
|
||||
}
|
||||
}
|
||||
|
||||
private final CharSequence incomingEdgeCharSequence;
|
||||
|
||||
public Node(CharSequence edgeCharSequence) {
|
||||
this.incomingEdgeCharSequence = edgeCharSequence;
|
||||
}
|
||||
|
||||
public CharSequence getIncomingEdge() {
|
||||
return incomingEdgeCharSequence;
|
||||
}
|
||||
public Character getIncomingEdgeFirstCharacter() {
|
||||
return incomingEdgeCharSequence.charAt(0);
|
||||
}
|
||||
|
||||
protected abstract String getValueString();
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Node{" +
|
||||
"edge=" + getIncomingEdge() +
|
||||
", value=" + getValueString() +
|
||||
", edges=" + getOutgoingEdges() +
|
||||
"}";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(@NotNull Node<T> o) {
|
||||
// Technically not a compliant implementation, but this should be fine given our use case
|
||||
return String.valueOf(getIncomingEdge()).compareTo(String.valueOf(o.getIncomingEdge()));
|
||||
}
|
||||
|
||||
public abstract T getValue();
|
||||
public abstract boolean hasValue();
|
||||
public abstract Node<T> getOutgoingEdge(Character edgeFirstCharacter);
|
||||
public abstract void updateOutgoingEdge(Node<T> childNode);
|
||||
public abstract List<Node<T>> getOutgoingEdges();
|
||||
|
||||
public Node<T> copyWithEdgeCharacters(CharSequence edgeCharacters, boolean isRoot) {
|
||||
return hasValue()
|
||||
? of(edgeCharacters, getValue(), getOutgoingEdges(), isRoot)
|
||||
: of(edgeCharacters, getOutgoingEdges(), isRoot);
|
||||
}
|
||||
|
||||
public Node<T> copyWithoutValue(boolean isRoot) {
|
||||
return of(getIncomingEdge(), getOutgoingEdges(), isRoot);
|
||||
}
|
||||
|
||||
public Node<T> copyWithChildren(List<Node<T>> children, boolean isRoot) {
|
||||
return hasValue()
|
||||
? of(getIncomingEdge(), getValue(), children, isRoot)
|
||||
: of(getIncomingEdge(), children, isRoot);
|
||||
}
|
||||
|
||||
public String prettyPrint() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
prettyPrint(sb, "", true, true);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
protected void prettyPrint(Appendable sb, String prefix, boolean isTail, boolean isRoot) {
|
||||
try {
|
||||
StringBuilder label = new StringBuilder();
|
||||
if (isRoot) {
|
||||
label.append("○");
|
||||
if (!getIncomingEdge().isEmpty()) {
|
||||
label.append(" ");
|
||||
}
|
||||
}
|
||||
label.append(getIncomingEdge());
|
||||
if (hasValue()) {
|
||||
label.append(" (").append(getValue()).append(")");
|
||||
}
|
||||
sb.append(prefix).append(isTail ? isRoot ? "" : "└── ○ " : "├── ○ ").append(label).append("\n");
|
||||
List<Node<T>> children = getOutgoingEdges();
|
||||
for (int i = 0; i < children.size() - 1; i++) {
|
||||
children.get(i).prettyPrint(sb, prefix + (isTail ? isRoot ? "" : " " : "│ "), false, false);
|
||||
}
|
||||
if (!children.isEmpty()) {
|
||||
children.get(children.size() - 1).prettyPrint(sb, prefix + (isTail ? isRoot ? "" : " " : "│ "), true, false);
|
||||
}
|
||||
} catch (IOException ioException) {
|
||||
// Rethrow the checked exception as a runtime exception...
|
||||
throw new IllegalStateException(ioException);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.node;
|
||||
|
||||
import io.gitlab.jfronny.commons.data.impl.util.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicReferenceArray;
|
||||
|
||||
public abstract class NonLeafNode<T> extends Node<T> {
|
||||
private final AtomicReferenceArray<Node<T>> outgoingEdges;
|
||||
private final List<Node<T>> outgoingEdgesAsList;
|
||||
|
||||
public NonLeafNode(CharSequence edgeCharSequence, List<Node<T>> outgoingEdges) {
|
||||
super(edgeCharSequence);
|
||||
Node<T>[] childNodeArray = outgoingEdges.toArray(new Node[outgoingEdges.size()]);
|
||||
// Sort the child nodes...
|
||||
Arrays.sort(childNodeArray, null);
|
||||
this.outgoingEdges = new AtomicReferenceArray<>(childNodeArray);
|
||||
this.outgoingEdgesAsList = new AtomicReferenceArrayListAdapter<>(this.outgoingEdges);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Node<T> getOutgoingEdge(Character edgeFirstCharacter) {
|
||||
// Binary search for the index of the node whose edge starts with the given character.
|
||||
// Note that this binary search is safe in the face of concurrent modification due to constraints
|
||||
// we enforce on use of the array, as documented in the binarySearchForEdge method...
|
||||
int index = NodeUtil.binarySearchForEdge(outgoingEdges, edgeFirstCharacter);
|
||||
if (index < 0) {
|
||||
// No such edge exists...
|
||||
return null;
|
||||
}
|
||||
// Atomically return the child node at this index...
|
||||
return outgoingEdges.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateOutgoingEdge(Node<T> childNode) {
|
||||
// Binary search for the index of the node whose edge starts with the given character.
|
||||
// Note that this binary search is safe in the face of concurrent modification due to constraints
|
||||
// we enforce on use of the array, as documented in the binarySearchForEdge method...
|
||||
int index = NodeUtil.binarySearchForEdge(outgoingEdges, childNode.getIncomingEdgeFirstCharacter());
|
||||
if (index < 0) {
|
||||
throw new IllegalStateException("Cannot update the reference to the following child node for the edge starting with '" + childNode.getIncomingEdgeFirstCharacter() +"', no such edge already exists: " + childNode);
|
||||
}
|
||||
// Atomically update the child node at this index...
|
||||
outgoingEdges.set(index, childNode);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Node<T>> getOutgoingEdges() {
|
||||
return outgoingEdgesAsList;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.util;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.AbstractList;
|
||||
import java.util.concurrent.atomic.AtomicReferenceArray;
|
||||
|
||||
/**
 * Read-only {@link java.util.List} view of an {@link AtomicReferenceArray}. Reads go straight to the wrapped
 * array, so later changes to the array are visible through the list. Mutating list operations are not
 * overridden and therefore behave as defined by {@link AbstractList}.
 */
public class AtomicReferenceArrayListAdapter<T> extends AbstractList<T> implements Serializable {
    private final AtomicReferenceArray<T> atomicReferenceArray;

    /**
     * @param atomicReferenceArray the array to expose; it is wrapped, not copied
     */
    public AtomicReferenceArrayListAdapter(AtomicReferenceArray<T> atomicReferenceArray) {
        this.atomicReferenceArray = atomicReferenceArray;
    }

    /** @return the length of the wrapped array */
    @Override
    public int size() {
        return this.atomicReferenceArray.length();
    }

    /** @return the element currently stored at {@code index} in the wrapped array */
    @Override
    public T get(int index) {
        final T element = this.atomicReferenceArray.get(index);
        return element;
    }
}
|
|
@ -0,0 +1,40 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.util;
|
||||
|
||||
/**
 * Static helpers for slicing and joining {@link CharSequence}s.
 */
public class CharSequences {
    /**
     * Returns the longest common prefix of the two arguments.
     *
     * @return a sub-sequence of {@code first}, possibly empty
     */
    public static CharSequence getCommonPrefix(CharSequence first, CharSequence second) {
        int minLength = Math.min(first.length(), second.length());
        int common = 0;
        while (common < minLength && first.charAt(common) == second.charAt(common)) {
            common++;
        }
        return first.subSequence(0, common);
    }

    /**
     * Returns the part of {@code input} starting at {@code startIndex}.
     *
     * @return the suffix, or the empty string if {@code startIndex} is at or beyond the end of the input
     */
    public static CharSequence getSuffix(CharSequence input, int startIndex) {
        if (startIndex >= input.length()) {
            return "";
        }
        return input.subSequence(startIndex, input.length());
    }

    /**
     * Returns the first {@code endIndex} characters of {@code input}.
     *
     * @return the prefix, or {@code input} itself if {@code endIndex} is beyond the end of the input
     */
    public static CharSequence getPrefix(CharSequence input, int endIndex) {
        if (endIndex > input.length()) {
            return input;
        }
        return input.subSequence(0, endIndex);
    }

    /**
     * Drops as many leading characters from {@code main} as {@code prefix} is long. The characters are not
     * compared; only the length of {@code prefix} is used.
     *
     * @return the remainder, or the empty string if {@code prefix} is longer than {@code main}
     */
    public static CharSequence subtractPrefix(CharSequence main, CharSequence prefix) {
        int startIndex = prefix.length();
        int mainLength = main.length();
        if (startIndex > mainLength) {
            return "";
        }
        return main.subSequence(startIndex, mainLength);
    }

    /**
     * Joins the two sequences.
     *
     * @return an immutable {@link String} holding the characters of {@code first} followed by those of
     *         {@code second}
     */
    public static CharSequence concatenate(final CharSequence first, final CharSequence second) {
        // Materialise as a String: handing out the StringBuilder itself would leak a mutable object with
        // identity-based equals() behind the CharSequence return type.
        return new StringBuilder().append(first).append(second).toString();
    }
}
|
|
@ -0,0 +1,51 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.util;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * A {@link Map.Entry} that remembers the map it came from: {@link #setValue(Object)} writes the new value
 * through to that map via {@code put}.
 * <p>
 * {@link #equals(Object)} and {@link #hashCode()} follow the contract documented on {@link Map.Entry}, so
 * instances compare equal to entries of other map implementations with the same key and value.
 */
public final class KeyValuePair<K, V> implements Map.Entry<K, V> {
    private final Map<K, V> source;
    private final K key;
    private V value;

    /**
     * @param key the entry's key
     * @param value the entry's current value
     * @param source the map that {@link #setValue(Object)} writes to
     */
    public KeyValuePair(K key, V value, Map<K, V> source) {
        this.key = key;
        this.value = value;
        this.source = source;
    }

    @Override
    public K getKey() {
        return key;
    }

    @Override
    public V getValue() {
        return value;
    }

    /**
     * Stores the value in the source map under this entry's key and remembers it locally.
     *
     * @return whatever the source map's {@code put} returned
     */
    @Override
    public V setValue(V value) {
        V result = source.put(key, value);
        this.value = value;
        return result;
    }

    @Override
    public String toString() {
        return "(" + key + ", " + value + ")";
    }

    /**
     * Compares key and value with any other {@link Map.Entry}, as required by the {@code Map.Entry}
     * contract; the source map does not take part in the comparison.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (!(obj instanceof Map.Entry<?, ?> that)) return false;
        return Objects.equals(this.key, that.getKey()) &&
                Objects.equals(this.value, that.getValue());
    }

    /**
     * Hash code as specified by {@link Map.Entry#hashCode()}: the XOR of the key's and the value's hash
     * codes, with {@code null} hashing to 0.
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(key) ^ Objects.hashCode(value);
    }
}
|
|
@ -0,0 +1,95 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.util;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
 * An unmodifiable iterator which computes the next element to return only when it is requested.
 * <p>
 * This class is inspired by com.google.common.collect.AbstractIterator in Google Guava,
 * which was written by the Google Guava Authors, in particular by Kevin Bourrillion.
 * <p>
 * Subclasses implement {@link #computeNext()} and call {@link #endOfData()} from it once the data is
 * exhausted. Alternatively, {@link #of(LazyIteratorFn)} adapts a function.
 *
 * @author Niall Gallagher
 */
public abstract class LazyIterator<T> implements Iterator<T> {
    // Element computed by the last successful computeNext(); only meaningful while state is READY.
    T next = null;

    enum State { READY, NOT_READY, DONE, FAILED }

    State state = State.NOT_READY;

    /**
     * Function form of {@link #computeNext()}: returns the next element, or the result of
     * {@link Scope#endOfData()} when there is none.
     */
    public interface LazyIteratorFn<T> {
        T computeNext(Scope<T> scope);
    }

    /**
     * Handle passed to a {@link LazyIteratorFn} through which it signals the end of the data.
     */
    public interface Scope<T> {
        T endOfData();
    }

    /**
     * Creates an {@link Iterable} that asks the supplier for a fresh function each time an iterator is
     * requested.
     */
    public static <T> Iterable<T> iterable(Supplier<LazyIteratorFn<T>> supplier) {
        return () -> LazyIterator.of(supplier.get());
    }

    /**
     * Creates a lazy iterator whose elements are computed by the given function.
     */
    public static <T> LazyIterator<T> of(LazyIteratorFn<T> supplier) {
        return new LazyIterator<>() {
            @Override
            protected T computeNext() {
                return supplier.computeNext(this::endOfData);
            }
        };
    }

    /**
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Iterator.remove() is not supported");
    }

    /**
     * @throws IllegalStateException if an earlier call to {@link #computeNext()} threw an exception
     */
    @Override
    public final boolean hasNext() {
        return switch (state) {
            case FAILED -> throw new IllegalStateException("This iterator is in an inconsistent state, and can no longer be used, " +
                    "due to an exception previously thrown by the computeNext() method");
            case DONE -> false;
            case READY -> true;
            case NOT_READY -> tryToComputeNext();
        };
    }

    boolean tryToComputeNext() {
        state = State.FAILED; // temporary pessimism: stays FAILED if computeNext() throws
        next = computeNext();
        if (state != State.DONE) {
            state = State.READY;
            return true;
        }
        return false;
    }

    /**
     * @throws NoSuchElementException if there are no more elements
     */
    @Override
    public final T next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        state = State.NOT_READY;
        T result = next;
        // Drop our reference so that an element already handed out is not kept reachable by the iterator.
        next = null;
        return result;
    }

    /**
     * Marks the iterator as exhausted.
     *
     * @return a dummy value which if returned by the <code>computeNext()</code> method, signals that there are no more
     * elements to return
     */
    protected final T endOfData() {
        state = State.DONE;
        return null;
    }

    /**
     * @return The next element which the iterator should return, or the result of calling <code>endOfData()</code>
     * if there are no more elements to return
     */
    protected abstract T computeNext();
}
|
|
@ -0,0 +1,48 @@
|
|||
package io.gitlab.jfronny.commons.data.impl.util;
|
||||
|
||||
import io.gitlab.jfronny.commons.data.impl.node.Node;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicReferenceArray;
|
||||
|
||||
public class NodeUtil {
|
||||
public static <T> int binarySearchForEdge(AtomicReferenceArray<Node<T>> childNodes, Character edgeFirstCharacter) {
|
||||
// inspired by Collections#indexedBinarySearch()
|
||||
int low = 0;
|
||||
int high = childNodes.length() - 1;
|
||||
|
||||
while (low <= high) {
|
||||
int mid = (low + high) >>> 1;
|
||||
Node<T> midVal = childNodes.get(mid);
|
||||
int cmp = midVal.getIncomingEdgeFirstCharacter().compareTo(edgeFirstCharacter);
|
||||
|
||||
if (cmp < 0)
|
||||
low = mid + 1;
|
||||
else if (cmp > 0)
|
||||
high = mid - 1;
|
||||
else
|
||||
return mid; // key found
|
||||
}
|
||||
return -(low + 1); // key not found
|
||||
}
|
||||
|
||||
public static <T> void precheckCreation(CharSequence edgeCharacters, List<Node<T>> children, boolean isRoot) {
|
||||
if (edgeCharacters == null) {
|
||||
throw new IllegalStateException("The edgeCharacters argument was null");
|
||||
}
|
||||
if (!isRoot && edgeCharacters.length() == 0) {
|
||||
throw new IllegalStateException("Invalid edge characters for non-root node: " + edgeCharacters);
|
||||
}
|
||||
if (children == null) {
|
||||
throw new IllegalStateException("The childNodes argument was null");
|
||||
}
|
||||
// Sanity check that no two nodes specify an edge with the same first character...
|
||||
Set<Character> uniqueChars = new HashSet<>(children.size());
|
||||
for (Node<T> node : children) {
|
||||
uniqueChars.add(node.getIncomingEdgeFirstCharacter());
|
||||
}
|
||||
if (children.size() != uniqueChars.size()) {
|
||||
throw new IllegalStateException("Duplicate edge detected in list of nodes supplied: " + children);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,221 @@
|
|||
package io.gitlab.jfronny.commons.test;
|
||||
|
||||
import io.gitlab.jfronny.commons.data.String2ObjectMap;
|
||||
import io.gitlab.jfronny.commons.data.impl.node.Node;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.function.UnaryOperator;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class String2ObjectMapTest {
|
||||
@Test
|
||||
void testBuildTreeByHand() {
|
||||
// Build the tree by hand, as if the following strings were added: B, BA, BAN, BANDANA, BANAN, BANANA
|
||||
|
||||
// ○
|
||||
// └── ○ B (1)
|
||||
// └── ○ A (2)
|
||||
// └── ○ N (3)
|
||||
// ├── ○ AN (5)
|
||||
// │ └── ○ A (6)
|
||||
// └── ○ DANA (4)
|
||||
|
||||
final Node<Integer> root, n1, n2, n3, n4, n5, n6;
|
||||
n6 = Node.of("A", 6, Collections.emptyList(), false);
|
||||
n5 = Node.of("AN", 5, Arrays.asList(n6), false);
|
||||
n4 = Node.of("DANA", 4, Collections.emptyList(), false);
|
||||
n3 = Node.of("N", 3, Arrays.asList(n4, n5), false); // note: it should sort alphabetically such that n5 is first
|
||||
n2 = Node.of("A", 2, Arrays.asList(n3), false);
|
||||
n1 = Node.of("B", 1, Arrays.asList(n2), false);
|
||||
root = Node.of("", Arrays.asList(n1), true);
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ B (1)
|
||||
└── ○ A (2)
|
||||
└── ○ N (3)
|
||||
├── ○ AN (5)
|
||||
│ └── ○ A (6)
|
||||
└── ○ DANA (4)
|
||||
""", root.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_AddToRoot() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("A", 1);
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ A (1)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_ChildNodeSorting() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("B", 1);
|
||||
tree.put("A", 2);
|
||||
assertEquals("""
|
||||
○
|
||||
├── ○ A (2)
|
||||
└── ○ B (1)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_AppendChild() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("FOO", 1);
|
||||
tree.put("FOOBAR", 2);
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ FOO (1)
|
||||
└── ○ BAR (2)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_SplitEdge() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("FOOBAR", 1);
|
||||
tree.put("FOO", 2);
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ FOO (2)
|
||||
└── ○ BAR (1)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_SplitWithImplicitNode() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("FOOBAR", 1);
|
||||
tree.put("FOOD", 2);
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ FOO
|
||||
├── ○ BAR (1)
|
||||
└── ○ D (2)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_SplitAndMove() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
tree.put("TEST", 1);
|
||||
tree.put("TEAM", 2);
|
||||
tree.put("TOAST", 3);
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ T
|
||||
├── ○ E
|
||||
│ ├── ○ AM (2)
|
||||
│ └── ○ ST (1)
|
||||
└── ○ OAST (3)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_OverwriteValue() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
|
||||
Integer existing;
|
||||
existing = tree.put("FOO", 1);
|
||||
assertNull(existing);
|
||||
existing = tree.put("FOO", 2);
|
||||
assertNotNull(existing);
|
||||
|
||||
assertEquals(Integer.valueOf(1), existing);
|
||||
assertEquals(Integer.valueOf(2), tree.get("FOO"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutIfAbsent_DoNotOverwriteValue() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
|
||||
Integer existing = tree.putIfAbsent("FOO", 1);
|
||||
assertNull(existing);
|
||||
|
||||
existing = tree.putIfAbsent("FOO", 2);
|
||||
assertNotNull(existing);
|
||||
|
||||
assertEquals(Integer.valueOf(1), existing);
|
||||
assertEquals(Integer.valueOf(1), tree.get("FOO"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutIfAbsent_SplitNode() {
|
||||
String2ObjectMap<Integer> tree = new String2ObjectMap<>();
|
||||
|
||||
// ○
|
||||
// └── ○ FOO // implicit node added automatically
|
||||
// ├── ○ BAR (1)
|
||||
// └── ○ D (1)
|
||||
|
||||
Integer existing;
|
||||
existing = tree.putIfAbsent("FOOBAR", 1);
|
||||
assertNull(existing);
|
||||
existing = tree.putIfAbsent("FOOD", 1);
|
||||
assertNull(existing);
|
||||
|
||||
// This tests 'overwrite' set to true and exact match for node,
|
||||
// but no existing value to return (i.e. implicit node above)...
|
||||
|
||||
// ○
|
||||
// └── ○ FOO (2)
|
||||
// ├── ○ BAR (1)
|
||||
// └── ○ D (1)
|
||||
|
||||
existing = tree.putIfAbsent("FOO", 2);
|
||||
assertNull(existing);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPut_VoidValue_CharArrayNodeFactory() {
|
||||
String2ObjectMap<VoidValue> tree = new String2ObjectMap<>();
|
||||
tree.put("FOO", VoidValue.SINGLETON);
|
||||
tree.put("FOOBAR", VoidValue.SINGLETON);
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ FOO (-)
|
||||
└── ○ BAR (-)
|
||||
""", tree.prettyPrint());
|
||||
}
|
||||
|
||||
private enum VoidValue {
|
||||
SINGLETON;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "-";
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testAsSubstitution() {
|
||||
String2ObjectMap<String> tree = new String2ObjectMap<>();
|
||||
tree.put("Joe", "Wilhelm");
|
||||
tree.put("Joe Biden", "Joachim Böden");
|
||||
tree.put("Joey", "Joe");
|
||||
|
||||
assertEquals("""
|
||||
○
|
||||
└── ○ Joe (Wilhelm)
|
||||
├── ○ Biden (Joachim Böden)
|
||||
└── ○ y (Joe)
|
||||
""", tree.prettyPrint());
|
||||
|
||||
UnaryOperator<String> substitution = tree.asSubstitution();
|
||||
assertEquals("Jo", substitution.apply("Jo"));
|
||||
assertEquals("Wilhelm", substitution.apply("Joe"));
|
||||
assertEquals("Wilhelm B", substitution.apply("Joe B"));
|
||||
assertEquals("Wilhelm Wilhelm Bide Joachim Böden Joe Biden", substitution.apply("Joe Joe Bide Joe Biden Joey Biden"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue