amend compilation errors in zip-string-set.sml
This commit is contained in:
@@ -57,7 +57,7 @@ sig
|
|||||||
val foldrWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
val foldrWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
||||||
end
|
end
|
||||||
|
|
||||||
structure ZipStringSet: StringSet =
|
structure ZipStringSet =
|
||||||
struct
|
struct
|
||||||
datatype t =
|
datatype t =
|
||||||
CHILDREN of
|
CHILDREN of
|
||||||
@@ -181,13 +181,13 @@ struct
|
|||||||
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
||||||
else if firstNodeChr = searchChr then
|
else if firstNodeChr = searchChr then
|
||||||
(* check if there is a full/partial key match at this node *)
|
(* check if there is a full/partial key match at this node *)
|
||||||
checkNodeMatch (searchKey, firstNode, keyPos, chd, idx)
|
checkNodeMatch (searchKey, firstNode, keyPos, chd, 0)
|
||||||
else
|
else
|
||||||
(* binary search this node to see if there is a matching chr *)
|
(* binary search this node to see if there is a matching chr *)
|
||||||
(case findBinSearch (searchChr, keyPos, chd) of
|
(case findBinSearch (searchChr, keyPos, khd) of
|
||||||
SOME idx =>
|
SOME idx =>
|
||||||
checkNodeMatch
|
checkNodeMatch
|
||||||
(searchKey, Vector.sub (keys, idx), keyPos, chd, idx)
|
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||||
| NONE => false)
|
| NONE => false)
|
||||||
end
|
end
|
||||||
| (_, _) => false
|
| (_, _) => false
|
||||||
@@ -204,13 +204,14 @@ struct
|
|||||||
checkExistsRight (searchKey, searchChr, keyPos, ktl, ctl)
|
checkExistsRight (searchKey, searchChr, keyPos, ktl, ctl)
|
||||||
else if lastNodeChr = searchChr then
|
else if lastNodeChr = searchChr then
|
||||||
(* check for full/partial match at this node *)
|
(* check for full/partial match at this node *)
|
||||||
checkNodeMatch (searchKey, lastNode, keyPos, chd, idx)
|
checkNodeMatch
|
||||||
|
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||||
else
|
else
|
||||||
(* binary search this node to see if there is a matching chr *)
|
(* binary search this node to see if there is a matching chr *)
|
||||||
(case findBinSearch (searchChr, keyPos, chd) of
|
(case findBinSearch (searchChr, keyPos, khd) of
|
||||||
SOME idx =>
|
SOME idx =>
|
||||||
checkNodeMatch
|
checkNodeMatch
|
||||||
(searchKey, Vector.sub (keys, idx), keyPos, chd, idx)
|
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||||
| NONE => false)
|
| NONE => false)
|
||||||
end
|
end
|
||||||
| (_, _) => false
|
| (_, _) => false
|
||||||
@@ -225,7 +226,7 @@ struct
|
|||||||
val startNodeChr = String.sub (firstNode, keyPos)
|
val startNodeChr = String.sub (firstNode, keyPos)
|
||||||
in
|
in
|
||||||
if searchChr < startNodeChr then
|
if searchChr < startNodeChr then
|
||||||
checkExistsLeft (searchKey, searchChr, ktl, ctl)
|
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
||||||
else if searchChr = startNodeChr then
|
else if searchChr = startNodeChr then
|
||||||
(* check string match on first key/firstNode
|
(* check string match on first key/firstNode
|
||||||
* and recurse if full or partial match *)
|
* and recurse if full or partial match *)
|
||||||
@@ -233,15 +234,15 @@ struct
|
|||||||
else
|
else
|
||||||
(* implicit: searchChr > startNodeChr *)
|
(* implicit: searchChr > startNodeChr *)
|
||||||
let
|
let
|
||||||
val khdLast = Vector.sub (khd, Vector.length khd - 1)
|
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
||||||
val lastNodeChr = String.sub (khdLast, keyPos)
|
val lastNodeChr = String.sub (lastNode, keyPos)
|
||||||
in
|
in
|
||||||
if searchChr > lastNodeChr then
|
if searchChr > lastNodeChr then
|
||||||
(* check rightKeys/rightChildren *)
|
(* check rightKeys/rightChildren *)
|
||||||
checkExistsRight
|
checkExistsRight
|
||||||
(searchKey, searchChr, keyPos, rightKeys, rightChildren)
|
(searchKey, searchChr, keyPos, rightKeys, rightChildren)
|
||||||
else if searchChr = lastNodeChr then
|
else if searchChr = lastNodeChr then
|
||||||
(* check string match on last key/khdLast
|
(* check string match on last key/lastNode
|
||||||
* and recurse if full or partial match *)
|
* and recurse if full or partial match *)
|
||||||
checkNodeMatch
|
checkNodeMatch
|
||||||
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||||
@@ -249,10 +250,10 @@ struct
|
|||||||
(* implicit: searchChr < lastNodeChr
|
(* implicit: searchChr < lastNodeChr
|
||||||
* should perform binary search at this node
|
* should perform binary search at this node
|
||||||
* to find if key exists *)
|
* to find if key exists *)
|
||||||
(case findBinSearch (searchChr, keyPos, chd) of
|
(case findBinSearch (searchChr, keyPos, khd) of
|
||||||
SOME idx =>
|
SOME idx =>
|
||||||
checkNodeMatch
|
checkNodeMatch
|
||||||
(searchKey, Vector.sub (keys, idx), keyPos, chd, idx)
|
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||||
| NONE => false)
|
| NONE => false)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -260,7 +261,12 @@ struct
|
|||||||
(* leftKeys and leftChildren are both empty
|
(* leftKeys and leftChildren are both empty
|
||||||
* so check rightKeys/rightChildren *)
|
* so check rightKeys/rightChildren *)
|
||||||
checkExistsRight
|
checkExistsRight
|
||||||
(searchKey, searchChr, keyPos, rightKeys, rightChildren)
|
( searchKey
|
||||||
|
, String.sub (searchKey, keyPos)
|
||||||
|
, keyPos
|
||||||
|
, rightKeys
|
||||||
|
, rightChildren
|
||||||
|
)
|
||||||
|
|
||||||
and recurseExists (searchKey, keyPos, trie) =
|
and recurseExists (searchKey, keyPos, trie) =
|
||||||
case trie of
|
case trie of
|
||||||
@@ -275,651 +281,4 @@ struct
|
|||||||
fun exists (searchKey, trie) =
|
fun exists (searchKey, trie) =
|
||||||
if isEmpty trie orelse String.size searchKey = 0 then false
|
if isEmpty trie orelse String.size searchKey = 0 then false
|
||||||
else recurseExists (searchKey, 0, trie)
|
else recurseExists (searchKey, 0, trie)
|
||||||
|
|
||||||
datatype prefix_result =
|
|
||||||
PREFIX_FOUND of string * t
|
|
||||||
| NO_PREFIX_FOUND
|
|
||||||
| PREFIX_MATCHES_WHOLE_TRIE
|
|
||||||
|
|
||||||
fun helpGetPrefixSubtrieChildren (prefix, keyPos, keys, children, trie) =
|
|
||||||
let
|
|
||||||
val findChr = String.sub (prefix, keyPos)
|
|
||||||
in
|
|
||||||
case findBinSearch (findChr, keyPos, keys) of
|
|
||||||
SOME idx =>
|
|
||||||
let
|
|
||||||
val trieKey = Vector.sub (keys, idx)
|
|
||||||
in
|
|
||||||
(case searchKeyMatch (prefix, trieKey, keyPos + 1) of
|
|
||||||
NO_SEARCH_MATCH => NO_PREFIX_FOUND
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
let
|
|
||||||
val trieChild = Vector.sub (children, idx)
|
|
||||||
in
|
|
||||||
helpGetPrefixSubtrie (prefix, String.size trieKey, trieChild)
|
|
||||||
end
|
|
||||||
| FULL_SEARCH_MATCH =>
|
|
||||||
let val node = Vector.sub (children, idx)
|
|
||||||
in PREFIX_FOUND (prefix, node)
|
|
||||||
end
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY =>
|
|
||||||
let val node = Vector.sub (children, idx)
|
|
||||||
in PREFIX_FOUND (trieKey, node)
|
|
||||||
end)
|
|
||||||
end
|
|
||||||
| NONE => NO_PREFIX_FOUND
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpGetPrefixSubtrie (prefix, keyPos, trie) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {keys, children} =>
|
|
||||||
helpGetPrefixSubtrieChildren (prefix, keyPos, keys, children, trie)
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
helpGetPrefixSubtrieChildren (prefix, keyPos, keys, children, trie)
|
|
||||||
| FOUND => NO_PREFIX_FOUND
|
|
||||||
|
|
||||||
fun getPrefixSubtrie (prefix, trie) =
|
|
||||||
if String.size prefix > 0 then
|
|
||||||
if isEmpty trie then NO_PREFIX_FOUND
|
|
||||||
else helpGetPrefixSubtrie (prefix, 0, trie)
|
|
||||||
else
|
|
||||||
PREFIX_MATCHES_WHOLE_TRIE
|
|
||||||
|
|
||||||
fun helpFoldlTrieVector (f, pos, keys, children, acc) =
|
|
||||||
if pos = Vector.length children then
|
|
||||||
acc
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val curChild = Vector.sub (children, pos)
|
|
||||||
val acc = helpFoldl (f, curChild, acc)
|
|
||||||
val acc =
|
|
||||||
if isFoundNode curChild then f (Vector.sub (keys, pos), acc) else acc
|
|
||||||
in
|
|
||||||
helpFoldlTrieVector (f, pos + 1, keys, children, acc)
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpFoldl (f, trie, acc) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {keys, children} =>
|
|
||||||
helpFoldlTrieVector (f, 0, keys, children, acc)
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
helpFoldlTrieVector (f, 0, keys, children, acc)
|
|
||||||
| FOUND => acc
|
|
||||||
|
|
||||||
fun foldl f initial trie = helpFoldl (f, trie, initial)
|
|
||||||
|
|
||||||
fun foldlWithPrefix f initial trie prefix =
|
|
||||||
case getPrefixSubtrie (prefix, trie) of
|
|
||||||
PREFIX_FOUND (prefix, subtrie) =>
|
|
||||||
let val acc = helpFoldl (f, subtrie, initial)
|
|
||||||
in if isFoundNode subtrie then f (prefix, acc) else acc
|
|
||||||
end
|
|
||||||
| NO_PREFIX_FOUND => initial
|
|
||||||
| PREFIX_MATCHES_WHOLE_TRIE => helpFoldl (f, trie, initial)
|
|
||||||
|
|
||||||
fun helpFoldrTrieVector (f, pos, keys, children, acc) =
|
|
||||||
if pos < 0 then
|
|
||||||
acc
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val curChild = Vector.sub (children, pos)
|
|
||||||
val acc = helpFoldr (f, curChild, acc)
|
|
||||||
val acc =
|
|
||||||
if isFoundNode curChild then f (Vector.sub (keys, pos), acc) else acc
|
|
||||||
in
|
|
||||||
helpFoldrTrieVector (f, pos - 1, keys, children, acc)
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpFoldr (f, trie, acc) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {keys, children} =>
|
|
||||||
helpFoldrTrieVector (f, Vector.length keys - 1, keys, children, acc)
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
helpFoldrTrieVector (f, Vector.length keys - 1, keys, children, acc)
|
|
||||||
| FOUND => acc
|
|
||||||
|
|
||||||
fun foldr f initial trie = helpFoldr (f, trie, initial)
|
|
||||||
|
|
||||||
fun foldrWithPrefix f initial trie prefix =
|
|
||||||
case getPrefixSubtrie (prefix, trie) of
|
|
||||||
PREFIX_FOUND (prefix, subtrie) =>
|
|
||||||
let val acc = helpFoldr (f, subtrie, initial)
|
|
||||||
in if isFoundNode subtrie then f (prefix, acc) else acc
|
|
||||||
end
|
|
||||||
| NO_PREFIX_FOUND => initial
|
|
||||||
| PREFIX_MATCHES_WHOLE_TRIE => helpFoldr (f, trie, initial)
|
|
||||||
|
|
||||||
(* recurseHelpGetPrefixList and helpGetPrefixList are basically manually coded
|
|
||||||
* foldr functions over the trie, applying the accumuluator to every found
|
|
||||||
* node, from right to left.
|
|
||||||
* No need to recode it as a usage of a generic foldr function though,
|
|
||||||
* because lower dispatch cost this way. *)
|
|
||||||
fun recurseHelpGetPrefixList (pos, keys, children, acc) =
|
|
||||||
if pos < 0 then
|
|
||||||
acc
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val curChild = Vector.sub (children, pos)
|
|
||||||
val acc = helpGetPrefixList (curChild, acc)
|
|
||||||
val acc =
|
|
||||||
if isFoundNode curChild then Vector.sub (keys, pos) :: acc else acc
|
|
||||||
in
|
|
||||||
recurseHelpGetPrefixList (pos - 1, keys, children, acc)
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpGetPrefixList (trie, acc) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {keys, children} =>
|
|
||||||
recurseHelpGetPrefixList (Vector.length keys - 1, keys, children, acc)
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
recurseHelpGetPrefixList (Vector.length keys - 1, keys, children, acc)
|
|
||||||
| FOUND => acc
|
|
||||||
|
|
||||||
fun getPrefixList (prefix, trie) =
|
|
||||||
case getPrefixSubtrie (prefix, trie) of
|
|
||||||
PREFIX_FOUND (prefix, subtrie) =>
|
|
||||||
let val lst = helpGetPrefixList (subtrie, [])
|
|
||||||
in if isFoundNode subtrie then prefix :: lst else lst
|
|
||||||
end
|
|
||||||
| NO_PREFIX_FOUND => []
|
|
||||||
| PREFIX_MATCHES_WHOLE_TRIE => helpGetPrefixList (trie, [])
|
|
||||||
|
|
||||||
fun toList trie = helpGetPrefixList (trie, [])
|
|
||||||
|
|
||||||
datatype insert_string_match =
|
|
||||||
NO_INSERT_MATCH
|
|
||||||
(* may need to split string if difference found but prefix matched *)
|
|
||||||
| DIFFERENCE_FOUND_AT of int
|
|
||||||
(* may not need to do anything if insert key matched,
|
|
||||||
* as this is a set where only strings are stored.
|
|
||||||
* however, if this is a non-found node, then I need to change
|
|
||||||
* the tag/case. *)
|
|
||||||
| FULL_INSERT_MATCH
|
|
||||||
(* if insert key contains trie key, may need to recurse down node *)
|
|
||||||
| INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
(* if trie key contains insert key, need to split node *)
|
|
||||||
| TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
|
|
||||||
fun insertKeyMatch (insertKey, trieKey, keyPos) =
|
|
||||||
if
|
|
||||||
keyPos < String.size insertKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then
|
|
||||||
let
|
|
||||||
val searchChr = String.sub (insertKey, keyPos)
|
|
||||||
val trieChr = String.sub (trieKey, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr = trieChr then
|
|
||||||
insertKeyMatch (insertKey, trieKey, keyPos + 1)
|
|
||||||
else
|
|
||||||
DIFFERENCE_FOUND_AT keyPos
|
|
||||||
end
|
|
||||||
else
|
|
||||||
INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else if
|
|
||||||
keyPos = String.size insertKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
else if keyPos = String.size trieKey then FULL_INSERT_MATCH
|
|
||||||
else INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else (* implicit: keyPos > String.size insertKey *) if
|
|
||||||
keyPos <= String.size trieKey
|
|
||||||
then
|
|
||||||
TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
else
|
|
||||||
NO_INSERT_MATCH
|
|
||||||
|
|
||||||
datatype insert_bin_search_result =
|
|
||||||
INSERT_NEW_CHILD of int
|
|
||||||
| FOUND_INSERT_POS of int
|
|
||||||
| APPEND_NEW_CHILD
|
|
||||||
|
|
||||||
fun linearSearch (findChr, keyPos, idx, children) =
|
|
||||||
if idx = Vector.length children then
|
|
||||||
APPEND_NEW_CHILD
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val curStr = Vector.sub (children, idx)
|
|
||||||
val curChr = String.sub (curStr, keyPos)
|
|
||||||
in
|
|
||||||
if curChr > findChr then INSERT_NEW_CHILD idx
|
|
||||||
else linearSearch (findChr, keyPos, idx + 1, children)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun helpInsertBinSearch (findChr, keyPos, children, low, high) =
|
|
||||||
let
|
|
||||||
val mid = low + ((high - low) div 2)
|
|
||||||
in
|
|
||||||
if high >= low then
|
|
||||||
let
|
|
||||||
val midStr = Vector.sub (children, mid)
|
|
||||||
val midChr = String.sub (midStr, keyPos)
|
|
||||||
in
|
|
||||||
if midChr = findChr then
|
|
||||||
FOUND_INSERT_POS mid
|
|
||||||
else if midChr < findChr then
|
|
||||||
helpInsertBinSearch (findChr, keyPos, children, mid + 1, high)
|
|
||||||
else
|
|
||||||
helpInsertBinSearch (findChr, keyPos, children, low, mid - 1)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
linearSearch (findChr, keyPos, if mid >= 0 then mid else 0, children)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun insertBinSearch (findChr, keyPos, children) =
|
|
||||||
helpInsertBinSearch
|
|
||||||
(findChr, keyPos, children, 0, Vector.length children - 1)
|
|
||||||
|
|
||||||
fun insertDifferenceFoundAt
|
|
||||||
( insKey
|
|
||||||
, insIdx
|
|
||||||
, splitTrieKeyStart
|
|
||||||
, trieChild
|
|
||||||
, childKeys
|
|
||||||
, childChildren
|
|
||||||
, parentKeys
|
|
||||||
, parentChildren
|
|
||||||
, parentConstructor
|
|
||||||
) =
|
|
||||||
let
|
|
||||||
(* child node should always have CHILDREN case,
|
|
||||||
* because we are splitting prefix into two,
|
|
||||||
* when neither trieKey nor insKey match the prefix. *)
|
|
||||||
val childNode = CHILDREN {keys = childKeys, children = childChildren}
|
|
||||||
val keys =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (idx, key) => if idx <> insIdx then key else splitTrieKeyStart)
|
|
||||||
parentKeys
|
|
||||||
|
|
||||||
val children =
|
|
||||||
Vector.mapi (fn (idx, elt) => if idx <> insIdx then elt else childNode)
|
|
||||||
parentChildren
|
|
||||||
in
|
|
||||||
parentConstructor {keys = keys, children = children}
|
|
||||||
end
|
|
||||||
|
|
||||||
fun insertNewChild (keys, insIdx, insKey, children, constructor) =
|
|
||||||
let
|
|
||||||
val newLength = Vector.length keys + 1
|
|
||||||
val newKeys = Vector.tabulate (newLength, fn idx =>
|
|
||||||
if idx < insIdx then Vector.sub (keys, idx)
|
|
||||||
else if idx > insIdx then Vector.sub (keys, idx - 1)
|
|
||||||
else insKey)
|
|
||||||
|
|
||||||
val newChildren = Vector.tabulate (newLength, fn idx =>
|
|
||||||
if idx < insIdx then Vector.sub (children, idx)
|
|
||||||
else if idx > insIdx then Vector.sub (children, idx - 1)
|
|
||||||
else FOUND)
|
|
||||||
in
|
|
||||||
constructor {keys = newKeys, children = newChildren}
|
|
||||||
end
|
|
||||||
|
|
||||||
fun appendNewChild (keys, insKey, children, constructor) =
|
|
||||||
let
|
|
||||||
val newKeys = Vector.concat [keys, Vector.fromList [insKey]]
|
|
||||||
val newChildren = Vector.concat [children, Vector.fromList [FOUND]]
|
|
||||||
in
|
|
||||||
constructor {keys = newKeys, children = newChildren}
|
|
||||||
end
|
|
||||||
|
|
||||||
fun foundInsertPos (keys, children, keyPos, insKey, insIdx, trie, constructor) =
|
|
||||||
let
|
|
||||||
val trieKey = Vector.sub (keys, insIdx)
|
|
||||||
in
|
|
||||||
(case insertKeyMatch (insKey, trieKey, keyPos + 1) of
|
|
||||||
(* may need to split string if difference found but prefix matched *)
|
|
||||||
DIFFERENCE_FOUND_AT diffIdx =>
|
|
||||||
let
|
|
||||||
val splitTrieKeyStart = String.substring (trieKey, 0, diffIdx)
|
|
||||||
val trieChild = Vector.sub (children, insIdx)
|
|
||||||
in
|
|
||||||
if String.sub (trieKey, diffIdx) > String.sub (insKey, diffIdx) then
|
|
||||||
(* place insKey before trieKey *)
|
|
||||||
let
|
|
||||||
val childKeys = Vector.fromList [insKey, trieKey]
|
|
||||||
val childChildren = Vector.fromList [FOUND, trieChild]
|
|
||||||
in
|
|
||||||
insertDifferenceFoundAt
|
|
||||||
( insKey
|
|
||||||
, insIdx
|
|
||||||
, splitTrieKeyStart
|
|
||||||
, trieChild
|
|
||||||
, childKeys
|
|
||||||
, childChildren
|
|
||||||
, keys
|
|
||||||
, children
|
|
||||||
, constructor
|
|
||||||
)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
(* place trieKey before insKey *)
|
|
||||||
let
|
|
||||||
val childKeys = Vector.fromList [trieKey, insKey]
|
|
||||||
val childChildren = Vector.fromList [trieChild, FOUND]
|
|
||||||
in
|
|
||||||
insertDifferenceFoundAt
|
|
||||||
( insKey
|
|
||||||
, insIdx
|
|
||||||
, splitTrieKeyStart
|
|
||||||
, trieChild
|
|
||||||
, childKeys
|
|
||||||
, childChildren
|
|
||||||
, keys
|
|
||||||
, children
|
|
||||||
, constructor
|
|
||||||
)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
(* may not need to do anything if insert key matched,
|
|
||||||
* as this is a set where only strings are stored.
|
|
||||||
* however, if this is a non-found node, then I need to change
|
|
||||||
* the tag/case. *)
|
|
||||||
| FULL_INSERT_MATCH =>
|
|
||||||
(* in case of a full match,
|
|
||||||
* need to turn child into FOUND_WITH_CHILDREN
|
|
||||||
* or FOUND case, if it not already either
|
|
||||||
*)
|
|
||||||
(case Vector.sub (children, insIdx) of
|
|
||||||
CHILDREN {keys = childKeys, children = childChildren} =>
|
|
||||||
let
|
|
||||||
val newTrieChild =
|
|
||||||
FOUND_WITH_CHILDREN
|
|
||||||
{keys = childKeys, children = childChildren}
|
|
||||||
val newParentChildren =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (childIdx, elt) =>
|
|
||||||
if insIdx <> childIdx then elt else newTrieChild)
|
|
||||||
children
|
|
||||||
in
|
|
||||||
constructor {keys = keys, children = newParentChildren}
|
|
||||||
end
|
|
||||||
| _ => trie)
|
|
||||||
(* if insert key contains trie key, need to recurse down node *)
|
|
||||||
| INSERT_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
let
|
|
||||||
val trieChild = Vector.sub (children, insIdx)
|
|
||||||
val newTrieChild =
|
|
||||||
helpInsert (insKey, String.size trieKey, trieChild)
|
|
||||||
val newChildren =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (idx, elt) => if idx <> insIdx then elt else newTrieChild)
|
|
||||||
children
|
|
||||||
in
|
|
||||||
constructor {keys = keys, children = newChildren}
|
|
||||||
end
|
|
||||||
(* if trie key contains insert key, need to split node *)
|
|
||||||
| TRIE_KEY_CONTAINS_INSERT_KEY =>
|
|
||||||
let
|
|
||||||
val trieChild = Vector.sub (children, insIdx)
|
|
||||||
val newKeys =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (idx, key) => if idx <> insIdx then key else insKey) keys
|
|
||||||
|
|
||||||
(* newTrieChild should always be FOUND_WITH_CHILDREN,
|
|
||||||
* because previous part matches insert key,
|
|
||||||
* and esecond part matches trieKey *)
|
|
||||||
val newTrieChild = FOUND_WITH_CHILDREN
|
|
||||||
{ keys = Vector.fromList [trieKey]
|
|
||||||
, children = Vector.fromList [trieChild]
|
|
||||||
}
|
|
||||||
|
|
||||||
val newChildren =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (idx, elt) => if idx <> insIdx then elt else newTrieChild)
|
|
||||||
children
|
|
||||||
in
|
|
||||||
constructor {keys = newKeys, children = newChildren}
|
|
||||||
end
|
|
||||||
| NO_INSERT_MATCH => trie)
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpInsert (insKey, keyPos, trie) : t =
|
|
||||||
case trie of
|
|
||||||
FOUND =>
|
|
||||||
if keyPos = String.size insKey then
|
|
||||||
FOUND
|
|
||||||
else
|
|
||||||
FOUND_WITH_CHILDREN
|
|
||||||
{ keys = Vector.fromList [insKey]
|
|
||||||
, children = Vector.fromList [FOUND]
|
|
||||||
}
|
|
||||||
| CHILDREN {keys, children} =>
|
|
||||||
let
|
|
||||||
val findChr = String.sub (insKey, keyPos)
|
|
||||||
in
|
|
||||||
(case insertBinSearch (findChr, keyPos, keys) of
|
|
||||||
INSERT_NEW_CHILD insIdx =>
|
|
||||||
insertNewChild (keys, insIdx, insKey, children, CHILDREN)
|
|
||||||
| FOUND_INSERT_POS insIdx =>
|
|
||||||
foundInsertPos
|
|
||||||
(keys, children, keyPos, insKey, insIdx, trie, CHILDREN)
|
|
||||||
| APPEND_NEW_CHILD =>
|
|
||||||
appendNewChild (keys, insKey, children, CHILDREN))
|
|
||||||
end
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
let
|
|
||||||
val findChr = String.sub (insKey, keyPos)
|
|
||||||
in
|
|
||||||
(case insertBinSearch (findChr, keyPos, keys) of
|
|
||||||
INSERT_NEW_CHILD insIdx =>
|
|
||||||
insertNewChild
|
|
||||||
(keys, insIdx, insKey, children, FOUND_WITH_CHILDREN)
|
|
||||||
| FOUND_INSERT_POS insIdx =>
|
|
||||||
foundInsertPos
|
|
||||||
( keys
|
|
||||||
, children
|
|
||||||
, keyPos
|
|
||||||
, insKey
|
|
||||||
, insIdx
|
|
||||||
, trie
|
|
||||||
, FOUND_WITH_CHILDREN
|
|
||||||
)
|
|
||||||
| APPEND_NEW_CHILD =>
|
|
||||||
appendNewChild (keys, insKey, children, FOUND_WITH_CHILDREN))
|
|
||||||
end
|
|
||||||
|
|
||||||
fun insert (insKey, trie) =
|
|
||||||
if String.size insKey > 0 then
|
|
||||||
if isEmpty trie then fromString insKey else helpInsert (insKey, 0, trie)
|
|
||||||
else
|
|
||||||
trie
|
|
||||||
|
|
||||||
fun helpAddList (str, acc) = insert (str, acc)
|
|
||||||
|
|
||||||
fun addList (lst, trie) =
|
|
||||||
List.foldl helpAddList trie lst
|
|
||||||
|
|
||||||
fun fromList (hd :: tl) =
|
|
||||||
let val trie = fromString hd
|
|
||||||
in addList (tl, trie)
|
|
||||||
end
|
|
||||||
| fromList ([]) = empty
|
|
||||||
|
|
||||||
datatype remove_result = UNCHANGED | MADE_EMPTY | CHANGED of t
|
|
||||||
|
|
||||||
(* should be called when there is a FULL_SEARCH_MATCH
|
|
||||||
* and child is a terminal FOUND node *)
|
|
||||||
fun removeWhenChildIsMadeEmpty
|
|
||||||
(idx, keys, children, isFoundWithChildren, parentConstructor) =
|
|
||||||
(* if child was made empty, then:
|
|
||||||
* - if the parent only has 1 child, it should be MADE_EMPTY too
|
|
||||||
* - otherwise, just remove the key and child at this idx from parent
|
|
||||||
* *)
|
|
||||||
if Vector.length keys > 1 then
|
|
||||||
if idx > 0 then
|
|
||||||
let
|
|
||||||
val newKeys = Vector.tabulate (Vector.length keys - 1, fn keyIdx =>
|
|
||||||
Vector.sub (keys, if keyIdx >= idx then keyIdx - 1 else keyIdx))
|
|
||||||
|
|
||||||
val newChildren =
|
|
||||||
Vector.tabulate (Vector.length keys - 1, fn childIdx =>
|
|
||||||
Vector.sub
|
|
||||||
(children, if childIdx >= idx then childIdx - 1 else childIdx))
|
|
||||||
|
|
||||||
val newNode =
|
|
||||||
parentConstructor {keys = newKeys, children = newChildren}
|
|
||||||
in
|
|
||||||
CHANGED newNode
|
|
||||||
end
|
|
||||||
else
|
|
||||||
(* if idx = 0, then have to slice first element off from vector *)
|
|
||||||
let
|
|
||||||
val keySlice = VectorSlice.slice (keys, 1, SOME
|
|
||||||
(Vector.length keys - 1))
|
|
||||||
val newKeys = VectorSlice.vector keySlice
|
|
||||||
|
|
||||||
val childrenSlice = VectorSlice.slice (children, 1, SOME
|
|
||||||
(Vector.length children - 1))
|
|
||||||
val newChildren = VectorSlice.vector childrenSlice
|
|
||||||
|
|
||||||
val newNode =
|
|
||||||
parentConstructor {keys = newKeys, children = newChildren}
|
|
||||||
in
|
|
||||||
CHANGED newNode
|
|
||||||
end
|
|
||||||
else (* if the caller was from the FOUND_WITH_CHILDREN case,
|
|
||||||
* then, instead of deleting this node entirely,
|
|
||||||
* we only delete this node's children, and mark this node
|
|
||||||
* as found.
|
|
||||||
* However, in general case where this is a CHILDREN node
|
|
||||||
* whose key was not inserted into the trie,
|
|
||||||
* we should fully delete this node as well. *) if isFoundWithChildren then
|
|
||||||
CHANGED FOUND
|
|
||||||
else
|
|
||||||
MADE_EMPTY
|
|
||||||
|
|
||||||
(* should be called when searchKeyMatch returns FULL_MATCH
|
|
||||||
*in helpRemove function *)
|
|
||||||
fun removeWhenFullMatch
|
|
||||||
(idx, keys, children, isFoundWithChildren, parentConstructor) =
|
|
||||||
(* matching over the child at this idx *)
|
|
||||||
case Vector.sub (children, idx) of
|
|
||||||
(* CHILDREN is a not-found case, so have to leave parent unchanged
|
|
||||||
* as there is no key to delete. *)
|
|
||||||
CHILDREN _ => UNCHANGED
|
|
||||||
(* FOUND_WITH_CHILDREN is a found case containing links to other nodes
|
|
||||||
* so we just need to change the tag from FOUND_WITH_CHILDREN to CHILDREN *)
|
|
||||||
| FOUND_WITH_CHILDREN {keys = childKeys, children = childChildren} =>
|
|
||||||
let
|
|
||||||
val newChild = CHILDREN {keys = childKeys, children = childChildren}
|
|
||||||
val newParentChildren =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (mapIdx, elt) => if mapIdx <> idx then elt else newChild)
|
|
||||||
children
|
|
||||||
|
|
||||||
val newParent =
|
|
||||||
parentConstructor {keys = keys, children = newParentChildren}
|
|
||||||
in
|
|
||||||
CHANGED newParent
|
|
||||||
end
|
|
||||||
| FOUND =>
|
|
||||||
removeWhenChildIsMadeEmpty
|
|
||||||
(idx, keys, children, isFoundWithChildren, parentConstructor)
|
|
||||||
|
|
||||||
fun removeWhenSearchKeyContainsTrieKey
|
|
||||||
(childResult, idx, keys, children, isFoundWithChildren, parentConstructor) =
|
|
||||||
case childResult of
|
|
||||||
(* if result is UNCHANGED, let UNCHANGED bubble to the top.
|
|
||||||
* At the top, can return same trie given as input as there was no
|
|
||||||
* change. *)
|
|
||||||
UNCHANGED => UNCHANGED
|
|
||||||
(* if child was changed, allocate new vector where child at this idx
|
|
||||||
* is updated with newChild, and use it in parent node. *)
|
|
||||||
| CHANGED newChild =>
|
|
||||||
let
|
|
||||||
val newChildren =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (childIdx, elt) => if idx <> childIdx then elt else newChild)
|
|
||||||
children
|
|
||||||
|
|
||||||
val newNode = parentConstructor {keys = keys, children = newChildren}
|
|
||||||
in
|
|
||||||
CHANGED newNode
|
|
||||||
end
|
|
||||||
| MADE_EMPTY =>
|
|
||||||
removeWhenChildIsMadeEmpty
|
|
||||||
(idx, keys, children, isFoundWithChildren, parentConstructor)
|
|
||||||
|
|
||||||
fun helpRemove (removeKey, keyPos, trie) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {keys, children} =>
|
|
||||||
let
|
|
||||||
val findChr = String.sub (removeKey, keyPos)
|
|
||||||
in
|
|
||||||
(case findBinSearch (findChr, keyPos, keys) of
|
|
||||||
SOME idx =>
|
|
||||||
let
|
|
||||||
val trieKey = Vector.sub (keys, idx)
|
|
||||||
in
|
|
||||||
(case searchKeyMatch (removeKey, trieKey, keyPos + 1) of
|
|
||||||
(* no search match means nothing to delete *)
|
|
||||||
NO_SEARCH_MATCH => UNCHANGED
|
|
||||||
| FULL_SEARCH_MATCH =>
|
|
||||||
removeWhenFullMatch (idx, keys, children, false, CHILDREN)
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
removeWhenSearchKeyContainsTrieKey
|
|
||||||
( helpRemove
|
|
||||||
( removeKey
|
|
||||||
, String.size trieKey
|
|
||||||
, Vector.sub (children, idx)
|
|
||||||
)
|
|
||||||
, idx
|
|
||||||
, keys
|
|
||||||
, children
|
|
||||||
, false
|
|
||||||
, CHILDREN
|
|
||||||
)
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => UNCHANGED)
|
|
||||||
end
|
|
||||||
| NONE => UNCHANGED)
|
|
||||||
end
|
|
||||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
|
||||||
let
|
|
||||||
val findChr = String.sub (removeKey, keyPos)
|
|
||||||
in
|
|
||||||
(case findBinSearch (findChr, keyPos, keys) of
|
|
||||||
SOME idx =>
|
|
||||||
let
|
|
||||||
val trieKey = Vector.sub (keys, idx)
|
|
||||||
in
|
|
||||||
(case searchKeyMatch (removeKey, trieKey, keyPos + 1) of
|
|
||||||
(* no search match means nothing to delete *)
|
|
||||||
NO_SEARCH_MATCH => UNCHANGED
|
|
||||||
| FULL_SEARCH_MATCH =>
|
|
||||||
removeWhenFullMatch
|
|
||||||
(idx, keys, children, true, FOUND_WITH_CHILDREN)
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
removeWhenSearchKeyContainsTrieKey
|
|
||||||
( helpRemove
|
|
||||||
( removeKey
|
|
||||||
, String.size trieKey
|
|
||||||
, Vector.sub (children, idx)
|
|
||||||
)
|
|
||||||
, idx
|
|
||||||
, keys
|
|
||||||
, children
|
|
||||||
, true
|
|
||||||
, FOUND_WITH_CHILDREN
|
|
||||||
)
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => UNCHANGED)
|
|
||||||
end
|
|
||||||
| NONE => UNCHANGED)
|
|
||||||
end
|
|
||||||
| FOUND =>
|
|
||||||
(*
|
|
||||||
* This case should only occur if we recurse in a node
|
|
||||||
* when there is a partial, but not full, string match.
|
|
||||||
* Since there was no full string match,
|
|
||||||
* key doesn't exist in trie and so we must leave it unchanged.
|
|
||||||
*)
|
|
||||||
UNCHANGED
|
|
||||||
|
|
||||||
fun remove (removeKey, trie) =
|
|
||||||
if String.size removeKey = 0 orelse isEmpty trie then
|
|
||||||
trie
|
|
||||||
else
|
|
||||||
case helpRemove (removeKey, 0, trie) of
|
|
||||||
CHANGED trie => trie
|
|
||||||
| MADE_EMPTY => empty
|
|
||||||
| UNCHANGED => trie
|
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user