add ternary string set file, and code an insert function for it (plus datatype definitions, etc.)
This commit is contained in:
202
src/ternary-string-set.sml
Normal file
202
src/ternary-string-set.sml
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
(* An attempt at implementing terneary search trees,
|
||||||
|
* except that each node contains a full string instead of a char *)
|
||||||
|
structure TernaryStringSet =
|
||||||
|
struct
|
||||||
|
datatype t =
|
||||||
|
NODE of {left: t, key: string, follow: t, right: t}
|
||||||
|
(* follow is if insString contains nodeString *)
|
||||||
|
| FOUND_NODE of {left: t, key: string, follow: t, right: t}
|
||||||
|
| LEAF of string
|
||||||
|
| EMPTY
|
||||||
|
|
||||||
|
val empty = EMPTY
|
||||||
|
|
||||||
|
fun isEmpty t = t = EMPTY
|
||||||
|
|
||||||
|
fun fromString str =
|
||||||
|
if String.size str > 0 then LEAF str else EMPTY
|
||||||
|
|
||||||
|
val nodeStringContainsInsString = ~1
|
||||||
|
val insStringContainsNodeString = ~2
|
||||||
|
|
||||||
|
fun getBreakPos (pos, insString, nodeString) =
|
||||||
|
if pos = String.size insString andalso pos = String.size nodeString then
|
||||||
|
pos
|
||||||
|
else if pos = String.size insString then
|
||||||
|
(* if nodeString contains insString, return ~1 *)
|
||||||
|
nodeStringContainsInsString
|
||||||
|
else if pos = String.size nodeString then
|
||||||
|
(* if insString contains nodeString, return ~2 *)
|
||||||
|
insStringContainsNodeString
|
||||||
|
else
|
||||||
|
let
|
||||||
|
val insChr = String.sub (insString, pos)
|
||||||
|
val nodeChr = String.sub (nodeString, pos)
|
||||||
|
in
|
||||||
|
if insChr = nodeChr then
|
||||||
|
(* continue *)
|
||||||
|
getBreakPos (pos + 1, insString, nodeString)
|
||||||
|
else
|
||||||
|
(* return break position *)
|
||||||
|
pos
|
||||||
|
end
|
||||||
|
|
||||||
|
fun helpIns (pos, insString, t) =
|
||||||
|
case t of
|
||||||
|
NODE {left, key = nodeString, follow, right} =>
|
||||||
|
let
|
||||||
|
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||||
|
in
|
||||||
|
if breakPos = String.size insString then
|
||||||
|
(* change node tag *)
|
||||||
|
FOUND_NODE
|
||||||
|
{left = left, key = nodeString, follow = follow, right = right}
|
||||||
|
else if breakPos = nodeStringContainsInsString then
|
||||||
|
(* add new node at current position,
|
||||||
|
* making current node a child of the new node *)
|
||||||
|
FOUND_NODE
|
||||||
|
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||||
|
else if breakPos = insStringContainsNodeString then
|
||||||
|
(* follow *)
|
||||||
|
let
|
||||||
|
val follow = helpIns (breakPos, insString, follow)
|
||||||
|
in
|
||||||
|
NODE
|
||||||
|
{left = left, key = nodeString, follow = follow, right = right}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
(* we have a difference: do we want to go left or right? *)
|
||||||
|
let
|
||||||
|
val insChr = String.sub (insString, breakPos)
|
||||||
|
val nodeChr = String.sub (nodeString, breakPos)
|
||||||
|
in
|
||||||
|
if insChr < nodeChr then
|
||||||
|
let
|
||||||
|
val left = helpIns (breakPos, insString, left)
|
||||||
|
in
|
||||||
|
NODE
|
||||||
|
{ left = left
|
||||||
|
, key = nodeString
|
||||||
|
, follow = follow
|
||||||
|
, right = right
|
||||||
|
}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
let
|
||||||
|
val right = helpIns (breakPos, insString, right)
|
||||||
|
in
|
||||||
|
NODE
|
||||||
|
{ left = left
|
||||||
|
, key = nodeString
|
||||||
|
, follow = follow
|
||||||
|
, right = right
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| FOUND_NODE {left, key = nodeString, follow, right} =>
|
||||||
|
let
|
||||||
|
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||||
|
in
|
||||||
|
if breakPos = String.size insString then
|
||||||
|
(* return original tree as nothing to do, since key already exists *)
|
||||||
|
t
|
||||||
|
else if breakPos = nodeStringContainsInsString then
|
||||||
|
(* add new node at current position,
|
||||||
|
* making current node a child of the new node *)
|
||||||
|
FOUND_NODE
|
||||||
|
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||||
|
else if breakPos = insStringContainsNodeString then
|
||||||
|
(* follow *)
|
||||||
|
let
|
||||||
|
val follow = helpIns (breakPos, insString, follow)
|
||||||
|
in
|
||||||
|
FOUND_NODE
|
||||||
|
{left = left, key = nodeString, follow = follow, right = right}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
(* we have a difference: do we want to go left or right? *)
|
||||||
|
let
|
||||||
|
val insChr = String.sub (insString, breakPos)
|
||||||
|
val nodeChr = String.sub (nodeString, breakPos)
|
||||||
|
in
|
||||||
|
if insChr < nodeChr then
|
||||||
|
let
|
||||||
|
val left = helpIns (breakPos, insString, left)
|
||||||
|
in
|
||||||
|
FOUND_NODE
|
||||||
|
{ left = left
|
||||||
|
, key = nodeString
|
||||||
|
, follow = follow
|
||||||
|
, right = right
|
||||||
|
}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
let
|
||||||
|
val right = helpIns (breakPos, insString, right)
|
||||||
|
in
|
||||||
|
FOUND_NODE
|
||||||
|
{ left = left
|
||||||
|
, key = nodeString
|
||||||
|
, follow = follow
|
||||||
|
, right = right
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
| EMPTY => LEAF insString
|
||||||
|
| LEAF nodeString =>
|
||||||
|
let
|
||||||
|
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||||
|
in
|
||||||
|
if breakPos = String.size insString then
|
||||||
|
(* no change as user tried to insert string that already exists *)
|
||||||
|
t
|
||||||
|
else if breakPos = nodeStringContainsInsString then
|
||||||
|
(* add new node at current position,
|
||||||
|
* making current node a child of the new node *)
|
||||||
|
FOUND_NODE
|
||||||
|
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||||
|
else if breakPos = insStringContainsNodeString then
|
||||||
|
(* transform this node to a FOUND_NODE,
|
||||||
|
* and add a new LEAF at follow position *)
|
||||||
|
let
|
||||||
|
val newLeaf = LEAF insString
|
||||||
|
in
|
||||||
|
FOUND_NODE
|
||||||
|
{ key = nodeString
|
||||||
|
, follow = newLeaf
|
||||||
|
, left = EMPTY
|
||||||
|
, right = EMPTY
|
||||||
|
}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
(* we have a difference: break string and add a new NODE.
|
||||||
|
* The way we insert has implications for searching.
|
||||||
|
* Search should descend down on all three children
|
||||||
|
* if searchKey contains nodeKey.
|
||||||
|
* *)
|
||||||
|
let
|
||||||
|
val breakString = String.substring (insString, 0, breakPos)
|
||||||
|
val insChr = String.sub (insString, breakPos)
|
||||||
|
val nodeChr = String.sub (nodeString, breakPos)
|
||||||
|
in
|
||||||
|
if insChr < nodeChr then
|
||||||
|
(* insString on left *)
|
||||||
|
NODE
|
||||||
|
{ left = LEAF insString
|
||||||
|
, right = t
|
||||||
|
, key = breakString
|
||||||
|
, follow = EMPTY
|
||||||
|
}
|
||||||
|
else
|
||||||
|
(* insString on right *)
|
||||||
|
NODE
|
||||||
|
{ left = t
|
||||||
|
, right = LEAF insString
|
||||||
|
, key = breakString
|
||||||
|
, follow = EMPTY
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
@@ -1,523 +0,0 @@
|
|||||||
signature STRING_SET =
|
|
||||||
sig
|
|
||||||
(* the type of tries *)
|
|
||||||
type t
|
|
||||||
|
|
||||||
(* the empty trie *)
|
|
||||||
val empty: t
|
|
||||||
|
|
||||||
(* StringSet.isEmpty trie
|
|
||||||
* returns true if the trie is empty *)
|
|
||||||
val isEmpty: t -> bool
|
|
||||||
|
|
||||||
(* StringSet.fromString "hello world"
|
|
||||||
* creates a trie containing just a string *)
|
|
||||||
val fromString: string -> t
|
|
||||||
|
|
||||||
(* StringSet.exists ("hello world", trie)
|
|
||||||
* returns true if the key was inserted into the trie *)
|
|
||||||
val exists: string * t -> bool
|
|
||||||
|
|
||||||
(* StringSet.insert ("myNewString", trie)
|
|
||||||
* inserts a new string into the trie, returning a new trie *)
|
|
||||||
val insert: string * t -> t
|
|
||||||
|
|
||||||
(* StringSet.remove ("stringToRemove", trie)
|
|
||||||
* removes the key from the trie, returning a new trie *)
|
|
||||||
val remove: string * t -> t
|
|
||||||
|
|
||||||
(* StringSet.getPrefixList ("myPrefix", trie)
|
|
||||||
* returns a list of all keys matching the specified prefix *)
|
|
||||||
val getPrefixList: string * t -> string list
|
|
||||||
|
|
||||||
(* StringSet.toList trie
|
|
||||||
* returns a list containing all keys in the trie *)
|
|
||||||
val toList: t -> string list
|
|
||||||
|
|
||||||
(* StringSet.fromList ["hello", "world"]
|
|
||||||
* returns a trie containing all keys in the string list *)
|
|
||||||
val fromList: string list -> t
|
|
||||||
|
|
||||||
(* StringSet.foldl (fn (key, acc) => String.size key + acc) 0 trie
|
|
||||||
* folds a value through the trie, from lowest to highest. *)
|
|
||||||
val foldl: (string * 'b -> 'b) -> 'b -> t -> 'b
|
|
||||||
|
|
||||||
(* StringSet.foldlWithPrefix (fn (key, acc) => String.size key + acc) 0 trie "myPrefix"
|
|
||||||
* folds a value through a subset of the trie containing the specified prefix,
|
|
||||||
* from lowest to highest. *)
|
|
||||||
val foldlWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
|
||||||
|
|
||||||
(* StringSet.foldr (fn (key, acc) => String.size key + acc) 0 trie
|
|
||||||
* folds a value through the trie, from highest to lowest. *)
|
|
||||||
val foldr: (string * 'b -> 'b) -> 'b -> t -> 'b
|
|
||||||
|
|
||||||
(* StringSet.foldrWithPrefix (fn (key, acc) => String.size key + acc) 0 trie "myPrefix"
|
|
||||||
* folds a value through a subset of the trie containing the specified prefix,
|
|
||||||
* from highest to lowest. *)
|
|
||||||
val foldrWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
|
||||||
end
|
|
||||||
|
|
||||||
structure ZipStringSet =
|
|
||||||
struct
|
|
||||||
datatype t =
|
|
||||||
CHILDREN of
|
|
||||||
{ leftKeys: string vector list
|
|
||||||
, leftChildren: t vector list
|
|
||||||
, rightKeys: string vector list
|
|
||||||
, rightChildren: t vector list
|
|
||||||
}
|
|
||||||
| FOUND_WITH_CHILDREN of
|
|
||||||
{ leftKeys: string vector list
|
|
||||||
, leftChildren: t vector list
|
|
||||||
, rightKeys: string vector list
|
|
||||||
, rightChildren: t vector list
|
|
||||||
}
|
|
||||||
| FOUND
|
|
||||||
|
|
||||||
val maxSize = 32
|
|
||||||
|
|
||||||
val empty = CHILDREN
|
|
||||||
{leftKeys = [], leftChildren = [], rightKeys = [], rightChildren = []}
|
|
||||||
|
|
||||||
fun isEmpty trie =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {leftKeys = [], rightKeys = [], ...} => true
|
|
||||||
| _ => false
|
|
||||||
|
|
||||||
fun fromString str =
|
|
||||||
if String.size str > 0 then
|
|
||||||
CHILDREN
|
|
||||||
{ leftKeys = [Vector.fromList [str]]
|
|
||||||
, leftChildren = [Vector.fromList [FOUND]]
|
|
||||||
, rightKeys = []
|
|
||||||
, rightChildren = []
|
|
||||||
}
|
|
||||||
else
|
|
||||||
empty
|
|
||||||
|
|
||||||
fun helpBinSearch (findChr, keyPos, children, low, high) =
|
|
||||||
if high >= low then
|
|
||||||
let
|
|
||||||
val mid = low + ((high - low) div 2)
|
|
||||||
val midStr = Vector.sub (children, mid)
|
|
||||||
val midChr = String.sub (midStr, keyPos)
|
|
||||||
in
|
|
||||||
if midChr = findChr then
|
|
||||||
SOME mid
|
|
||||||
else if midChr < findChr then
|
|
||||||
helpBinSearch (findChr, keyPos, children, mid + 1, high)
|
|
||||||
else
|
|
||||||
helpBinSearch (findChr, keyPos, children, low, mid - 1)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
NONE
|
|
||||||
|
|
||||||
fun findBinSearch (findChr, keyPos, children) =
|
|
||||||
helpBinSearch (findChr, keyPos, children, 0, Vector.length children - 1)
|
|
||||||
|
|
||||||
datatype search_string_match =
|
|
||||||
NO_SEARCH_MATCH
|
|
||||||
| FULL_SEARCH_MATCH
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY
|
|
||||||
|
|
||||||
fun searchKeyMatch (searchKey, trieKey, keyPos) =
|
|
||||||
if
|
|
||||||
keyPos < String.size searchKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then
|
|
||||||
let
|
|
||||||
val searchChr = String.sub (searchKey, keyPos)
|
|
||||||
val trieChr = String.sub (trieKey, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr = trieChr then
|
|
||||||
searchKeyMatch (searchKey, trieKey, keyPos + 1)
|
|
||||||
else
|
|
||||||
NO_SEARCH_MATCH
|
|
||||||
end
|
|
||||||
else
|
|
||||||
SEARCH_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else if
|
|
||||||
keyPos = String.size searchKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY
|
|
||||||
else if keyPos = String.size trieKey then FULL_SEARCH_MATCH
|
|
||||||
else SEARCH_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else (* implicit: keyPos > String.size searchKey *) if
|
|
||||||
keyPos <= String.size trieKey
|
|
||||||
then
|
|
||||||
TRIE_KEY_CONTAINS_SEARCH_KEY
|
|
||||||
else
|
|
||||||
NO_SEARCH_MATCH
|
|
||||||
|
|
||||||
fun isFoundNode node =
|
|
||||||
case node of
|
|
||||||
FOUND => true
|
|
||||||
| FOUND_WITH_CHILDREN _ => true
|
|
||||||
| CHILDREN _ => false
|
|
||||||
|
|
||||||
fun checkNodeExistsMatch (searchKey, trieKey, keyPos, children, idx) =
|
|
||||||
(case searchKeyMatch (searchKey, trieKey, keyPos + 1) of
|
|
||||||
NO_SEARCH_MATCH => false
|
|
||||||
| FULL_SEARCH_MATCH =>
|
|
||||||
let val trieChild = Vector.sub (children, idx)
|
|
||||||
in isFoundNode trieChild
|
|
||||||
end
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
let val trieChild = Vector.sub (children, idx)
|
|
||||||
in recurseExists (searchKey, String.size trieKey, trieChild)
|
|
||||||
end
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
|
||||||
|
|
||||||
and checkExistsLeft (searchKey, searchChr, keyPos, leftKeys, leftChildren) =
|
|
||||||
case (leftKeys, leftChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val firstNode = Vector.sub (khd, 0)
|
|
||||||
val firstNodeChr = String.sub (firstNode, keyPos)
|
|
||||||
in
|
|
||||||
if firstNodeChr < searchChr then
|
|
||||||
(* keep checking leftwards *)
|
|
||||||
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if firstNodeChr = searchChr then
|
|
||||||
(* check if there is a full/partial key match at this node *)
|
|
||||||
checkNodeExistsMatch (searchKey, firstNode, keyPos, chd, 0)
|
|
||||||
else
|
|
||||||
(* binary search this node to see if there is a matching chr *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeExistsMatch
|
|
||||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => false)
|
|
||||||
end
|
|
||||||
| (_, _) => false
|
|
||||||
|
|
||||||
and checkExistsRight (searchKey, searchChr, keyPos, rightKeys, rightChildren) =
|
|
||||||
case (rightKeys, rightChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
|
||||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
|
||||||
in
|
|
||||||
if lastNodeChr > searchChr then
|
|
||||||
(* keep checking rightwards *)
|
|
||||||
checkExistsRight (searchKey, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if lastNodeChr = searchChr then
|
|
||||||
(* check for full/partial match at this node *)
|
|
||||||
checkNodeExistsMatch
|
|
||||||
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
|
||||||
else
|
|
||||||
(* binary search this node to see if there is a matching chr *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeExistsMatch
|
|
||||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => false)
|
|
||||||
end
|
|
||||||
| (_, _) => false
|
|
||||||
|
|
||||||
and decideExistsDirection
|
|
||||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren) =
|
|
||||||
case (leftKeys, leftChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val searchChr = String.sub (searchKey, keyPos)
|
|
||||||
val firstNode = Vector.sub (khd, 0)
|
|
||||||
val startNodeChr = String.sub (firstNode, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr < startNodeChr then
|
|
||||||
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if searchChr = startNodeChr then
|
|
||||||
(* check string match on first key/firstNode
|
|
||||||
* and recurse if full or partial match *)
|
|
||||||
checkNodeExistsMatch (searchKey, firstNode, keyPos, chd, 0)
|
|
||||||
else
|
|
||||||
(* implicit: searchChr > startNodeChr *)
|
|
||||||
let
|
|
||||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
|
||||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr > lastNodeChr then
|
|
||||||
(* check rightKeys/rightChildren *)
|
|
||||||
checkExistsRight
|
|
||||||
(searchKey, searchChr, keyPos, rightKeys, rightChildren)
|
|
||||||
else if searchChr = lastNodeChr then
|
|
||||||
(* check string match on last key/lastNode
|
|
||||||
* and recurse if full or partial match *)
|
|
||||||
checkNodeExistsMatch
|
|
||||||
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
|
||||||
else
|
|
||||||
(* implicit: searchChr < lastNodeChr
|
|
||||||
* should perform binary search at this node
|
|
||||||
* to find if key exists *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeExistsMatch
|
|
||||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => false)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| (_, _) =>
|
|
||||||
(* leftKeys and leftChildren are both empty
|
|
||||||
* so check rightKeys/rightChildren *)
|
|
||||||
checkExistsRight
|
|
||||||
( searchKey
|
|
||||||
, String.sub (searchKey, keyPos)
|
|
||||||
, keyPos
|
|
||||||
, rightKeys
|
|
||||||
, rightChildren
|
|
||||||
)
|
|
||||||
|
|
||||||
and recurseExists (searchKey, keyPos, trie) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
|
||||||
decideExistsDirection
|
|
||||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
|
||||||
| FOUND_WITH_CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
|
||||||
decideExistsDirection
|
|
||||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
|
||||||
| FOUND => false
|
|
||||||
|
|
||||||
fun exists (searchKey, trie) =
|
|
||||||
if isEmpty trie orelse String.size searchKey = 0 then false
|
|
||||||
else recurseExists (searchKey, 0, trie)
|
|
||||||
|
|
||||||
fun checkNodeSubtrieMatch (prefix, trieKey, keyPos, children, idx) =
|
|
||||||
case searchKeyMatch (prefix, trieKey, keyPos + 1) of
|
|
||||||
NO_SEARCH_MATCH => NONE
|
|
||||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
|
||||||
let val trieChild = Vector.sub (children, idx)
|
|
||||||
in recurseGetPrefixSubtrie (prefix, String.size trieKey, trieChild)
|
|
||||||
end
|
|
||||||
| FULL_SEARCH_MATCH =>
|
|
||||||
let val node = Vector.sub (children, idx)
|
|
||||||
in SOME (prefix, node)
|
|
||||||
end
|
|
||||||
| TRIE_KEY_CONTAINS_SEARCH_KEY =>
|
|
||||||
let val node = Vector.sub (children, idx)
|
|
||||||
in SOME (trieKey, node)
|
|
||||||
end
|
|
||||||
|
|
||||||
and helpGetPrefixSubtrieChildren (prefix, keyPos, keys, children, trie) =
|
|
||||||
let
|
|
||||||
val findChr = String.sub (prefix, keyPos)
|
|
||||||
in
|
|
||||||
case findBinSearch (findChr, keyPos, keys) of
|
|
||||||
SOME idx =>
|
|
||||||
let val trieKey = Vector.sub (keys, idx)
|
|
||||||
in checkNodeSubtrieMatch (prefix, trieKey, keyPos, children, idx)
|
|
||||||
end
|
|
||||||
| NONE => NONE
|
|
||||||
end
|
|
||||||
|
|
||||||
and getPrefixSubtrieLeft (prefix, searchChr, keyPos, leftKeys, leftChildren) =
|
|
||||||
case (leftKeys, leftChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val firstNode = Vector.sub (khd, 0)
|
|
||||||
val firstNodeChr = String.sub (firstNode, keyPos)
|
|
||||||
in
|
|
||||||
if firstNodeChr < searchChr then
|
|
||||||
(* keep moving leftwards *)
|
|
||||||
getPrefixSubtrieLeft (prefix, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if firstNodeChr = searchChr then
|
|
||||||
(* check if there is a full/partial key match at this node *)
|
|
||||||
checkNodeSubtrieMatch (prefix, firstNode, keyPos, chd, 0)
|
|
||||||
else
|
|
||||||
(* binary search this node to see if there is a matching chr *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeSubtrieMatch
|
|
||||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => NONE)
|
|
||||||
end
|
|
||||||
| (_, _) => NONE
|
|
||||||
|
|
||||||
and getPrefixSubtrieRight
|
|
||||||
(prefix, searchChr, keyPos, rightKeys, rightChildren) =
|
|
||||||
case (rightKeys, rightChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
|
||||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
|
||||||
in
|
|
||||||
if lastNodeChr > searchChr then
|
|
||||||
(* keep checking rightwards *)
|
|
||||||
getPrefixSubtrieRight (prefix, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if lastNodeChr = searchChr then
|
|
||||||
(* check for full/partial match at this node *)
|
|
||||||
checkNodeSubtrieMatch
|
|
||||||
(prefix, lastNode, keyPos, chd, Vector.length khd - 1)
|
|
||||||
else
|
|
||||||
(* binary search this node to see if there is a matching chr *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeSubtrieMatch
|
|
||||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => NONE)
|
|
||||||
end
|
|
||||||
| (_, _) => NONE
|
|
||||||
|
|
||||||
and decideGetPrefixSubtrieDirection
|
|
||||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren) =
|
|
||||||
case (leftKeys, leftChildren) of
|
|
||||||
(khd :: ktl, chd :: ctl) =>
|
|
||||||
let
|
|
||||||
val searchChr = String.sub (prefix, keyPos)
|
|
||||||
val firstNode = Vector.sub (khd, 0)
|
|
||||||
val startNodeChr = String.sub (firstNode, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr < startNodeChr then
|
|
||||||
getPrefixSubtrieLeft (prefix, searchChr, keyPos, ktl, ctl)
|
|
||||||
else if searchChr = startNodeChr then
|
|
||||||
(* check string match on first key/firstNode
|
|
||||||
* and recurse if full or partial match *)
|
|
||||||
checkNodeSubtrieMatch (prefix, firstNode, keyPos, chd, 0)
|
|
||||||
else
|
|
||||||
(* implicit: searchChr > startNodeChr *)
|
|
||||||
let
|
|
||||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
|
||||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr > lastNodeChr then
|
|
||||||
(* check rightKeys/rightChildren *)
|
|
||||||
getPrefixSubtrieRight
|
|
||||||
(prefix, searchChr, keyPos, rightKeys, rightChildren)
|
|
||||||
else if searchChr = lastNodeChr then
|
|
||||||
(* check string match on last key/lastNode
|
|
||||||
* and recurse if full or partial match *)
|
|
||||||
checkNodeSubtrieMatch
|
|
||||||
(prefix, lastNode, keyPos, chd, Vector.length khd - 1)
|
|
||||||
else
|
|
||||||
(* implicit: searchChr < lastNodeChr
|
|
||||||
* should perform binary search at this node
|
|
||||||
* to find if key exists *)
|
|
||||||
(case findBinSearch (searchChr, keyPos, khd) of
|
|
||||||
SOME idx =>
|
|
||||||
checkNodeSubtrieMatch
|
|
||||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
|
||||||
| NONE => NONE)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| (_, _) => NONE
|
|
||||||
|
|
||||||
and recurseGetPrefixSubtrie (prefix, keyPos, trie) =
|
|
||||||
case trie of
|
|
||||||
CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
|
||||||
decideGetPrefixSubtrieDirection
|
|
||||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
|
||||||
| FOUND_WITH_CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
|
||||||
decideGetPrefixSubtrieDirection
|
|
||||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
|
||||||
| FOUND => NONE
|
|
||||||
|
|
||||||
fun getPrefixSubtrie (prefix, trie) =
|
|
||||||
if isEmpty trie then NONE else recurseGetPrefixSubtrie (prefix, 0, trie)
|
|
||||||
|
|
||||||
datatype insert_string_match =
|
|
||||||
NO_INSERT_MATCH
|
|
||||||
| DIFFERENCE_FOUND_AT of int
|
|
||||||
| FULL_INSERT_MATCH
|
|
||||||
| INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
| TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
|
|
||||||
fun insertKeyMatch (insertKey, trieKey, keyPos) =
|
|
||||||
if
|
|
||||||
keyPos < String.size insertKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then
|
|
||||||
let
|
|
||||||
val searchChr = String.sub (insertKey, keyPos)
|
|
||||||
val trieChr = String.sub (trieKey, keyPos)
|
|
||||||
in
|
|
||||||
if searchChr = trieChr then
|
|
||||||
insertKeyMatch (insertKey, trieKey, keyPos + 1)
|
|
||||||
else
|
|
||||||
DIFFERENCE_FOUND_AT keyPos
|
|
||||||
end
|
|
||||||
else
|
|
||||||
INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else if
|
|
||||||
keyPos = String.size insertKey
|
|
||||||
then
|
|
||||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
else if keyPos = String.size trieKey then FULL_INSERT_MATCH
|
|
||||||
else INSERT_KEY_CONTAINS_TRIE_KEY
|
|
||||||
else (* implicit: keyPos > String.size insertKey *) if
|
|
||||||
keyPos <= String.size trieKey
|
|
||||||
then
|
|
||||||
TRIE_KEY_CONTAINS_INSERT_KEY
|
|
||||||
else
|
|
||||||
NO_INSERT_MATCH
|
|
||||||
|
|
||||||
datatype insert_bin_search_result =
|
|
||||||
INSERT_NEW_CHILD of int
|
|
||||||
| FOUND_INSERT_POS of int
|
|
||||||
| APPEND_NEW_CHILD
|
|
||||||
|
|
||||||
fun linearSearch (findChr, keyPos, idx, children) =
|
|
||||||
if idx = Vector.length children then
|
|
||||||
APPEND_NEW_CHILD
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val curStr = Vector.sub (children, idx)
|
|
||||||
val curChr = String.sub (curStr, keyPos)
|
|
||||||
in
|
|
||||||
if curChr > findChr then INSERT_NEW_CHILD idx
|
|
||||||
else linearSearch (findChr, keyPos, idx + 1, children)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun helpInsertBinSearch (findChr, keyPos, children, low, high) =
|
|
||||||
let
|
|
||||||
val mid = low + ((high - low) div 2)
|
|
||||||
in
|
|
||||||
if high >= low then
|
|
||||||
let
|
|
||||||
val midStr = Vector.sub (children, mid)
|
|
||||||
val midChr = String.sub (midStr, keyPos)
|
|
||||||
in
|
|
||||||
if midChr = findChr then
|
|
||||||
FOUND_INSERT_POS mid
|
|
||||||
else if midChr < findChr then
|
|
||||||
helpInsertBinSearch (findChr, keyPos, children, mid + 1, high)
|
|
||||||
else
|
|
||||||
helpInsertBinSearch (findChr, keyPos, children, low, mid - 1)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
linearSearch (findChr, keyPos, if mid >= 0 then mid else 0, children)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun insertBinSearch (findChr, keyPos, children) =
|
|
||||||
helpInsertBinSearch
|
|
||||||
(findChr, keyPos, children, 0, Vector.length children - 1)
|
|
||||||
|
|
||||||
fun insertDifferenceFoundAtLeft
|
|
||||||
( insKey
|
|
||||||
, insIdx
|
|
||||||
, splitTrieKeyStart
|
|
||||||
, trieChild
|
|
||||||
, childLeftKeys
|
|
||||||
, childLeftChildren
|
|
||||||
, childRightKeys
|
|
||||||
, childRightChildren
|
|
||||||
, parentKeys
|
|
||||||
, parentChildren
|
|
||||||
, parentConstructor
|
|
||||||
) =
|
|
||||||
let
|
|
||||||
(* child node should always have CHILDREN case,
|
|
||||||
* because we are splitting prefix into two,
|
|
||||||
* when neither trieKey nor insKey match the prefix. *)
|
|
||||||
val childNode = CHILDREN {keys = childKeys, children = childChildren}
|
|
||||||
val keys =
|
|
||||||
Vector.mapi
|
|
||||||
(fn (idx, key) => if idx <> insIdx then key else splitTrieKeyStart)
|
|
||||||
parentKeys
|
|
||||||
|
|
||||||
val children =
|
|
||||||
Vector.mapi (fn (idx, elt) => if idx <> insIdx then elt else childNode)
|
|
||||||
parentChildren
|
|
||||||
in
|
|
||||||
parentConstructor {keys = keys, children = children}
|
|
||||||
end
|
|
||||||
end
|
|
||||||
Reference in New Issue
Block a user