add ternary string set file, and code an insert function for it (plus datatype definitions, etc.)
This commit is contained in:
202
src/ternary-string-set.sml
Normal file
202
src/ternary-string-set.sml
Normal file
@@ -0,0 +1,202 @@
|
||||
(* An attempt at implementing terneary search trees,
|
||||
* except that each node contains a full string instead of a char *)
|
||||
structure TernaryStringSet =
|
||||
struct
|
||||
datatype t =
|
||||
NODE of {left: t, key: string, follow: t, right: t}
|
||||
(* follow is if insString contains nodeString *)
|
||||
| FOUND_NODE of {left: t, key: string, follow: t, right: t}
|
||||
| LEAF of string
|
||||
| EMPTY
|
||||
|
||||
val empty = EMPTY
|
||||
|
||||
fun isEmpty t = t = EMPTY
|
||||
|
||||
fun fromString str =
|
||||
if String.size str > 0 then LEAF str else EMPTY
|
||||
|
||||
val nodeStringContainsInsString = ~1
|
||||
val insStringContainsNodeString = ~2
|
||||
|
||||
fun getBreakPos (pos, insString, nodeString) =
|
||||
if pos = String.size insString andalso pos = String.size nodeString then
|
||||
pos
|
||||
else if pos = String.size insString then
|
||||
(* if nodeString contains insString, return ~1 *)
|
||||
nodeStringContainsInsString
|
||||
else if pos = String.size nodeString then
|
||||
(* if insString contains nodeString, return ~2 *)
|
||||
insStringContainsNodeString
|
||||
else
|
||||
let
|
||||
val insChr = String.sub (insString, pos)
|
||||
val nodeChr = String.sub (nodeString, pos)
|
||||
in
|
||||
if insChr = nodeChr then
|
||||
(* continue *)
|
||||
getBreakPos (pos + 1, insString, nodeString)
|
||||
else
|
||||
(* return break position *)
|
||||
pos
|
||||
end
|
||||
|
||||
fun helpIns (pos, insString, t) =
|
||||
case t of
|
||||
NODE {left, key = nodeString, follow, right} =>
|
||||
let
|
||||
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||
in
|
||||
if breakPos = String.size insString then
|
||||
(* change node tag *)
|
||||
FOUND_NODE
|
||||
{left = left, key = nodeString, follow = follow, right = right}
|
||||
else if breakPos = nodeStringContainsInsString then
|
||||
(* add new node at current position,
|
||||
* making current node a child of the new node *)
|
||||
FOUND_NODE
|
||||
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||
else if breakPos = insStringContainsNodeString then
|
||||
(* follow *)
|
||||
let
|
||||
val follow = helpIns (breakPos, insString, follow)
|
||||
in
|
||||
NODE
|
||||
{left = left, key = nodeString, follow = follow, right = right}
|
||||
end
|
||||
else
|
||||
(* we have a difference: do we want to go left or right? *)
|
||||
let
|
||||
val insChr = String.sub (insString, breakPos)
|
||||
val nodeChr = String.sub (nodeString, breakPos)
|
||||
in
|
||||
if insChr < nodeChr then
|
||||
let
|
||||
val left = helpIns (breakPos, insString, left)
|
||||
in
|
||||
NODE
|
||||
{ left = left
|
||||
, key = nodeString
|
||||
, follow = follow
|
||||
, right = right
|
||||
}
|
||||
end
|
||||
else
|
||||
let
|
||||
val right = helpIns (breakPos, insString, right)
|
||||
in
|
||||
NODE
|
||||
{ left = left
|
||||
, key = nodeString
|
||||
, follow = follow
|
||||
, right = right
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
| FOUND_NODE {left, key = nodeString, follow, right} =>
|
||||
let
|
||||
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||
in
|
||||
if breakPos = String.size insString then
|
||||
(* return original tree as nothing to do, since key already exists *)
|
||||
t
|
||||
else if breakPos = nodeStringContainsInsString then
|
||||
(* add new node at current position,
|
||||
* making current node a child of the new node *)
|
||||
FOUND_NODE
|
||||
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||
else if breakPos = insStringContainsNodeString then
|
||||
(* follow *)
|
||||
let
|
||||
val follow = helpIns (breakPos, insString, follow)
|
||||
in
|
||||
FOUND_NODE
|
||||
{left = left, key = nodeString, follow = follow, right = right}
|
||||
end
|
||||
else
|
||||
(* we have a difference: do we want to go left or right? *)
|
||||
let
|
||||
val insChr = String.sub (insString, breakPos)
|
||||
val nodeChr = String.sub (nodeString, breakPos)
|
||||
in
|
||||
if insChr < nodeChr then
|
||||
let
|
||||
val left = helpIns (breakPos, insString, left)
|
||||
in
|
||||
FOUND_NODE
|
||||
{ left = left
|
||||
, key = nodeString
|
||||
, follow = follow
|
||||
, right = right
|
||||
}
|
||||
end
|
||||
else
|
||||
let
|
||||
val right = helpIns (breakPos, insString, right)
|
||||
in
|
||||
FOUND_NODE
|
||||
{ left = left
|
||||
, key = nodeString
|
||||
, follow = follow
|
||||
, right = right
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
| EMPTY => LEAF insString
|
||||
| LEAF nodeString =>
|
||||
let
|
||||
val breakPos = getBreakPos (pos, insString, nodeString)
|
||||
in
|
||||
if breakPos = String.size insString then
|
||||
(* no change as user tried to insert string that already exists *)
|
||||
t
|
||||
else if breakPos = nodeStringContainsInsString then
|
||||
(* add new node at current position,
|
||||
* making current node a child of the new node *)
|
||||
FOUND_NODE
|
||||
{left = EMPTY, key = insString, follow = t, right = EMPTY}
|
||||
else if breakPos = insStringContainsNodeString then
|
||||
(* transform this node to a FOUND_NODE,
|
||||
* and add a new LEAF at follow position *)
|
||||
let
|
||||
val newLeaf = LEAF insString
|
||||
in
|
||||
FOUND_NODE
|
||||
{ key = nodeString
|
||||
, follow = newLeaf
|
||||
, left = EMPTY
|
||||
, right = EMPTY
|
||||
}
|
||||
end
|
||||
else
|
||||
(* we have a difference: break string and add a new NODE.
|
||||
* The way we insert has implications for searching.
|
||||
* Search should descend down on all three children
|
||||
* if searchKey contains nodeKey.
|
||||
* *)
|
||||
let
|
||||
val breakString = String.substring (insString, 0, breakPos)
|
||||
val insChr = String.sub (insString, breakPos)
|
||||
val nodeChr = String.sub (nodeString, breakPos)
|
||||
in
|
||||
if insChr < nodeChr then
|
||||
(* insString on left *)
|
||||
NODE
|
||||
{ left = LEAF insString
|
||||
, right = t
|
||||
, key = breakString
|
||||
, follow = EMPTY
|
||||
}
|
||||
else
|
||||
(* insString on right *)
|
||||
NODE
|
||||
{ left = t
|
||||
, right = LEAF insString
|
||||
, key = breakString
|
||||
, follow = EMPTY
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,523 +0,0 @@
|
||||
signature STRING_SET =
|
||||
sig
|
||||
(* the type of tries *)
|
||||
type t
|
||||
|
||||
(* the empty trie *)
|
||||
val empty: t
|
||||
|
||||
(* StringSet.isEmpty trie
|
||||
* returns true if the trie is empty *)
|
||||
val isEmpty: t -> bool
|
||||
|
||||
(* StringSet.fromString "hello world"
|
||||
* creates a trie containing just a string *)
|
||||
val fromString: string -> t
|
||||
|
||||
(* StringSet.exists ("hello world", trie)
|
||||
* returns true if the key was inserted into the trie *)
|
||||
val exists: string * t -> bool
|
||||
|
||||
(* StringSet.insert ("myNewString", trie)
|
||||
* inserts a new string into the trie, returning a new trie *)
|
||||
val insert: string * t -> t
|
||||
|
||||
(* StringSet.remove ("stringToRemove", trie)
|
||||
* removes the key from the trie, returning a new trie *)
|
||||
val remove: string * t -> t
|
||||
|
||||
(* StringSet.getPrefixList ("myPrefix", trie)
|
||||
* returns a list of all keys matching the specified prefix *)
|
||||
val getPrefixList: string * t -> string list
|
||||
|
||||
(* StringSet.toList trie
|
||||
* returns a list containing all keys in the trie *)
|
||||
val toList: t -> string list
|
||||
|
||||
(* StringSet.fromList ["hello", "world"]
|
||||
* returns a trie containing all keys in the string list *)
|
||||
val fromList: string list -> t
|
||||
|
||||
(* StringSet.foldl (fn (key, acc) => String.size key + acc) 0 trie
|
||||
* folds a value through the trie, from lowest to highest. *)
|
||||
val foldl: (string * 'b -> 'b) -> 'b -> t -> 'b
|
||||
|
||||
(* StringSet.foldlWithPrefix (fn (key, acc) => String.size key + acc) 0 trie "myPrefix"
|
||||
* folds a value through a subset of the trie containing the specified prefix,
|
||||
* from lowest to highest. *)
|
||||
val foldlWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
||||
|
||||
(* StringSet.foldr (fn (key, acc) => String.size key + acc) 0 trie
|
||||
* folds a value through the trie, from highest to lowest. *)
|
||||
val foldr: (string * 'b -> 'b) -> 'b -> t -> 'b
|
||||
|
||||
(* StringSet.foldrWithPrefix (fn (key, acc) => String.size key + acc) 0 trie "myPrefix"
|
||||
* folds a value through a subset of the trie containing the specified prefix,
|
||||
* from highest to lowest. *)
|
||||
val foldrWithPrefix: (string * 'b -> 'b) -> 'b -> t -> string -> 'b
|
||||
end
|
||||
|
||||
structure ZipStringSet =
|
||||
struct
|
||||
datatype t =
|
||||
CHILDREN of
|
||||
{ leftKeys: string vector list
|
||||
, leftChildren: t vector list
|
||||
, rightKeys: string vector list
|
||||
, rightChildren: t vector list
|
||||
}
|
||||
| FOUND_WITH_CHILDREN of
|
||||
{ leftKeys: string vector list
|
||||
, leftChildren: t vector list
|
||||
, rightKeys: string vector list
|
||||
, rightChildren: t vector list
|
||||
}
|
||||
| FOUND
|
||||
|
||||
val maxSize = 32
|
||||
|
||||
val empty = CHILDREN
|
||||
{leftKeys = [], leftChildren = [], rightKeys = [], rightChildren = []}
|
||||
|
||||
fun isEmpty trie =
|
||||
case trie of
|
||||
CHILDREN {leftKeys = [], rightKeys = [], ...} => true
|
||||
| _ => false
|
||||
|
||||
fun fromString str =
|
||||
if String.size str > 0 then
|
||||
CHILDREN
|
||||
{ leftKeys = [Vector.fromList [str]]
|
||||
, leftChildren = [Vector.fromList [FOUND]]
|
||||
, rightKeys = []
|
||||
, rightChildren = []
|
||||
}
|
||||
else
|
||||
empty
|
||||
|
||||
fun helpBinSearch (findChr, keyPos, children, low, high) =
|
||||
if high >= low then
|
||||
let
|
||||
val mid = low + ((high - low) div 2)
|
||||
val midStr = Vector.sub (children, mid)
|
||||
val midChr = String.sub (midStr, keyPos)
|
||||
in
|
||||
if midChr = findChr then
|
||||
SOME mid
|
||||
else if midChr < findChr then
|
||||
helpBinSearch (findChr, keyPos, children, mid + 1, high)
|
||||
else
|
||||
helpBinSearch (findChr, keyPos, children, low, mid - 1)
|
||||
end
|
||||
else
|
||||
NONE
|
||||
|
||||
fun findBinSearch (findChr, keyPos, children) =
|
||||
helpBinSearch (findChr, keyPos, children, 0, Vector.length children - 1)
|
||||
|
||||
datatype search_string_match =
|
||||
NO_SEARCH_MATCH
|
||||
| FULL_SEARCH_MATCH
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
|
||||
fun searchKeyMatch (searchKey, trieKey, keyPos) =
|
||||
if
|
||||
keyPos < String.size searchKey
|
||||
then
|
||||
if keyPos < String.size trieKey then
|
||||
let
|
||||
val searchChr = String.sub (searchKey, keyPos)
|
||||
val trieChr = String.sub (trieKey, keyPos)
|
||||
in
|
||||
if searchChr = trieChr then
|
||||
searchKeyMatch (searchKey, trieKey, keyPos + 1)
|
||||
else
|
||||
NO_SEARCH_MATCH
|
||||
end
|
||||
else
|
||||
SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
else if
|
||||
keyPos = String.size searchKey
|
||||
then
|
||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
else if keyPos = String.size trieKey then FULL_SEARCH_MATCH
|
||||
else SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
else (* implicit: keyPos > String.size searchKey *) if
|
||||
keyPos <= String.size trieKey
|
||||
then
|
||||
TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
else
|
||||
NO_SEARCH_MATCH
|
||||
|
||||
fun isFoundNode node =
|
||||
case node of
|
||||
FOUND => true
|
||||
| FOUND_WITH_CHILDREN _ => true
|
||||
| CHILDREN _ => false
|
||||
|
||||
fun checkNodeExistsMatch (searchKey, trieKey, keyPos, children, idx) =
|
||||
(case searchKeyMatch (searchKey, trieKey, keyPos + 1) of
|
||||
NO_SEARCH_MATCH => false
|
||||
| FULL_SEARCH_MATCH =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in isFoundNode trieChild
|
||||
end
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in recurseExists (searchKey, String.size trieKey, trieChild)
|
||||
end
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
||||
|
||||
and checkExistsLeft (searchKey, searchChr, keyPos, leftKeys, leftChildren) =
|
||||
case (leftKeys, leftChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val firstNode = Vector.sub (khd, 0)
|
||||
val firstNodeChr = String.sub (firstNode, keyPos)
|
||||
in
|
||||
if firstNodeChr < searchChr then
|
||||
(* keep checking leftwards *)
|
||||
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
||||
else if firstNodeChr = searchChr then
|
||||
(* check if there is a full/partial key match at this node *)
|
||||
checkNodeExistsMatch (searchKey, firstNode, keyPos, chd, 0)
|
||||
else
|
||||
(* binary search this node to see if there is a matching chr *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeExistsMatch
|
||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => false)
|
||||
end
|
||||
| (_, _) => false
|
||||
|
||||
and checkExistsRight (searchKey, searchChr, keyPos, rightKeys, rightChildren) =
|
||||
case (rightKeys, rightChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
||||
in
|
||||
if lastNodeChr > searchChr then
|
||||
(* keep checking rightwards *)
|
||||
checkExistsRight (searchKey, searchChr, keyPos, ktl, ctl)
|
||||
else if lastNodeChr = searchChr then
|
||||
(* check for full/partial match at this node *)
|
||||
checkNodeExistsMatch
|
||||
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||
else
|
||||
(* binary search this node to see if there is a matching chr *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeExistsMatch
|
||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => false)
|
||||
end
|
||||
| (_, _) => false
|
||||
|
||||
and decideExistsDirection
|
||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren) =
|
||||
case (leftKeys, leftChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val searchChr = String.sub (searchKey, keyPos)
|
||||
val firstNode = Vector.sub (khd, 0)
|
||||
val startNodeChr = String.sub (firstNode, keyPos)
|
||||
in
|
||||
if searchChr < startNodeChr then
|
||||
checkExistsLeft (searchKey, searchChr, keyPos, ktl, ctl)
|
||||
else if searchChr = startNodeChr then
|
||||
(* check string match on first key/firstNode
|
||||
* and recurse if full or partial match *)
|
||||
checkNodeExistsMatch (searchKey, firstNode, keyPos, chd, 0)
|
||||
else
|
||||
(* implicit: searchChr > startNodeChr *)
|
||||
let
|
||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
||||
in
|
||||
if searchChr > lastNodeChr then
|
||||
(* check rightKeys/rightChildren *)
|
||||
checkExistsRight
|
||||
(searchKey, searchChr, keyPos, rightKeys, rightChildren)
|
||||
else if searchChr = lastNodeChr then
|
||||
(* check string match on last key/lastNode
|
||||
* and recurse if full or partial match *)
|
||||
checkNodeExistsMatch
|
||||
(searchKey, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||
else
|
||||
(* implicit: searchChr < lastNodeChr
|
||||
* should perform binary search at this node
|
||||
* to find if key exists *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeExistsMatch
|
||||
(searchKey, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => false)
|
||||
end
|
||||
end
|
||||
| (_, _) =>
|
||||
(* leftKeys and leftChildren are both empty
|
||||
* so check rightKeys/rightChildren *)
|
||||
checkExistsRight
|
||||
( searchKey
|
||||
, String.sub (searchKey, keyPos)
|
||||
, keyPos
|
||||
, rightKeys
|
||||
, rightChildren
|
||||
)
|
||||
|
||||
and recurseExists (searchKey, keyPos, trie) =
|
||||
case trie of
|
||||
CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
||||
decideExistsDirection
|
||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
||||
| FOUND_WITH_CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
||||
decideExistsDirection
|
||||
(searchKey, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
||||
| FOUND => false
|
||||
|
||||
fun exists (searchKey, trie) =
|
||||
if isEmpty trie orelse String.size searchKey = 0 then false
|
||||
else recurseExists (searchKey, 0, trie)
|
||||
|
||||
fun checkNodeSubtrieMatch (prefix, trieKey, keyPos, children, idx) =
|
||||
case searchKeyMatch (prefix, trieKey, keyPos + 1) of
|
||||
NO_SEARCH_MATCH => NONE
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in recurseGetPrefixSubtrie (prefix, String.size trieKey, trieChild)
|
||||
end
|
||||
| FULL_SEARCH_MATCH =>
|
||||
let val node = Vector.sub (children, idx)
|
||||
in SOME (prefix, node)
|
||||
end
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY =>
|
||||
let val node = Vector.sub (children, idx)
|
||||
in SOME (trieKey, node)
|
||||
end
|
||||
|
||||
and helpGetPrefixSubtrieChildren (prefix, keyPos, keys, children, trie) =
|
||||
let
|
||||
val findChr = String.sub (prefix, keyPos)
|
||||
in
|
||||
case findBinSearch (findChr, keyPos, keys) of
|
||||
SOME idx =>
|
||||
let val trieKey = Vector.sub (keys, idx)
|
||||
in checkNodeSubtrieMatch (prefix, trieKey, keyPos, children, idx)
|
||||
end
|
||||
| NONE => NONE
|
||||
end
|
||||
|
||||
and getPrefixSubtrieLeft (prefix, searchChr, keyPos, leftKeys, leftChildren) =
|
||||
case (leftKeys, leftChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val firstNode = Vector.sub (khd, 0)
|
||||
val firstNodeChr = String.sub (firstNode, keyPos)
|
||||
in
|
||||
if firstNodeChr < searchChr then
|
||||
(* keep moving leftwards *)
|
||||
getPrefixSubtrieLeft (prefix, searchChr, keyPos, ktl, ctl)
|
||||
else if firstNodeChr = searchChr then
|
||||
(* check if there is a full/partial key match at this node *)
|
||||
checkNodeSubtrieMatch (prefix, firstNode, keyPos, chd, 0)
|
||||
else
|
||||
(* binary search this node to see if there is a matching chr *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeSubtrieMatch
|
||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => NONE)
|
||||
end
|
||||
| (_, _) => NONE
|
||||
|
||||
and getPrefixSubtrieRight
|
||||
(prefix, searchChr, keyPos, rightKeys, rightChildren) =
|
||||
case (rightKeys, rightChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
||||
in
|
||||
if lastNodeChr > searchChr then
|
||||
(* keep checking rightwards *)
|
||||
getPrefixSubtrieRight (prefix, searchChr, keyPos, ktl, ctl)
|
||||
else if lastNodeChr = searchChr then
|
||||
(* check for full/partial match at this node *)
|
||||
checkNodeSubtrieMatch
|
||||
(prefix, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||
else
|
||||
(* binary search this node to see if there is a matching chr *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeSubtrieMatch
|
||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => NONE)
|
||||
end
|
||||
| (_, _) => NONE
|
||||
|
||||
and decideGetPrefixSubtrieDirection
|
||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren) =
|
||||
case (leftKeys, leftChildren) of
|
||||
(khd :: ktl, chd :: ctl) =>
|
||||
let
|
||||
val searchChr = String.sub (prefix, keyPos)
|
||||
val firstNode = Vector.sub (khd, 0)
|
||||
val startNodeChr = String.sub (firstNode, keyPos)
|
||||
in
|
||||
if searchChr < startNodeChr then
|
||||
getPrefixSubtrieLeft (prefix, searchChr, keyPos, ktl, ctl)
|
||||
else if searchChr = startNodeChr then
|
||||
(* check string match on first key/firstNode
|
||||
* and recurse if full or partial match *)
|
||||
checkNodeSubtrieMatch (prefix, firstNode, keyPos, chd, 0)
|
||||
else
|
||||
(* implicit: searchChr > startNodeChr *)
|
||||
let
|
||||
val lastNode = Vector.sub (khd, Vector.length khd - 1)
|
||||
val lastNodeChr = String.sub (lastNode, keyPos)
|
||||
in
|
||||
if searchChr > lastNodeChr then
|
||||
(* check rightKeys/rightChildren *)
|
||||
getPrefixSubtrieRight
|
||||
(prefix, searchChr, keyPos, rightKeys, rightChildren)
|
||||
else if searchChr = lastNodeChr then
|
||||
(* check string match on last key/lastNode
|
||||
* and recurse if full or partial match *)
|
||||
checkNodeSubtrieMatch
|
||||
(prefix, lastNode, keyPos, chd, Vector.length khd - 1)
|
||||
else
|
||||
(* implicit: searchChr < lastNodeChr
|
||||
* should perform binary search at this node
|
||||
* to find if key exists *)
|
||||
(case findBinSearch (searchChr, keyPos, khd) of
|
||||
SOME idx =>
|
||||
checkNodeSubtrieMatch
|
||||
(prefix, Vector.sub (khd, idx), keyPos, chd, idx)
|
||||
| NONE => NONE)
|
||||
end
|
||||
end
|
||||
| (_, _) => NONE
|
||||
|
||||
and recurseGetPrefixSubtrie (prefix, keyPos, trie) =
|
||||
case trie of
|
||||
CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
||||
decideGetPrefixSubtrieDirection
|
||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
||||
| FOUND_WITH_CHILDREN {leftKeys, leftChildren, rightKeys, rightChildren} =>
|
||||
decideGetPrefixSubtrieDirection
|
||||
(prefix, keyPos, leftKeys, leftChildren, rightKeys, rightChildren)
|
||||
| FOUND => NONE
|
||||
|
||||
fun getPrefixSubtrie (prefix, trie) =
|
||||
if isEmpty trie then NONE else recurseGetPrefixSubtrie (prefix, 0, trie)
|
||||
|
||||
datatype insert_string_match =
|
||||
NO_INSERT_MATCH
|
||||
| DIFFERENCE_FOUND_AT of int
|
||||
| FULL_INSERT_MATCH
|
||||
| INSERT_KEY_CONTAINS_TRIE_KEY
|
||||
| TRIE_KEY_CONTAINS_INSERT_KEY
|
||||
|
||||
fun insertKeyMatch (insertKey, trieKey, keyPos) =
|
||||
if
|
||||
keyPos < String.size insertKey
|
||||
then
|
||||
if keyPos < String.size trieKey then
|
||||
let
|
||||
val searchChr = String.sub (insertKey, keyPos)
|
||||
val trieChr = String.sub (trieKey, keyPos)
|
||||
in
|
||||
if searchChr = trieChr then
|
||||
insertKeyMatch (insertKey, trieKey, keyPos + 1)
|
||||
else
|
||||
DIFFERENCE_FOUND_AT keyPos
|
||||
end
|
||||
else
|
||||
INSERT_KEY_CONTAINS_TRIE_KEY
|
||||
else if
|
||||
keyPos = String.size insertKey
|
||||
then
|
||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_INSERT_KEY
|
||||
else if keyPos = String.size trieKey then FULL_INSERT_MATCH
|
||||
else INSERT_KEY_CONTAINS_TRIE_KEY
|
||||
else (* implicit: keyPos > String.size insertKey *) if
|
||||
keyPos <= String.size trieKey
|
||||
then
|
||||
TRIE_KEY_CONTAINS_INSERT_KEY
|
||||
else
|
||||
NO_INSERT_MATCH
|
||||
|
||||
datatype insert_bin_search_result =
|
||||
INSERT_NEW_CHILD of int
|
||||
| FOUND_INSERT_POS of int
|
||||
| APPEND_NEW_CHILD
|
||||
|
||||
fun linearSearch (findChr, keyPos, idx, children) =
|
||||
if idx = Vector.length children then
|
||||
APPEND_NEW_CHILD
|
||||
else
|
||||
let
|
||||
val curStr = Vector.sub (children, idx)
|
||||
val curChr = String.sub (curStr, keyPos)
|
||||
in
|
||||
if curChr > findChr then INSERT_NEW_CHILD idx
|
||||
else linearSearch (findChr, keyPos, idx + 1, children)
|
||||
end
|
||||
|
||||
fun helpInsertBinSearch (findChr, keyPos, children, low, high) =
|
||||
let
|
||||
val mid = low + ((high - low) div 2)
|
||||
in
|
||||
if high >= low then
|
||||
let
|
||||
val midStr = Vector.sub (children, mid)
|
||||
val midChr = String.sub (midStr, keyPos)
|
||||
in
|
||||
if midChr = findChr then
|
||||
FOUND_INSERT_POS mid
|
||||
else if midChr < findChr then
|
||||
helpInsertBinSearch (findChr, keyPos, children, mid + 1, high)
|
||||
else
|
||||
helpInsertBinSearch (findChr, keyPos, children, low, mid - 1)
|
||||
end
|
||||
else
|
||||
linearSearch (findChr, keyPos, if mid >= 0 then mid else 0, children)
|
||||
end
|
||||
|
||||
fun insertBinSearch (findChr, keyPos, children) =
|
||||
helpInsertBinSearch
|
||||
(findChr, keyPos, children, 0, Vector.length children - 1)
|
||||
|
||||
fun insertDifferenceFoundAtLeft
|
||||
( insKey
|
||||
, insIdx
|
||||
, splitTrieKeyStart
|
||||
, trieChild
|
||||
, childLeftKeys
|
||||
, childLeftChildren
|
||||
, childRightKeys
|
||||
, childRightChildren
|
||||
, parentKeys
|
||||
, parentChildren
|
||||
, parentConstructor
|
||||
) =
|
||||
let
|
||||
(* child node should always have CHILDREN case,
|
||||
* because we are splitting prefix into two,
|
||||
* when neither trieKey nor insKey match the prefix. *)
|
||||
val childNode = CHILDREN {keys = childKeys, children = childChildren}
|
||||
val keys =
|
||||
Vector.mapi
|
||||
(fn (idx, key) => if idx <> insIdx then key else splitTrieKeyStart)
|
||||
parentKeys
|
||||
|
||||
val children =
|
||||
Vector.mapi (fn (idx, elt) => if idx <> insIdx then elt else childNode)
|
||||
parentChildren
|
||||
in
|
||||
parentConstructor {keys = keys, children = children}
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user