145 lines
4.6 KiB
Standard ML
145 lines
4.6 KiB
Standard ML
|
|
structure StringSet =
|
||
|
|
struct
|
||
|
|
datatype t =
|
||
|
|
CHILDREN of {keys: string vector, children: t vector}
|
||
|
|
| FOUND_WITH_CHILDREN of {keys: string vector, children: t vector}
|
||
|
|
| FOUND
|
||
|
|
|
||
|
|
fun helpBinSearch (findChr, keyPos, children, low, high) =
|
||
|
|
if high >= low then
|
||
|
|
let
|
||
|
|
val mid = low + ((high - low) div 2)
|
||
|
|
val midStr = Vector.sub (children, mid)
|
||
|
|
val midChr = String.sub (midStr, keyPos)
|
||
|
|
in
|
||
|
|
if midChr = findChr then
|
||
|
|
SOME mid
|
||
|
|
else if midChr < findChr then
|
||
|
|
helpBinSearch (findChr, keyPos, children, mid + 1, high)
|
||
|
|
else
|
||
|
|
helpBinSearch (findChr, keyPos, children, low, mid - 1)
|
||
|
|
end
|
||
|
|
else
|
||
|
|
NONE
|
||
|
|
|
||
|
|
fun binSearch (findChr, keyPos, children) =
|
||
|
|
if Vector.length children > 0 then
|
||
|
|
helpBinSearch (findChr, keyPos, children, 0, Vector.length children - 1)
|
||
|
|
else
|
||
|
|
NONE
|
||
|
|
|
||
|
|
datatype search_string_match =
|
||
|
|
NO_SEARCH_MATCH
|
||
|
|
| FULL_SEARCH_MATCH
|
||
|
|
| SEARCH_KEY_CONTAINS_TRIE_KEY
|
||
|
|
| TRIE_KEY_CONTAINS_SEARCH_KEY
|
||
|
|
|
||
|
|
fun searchKeyMatch (searchKey, trieKey, keyPos) =
|
||
|
|
if
|
||
|
|
keyPos < String.size searchKey
|
||
|
|
then
|
||
|
|
if keyPos < String.size trieKey then
|
||
|
|
let
|
||
|
|
val searchChr = String.sub (searchKey, keyPos)
|
||
|
|
val trieChr = String.sub (trieKey, keyPos)
|
||
|
|
in
|
||
|
|
if searchChr = trieChr then
|
||
|
|
searchKeyMatch (searchKey, trieKey, keyPos + 1)
|
||
|
|
else
|
||
|
|
NO_SEARCH_MATCH
|
||
|
|
end
|
||
|
|
else
|
||
|
|
SEARCH_KEY_CONTAINS_TRIE_KEY
|
||
|
|
else if
|
||
|
|
keyPos = String.size searchKey
|
||
|
|
then
|
||
|
|
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY
|
||
|
|
else if keyPos = String.size trieKey then FULL_SEARCH_MATCH
|
||
|
|
else SEARCH_KEY_CONTAINS_TRIE_KEY
|
||
|
|
else
|
||
|
|
(* implicit: keyPos > String.size searchKey *)
|
||
|
|
if keyPos <= String.size trieKey then
|
||
|
|
TRIE_KEY_CONTAINS_SEARCH_KEY
|
||
|
|
else
|
||
|
|
NO_SEARCH_MATCH
|
||
|
|
|
||
|
|
fun isFoundNode node =
|
||
|
|
case node of
|
||
|
|
FOUND => true
|
||
|
|
| FOUND_WITH_CHILDREN _ => true
|
||
|
|
| CHILDREN _ => false
|
||
|
|
|
||
|
|
fun exists (searchKey, keyPos, trie) =
|
||
|
|
case trie of
|
||
|
|
CHILDREN {keys, children} =>
|
||
|
|
let
|
||
|
|
val findChr = String.sub (searchKey, keyPos)
|
||
|
|
in
|
||
|
|
(case binSearch (findChr, keyPos, keys) of
|
||
|
|
SOME idx =>
|
||
|
|
let
|
||
|
|
val trieKey = Vector.sub (keys, idx)
|
||
|
|
val nextKeyPos = keyPos + 1
|
||
|
|
in
|
||
|
|
(case searchKeyMatch (searchKey, trieKey, nextKeyPos) of
|
||
|
|
NO_SEARCH_MATCH => false
|
||
|
|
| FULL_SEARCH_MATCH =>
|
||
|
|
let val trieChild = Vector.sub (children, idx)
|
||
|
|
in isFoundNode trieChild
|
||
|
|
end
|
||
|
|
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||
|
|
let val trieChild = Vector.sub (children, idx)
|
||
|
|
in exists (searchKey, nextKeyPos, trieChild)
|
||
|
|
end
|
||
|
|
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
||
|
|
end
|
||
|
|
| NONE => false)
|
||
|
|
end
|
||
|
|
| FOUND_WITH_CHILDREN {keys, children} =>
|
||
|
|
let
|
||
|
|
val findChr = String.sub (searchKey, keyPos)
|
||
|
|
in
|
||
|
|
(case binSearch (findChr, keyPos, keys) of
|
||
|
|
SOME idx =>
|
||
|
|
let
|
||
|
|
val trieKey = Vector.sub (keys, idx)
|
||
|
|
val nextKeyPos = keyPos + 1
|
||
|
|
in
|
||
|
|
(case searchKeyMatch (searchKey, trieKey, nextKeyPos) of
|
||
|
|
NO_SEARCH_MATCH => false
|
||
|
|
| FULL_SEARCH_MATCH =>
|
||
|
|
let val trieChild = Vector.sub (children, idx)
|
||
|
|
in isFoundNode trieChild
|
||
|
|
end
|
||
|
|
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||
|
|
let val trieChild = Vector.sub (children, idx)
|
||
|
|
in exists (searchKey, nextKeyPos, trieChild)
|
||
|
|
end
|
||
|
|
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
||
|
|
end
|
||
|
|
| NONE => false)
|
||
|
|
end
|
||
|
|
| FOUND => keyPos = String.size searchKey
|
||
|
|
|
||
|
|
(*
|
||
|
|
* todo:
|
||
|
|
* Code insert function, with an insertKeyMatch helper function
|
||
|
|
* which is similar to the searchKeyMatch helper function,
|
||
|
|
* except that it may also return the index to split a node at
|
||
|
|
* in a new datatype.
|
||
|
|
*)
|
||
|
|
|
||
|
|
fun insert (insKey, keyPos, trie) =
|
||
|
|
case trie of
|
||
|
|
FOUND =>
|
||
|
|
if keyPos = String.size insKey then
|
||
|
|
FOUND
|
||
|
|
else
|
||
|
|
FOUND_WITH_CHILDREN
|
||
|
|
{ keys = Vector.fromList [insKey]
|
||
|
|
, children = Vector.fromList [FOUND]
|
||
|
|
}
|
||
|
|
| CHILDREN {keys, children} => 0 (* todo *)
|
||
|
|
| FOUND_WITH_CHILDREN {keys, children} => 0 (* todo *)
|
||
|
|
end
|