init with working 'exists' function for string set
This commit is contained in:
144
src/string-set.sml
Normal file
144
src/string-set.sml
Normal file
@@ -0,0 +1,144 @@
|
||||
structure StringSet =
|
||||
struct
|
||||
datatype t =
|
||||
CHILDREN of {keys: string vector, children: t vector}
|
||||
| FOUND_WITH_CHILDREN of {keys: string vector, children: t vector}
|
||||
| FOUND
|
||||
|
||||
fun helpBinSearch (findChr, keyPos, children, low, high) =
|
||||
if high >= low then
|
||||
let
|
||||
val mid = low + ((high - low) div 2)
|
||||
val midStr = Vector.sub (children, mid)
|
||||
val midChr = String.sub (midStr, keyPos)
|
||||
in
|
||||
if midChr = findChr then
|
||||
SOME mid
|
||||
else if midChr < findChr then
|
||||
helpBinSearch (findChr, keyPos, children, mid + 1, high)
|
||||
else
|
||||
helpBinSearch (findChr, keyPos, children, low, mid - 1)
|
||||
end
|
||||
else
|
||||
NONE
|
||||
|
||||
fun binSearch (findChr, keyPos, children) =
|
||||
if Vector.length children > 0 then
|
||||
helpBinSearch (findChr, keyPos, children, 0, Vector.length children - 1)
|
||||
else
|
||||
NONE
|
||||
|
||||
datatype search_string_match =
|
||||
NO_SEARCH_MATCH
|
||||
| FULL_SEARCH_MATCH
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
|
||||
fun searchKeyMatch (searchKey, trieKey, keyPos) =
|
||||
if
|
||||
keyPos < String.size searchKey
|
||||
then
|
||||
if keyPos < String.size trieKey then
|
||||
let
|
||||
val searchChr = String.sub (searchKey, keyPos)
|
||||
val trieChr = String.sub (trieKey, keyPos)
|
||||
in
|
||||
if searchChr = trieChr then
|
||||
searchKeyMatch (searchKey, trieKey, keyPos + 1)
|
||||
else
|
||||
NO_SEARCH_MATCH
|
||||
end
|
||||
else
|
||||
SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
else if
|
||||
keyPos = String.size searchKey
|
||||
then
|
||||
if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
else if keyPos = String.size trieKey then FULL_SEARCH_MATCH
|
||||
else SEARCH_KEY_CONTAINS_TRIE_KEY
|
||||
else
|
||||
(* implicit: keyPos > String.size searchKey *)
|
||||
if keyPos <= String.size trieKey then
|
||||
TRIE_KEY_CONTAINS_SEARCH_KEY
|
||||
else
|
||||
NO_SEARCH_MATCH
|
||||
|
||||
fun isFoundNode node =
|
||||
case node of
|
||||
FOUND => true
|
||||
| FOUND_WITH_CHILDREN _ => true
|
||||
| CHILDREN _ => false
|
||||
|
||||
fun exists (searchKey, keyPos, trie) =
|
||||
case trie of
|
||||
CHILDREN {keys, children} =>
|
||||
let
|
||||
val findChr = String.sub (searchKey, keyPos)
|
||||
in
|
||||
(case binSearch (findChr, keyPos, keys) of
|
||||
SOME idx =>
|
||||
let
|
||||
val trieKey = Vector.sub (keys, idx)
|
||||
val nextKeyPos = keyPos + 1
|
||||
in
|
||||
(case searchKeyMatch (searchKey, trieKey, nextKeyPos) of
|
||||
NO_SEARCH_MATCH => false
|
||||
| FULL_SEARCH_MATCH =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in isFoundNode trieChild
|
||||
end
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in exists (searchKey, nextKeyPos, trieChild)
|
||||
end
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
||||
end
|
||||
| NONE => false)
|
||||
end
|
||||
| FOUND_WITH_CHILDREN {keys, children} =>
|
||||
let
|
||||
val findChr = String.sub (searchKey, keyPos)
|
||||
in
|
||||
(case binSearch (findChr, keyPos, keys) of
|
||||
SOME idx =>
|
||||
let
|
||||
val trieKey = Vector.sub (keys, idx)
|
||||
val nextKeyPos = keyPos + 1
|
||||
in
|
||||
(case searchKeyMatch (searchKey, trieKey, nextKeyPos) of
|
||||
NO_SEARCH_MATCH => false
|
||||
| FULL_SEARCH_MATCH =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in isFoundNode trieChild
|
||||
end
|
||||
| SEARCH_KEY_CONTAINS_TRIE_KEY =>
|
||||
let val trieChild = Vector.sub (children, idx)
|
||||
in exists (searchKey, nextKeyPos, trieChild)
|
||||
end
|
||||
| TRIE_KEY_CONTAINS_SEARCH_KEY => false)
|
||||
end
|
||||
| NONE => false)
|
||||
end
|
||||
| FOUND => keyPos = String.size searchKey
|
||||
|
||||
(*
|
||||
* todo:
|
||||
* Code insert function, with an insertKeyMatch helper function
|
||||
* which is similar to the searchKeyMatch helper function,
|
||||
* except that it may also return the index to split a node at
|
||||
* in a new datatype.
|
||||
*)
|
||||
|
||||
fun insert (insKey, keyPos, trie) =
|
||||
case trie of
|
||||
FOUND =>
|
||||
if keyPos = String.size insKey then
|
||||
FOUND
|
||||
else
|
||||
FOUND_WITH_CHILDREN
|
||||
{ keys = Vector.fromList [insKey]
|
||||
, children = Vector.fromList [FOUND]
|
||||
}
|
||||
| CHILDREN {keys, children} => 0 (* todo *)
|
||||
| FOUND_WITH_CHILDREN {keys, children} => 0 (* todo *)
|
||||
end
|
||||
Reference in New Issue
Block a user