From 2f14c999521584061031e2972b7761eb542ef69f Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Tue, 3 Sep 2024 10:15:55 +0100 Subject: [PATCH] init with working 'exists' function for string set --- src/string-set.sml | 144 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 src/string-set.sml diff --git a/src/string-set.sml b/src/string-set.sml new file mode 100644 index 0000000..d8e1a5b --- /dev/null +++ b/src/string-set.sml @@ -0,0 +1,144 @@ +structure StringSet = +struct + datatype t = + CHILDREN of {keys: string vector, children: t vector} + | FOUND_WITH_CHILDREN of {keys: string vector, children: t vector} + | FOUND + + fun helpBinSearch (findChr, keyPos, children, low, high) = + if high >= low then + let + val mid = low + ((high - low) div 2) + val midStr = Vector.sub (children, mid) + val midChr = String.sub (midStr, keyPos) + in + if midChr = findChr then + SOME mid + else if midChr < findChr then + helpBinSearch (findChr, keyPos, children, mid + 1, high) + else + helpBinSearch (findChr, keyPos, children, low, mid - 1) + end + else + NONE + + fun binSearch (findChr, keyPos, children) = + if Vector.length children > 0 then + helpBinSearch (findChr, keyPos, children, 0, Vector.length children - 1) + else + NONE + + datatype search_string_match = + NO_SEARCH_MATCH + | FULL_SEARCH_MATCH + | SEARCH_KEY_CONTAINS_TRIE_KEY + | TRIE_KEY_CONTAINS_SEARCH_KEY + + fun searchKeyMatch (searchKey, trieKey, keyPos) = + if + keyPos < String.size searchKey + then + if keyPos < String.size trieKey then + let + val searchChr = String.sub (searchKey, keyPos) + val trieChr = String.sub (trieKey, keyPos) + in + if searchChr = trieChr then + searchKeyMatch (searchKey, trieKey, keyPos + 1) + else + NO_SEARCH_MATCH + end + else + SEARCH_KEY_CONTAINS_TRIE_KEY + else if + keyPos = String.size searchKey + then + if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY + else if keyPos = String.size trieKey then FULL_SEARCH_MATCH + else SEARCH_KEY_CONTAINS_TRIE_KEY + else + (* implicit: keyPos > String.size searchKey *) + if keyPos <= String.size trieKey then + TRIE_KEY_CONTAINS_SEARCH_KEY + else + NO_SEARCH_MATCH + + fun isFoundNode node = + case node of + FOUND => true + | FOUND_WITH_CHILDREN _ => true + | CHILDREN _ => false + + fun exists (searchKey, keyPos, trie) = + case trie of + CHILDREN {keys, children} => + let + val findChr = String.sub (searchKey, keyPos) + in + (case binSearch (findChr, keyPos, keys) of + SOME idx => + let + val trieKey = Vector.sub (keys, idx) + val nextKeyPos = keyPos + 1 + in + (case searchKeyMatch (searchKey, trieKey, nextKeyPos) of + NO_SEARCH_MATCH => false + | FULL_SEARCH_MATCH => + let val trieChild = Vector.sub (children, idx) + in isFoundNode trieChild + end + | SEARCH_KEY_CONTAINS_TRIE_KEY => + let val trieChild = Vector.sub (children, idx) + in exists (searchKey, nextKeyPos, trieChild) + end + | TRIE_KEY_CONTAINS_SEARCH_KEY => false) + end + | NONE => false) + end + | FOUND_WITH_CHILDREN {keys, children} => + let + val findChr = String.sub (searchKey, keyPos) + in + (case binSearch (findChr, keyPos, keys) of + SOME idx => + let + val trieKey = Vector.sub (keys, idx) + val nextKeyPos = keyPos + 1 + in + (case searchKeyMatch (searchKey, trieKey, nextKeyPos) of + NO_SEARCH_MATCH => false + | FULL_SEARCH_MATCH => + let val trieChild = Vector.sub (children, idx) + in isFoundNode trieChild + end + | SEARCH_KEY_CONTAINS_TRIE_KEY => + let val trieChild = Vector.sub (children, idx) + in exists (searchKey, nextKeyPos, trieChild) + end + | TRIE_KEY_CONTAINS_SEARCH_KEY => false) + end + | NONE => false) + end + | FOUND => keyPos = String.size searchKey + + (* + * todo: + * Code insert function, with an insertKeyMatch helper function + * which is similar to the searchKeyMatch helper function, + * except that it may also return the index to split a node at + * in a new datatype. + *) + + fun insert (insKey, keyPos, trie) = + case trie of + FOUND => + if keyPos = String.size insKey then + FOUND + else + FOUND_WITH_CHILDREN + { keys = Vector.fromList [insKey] + , children = Vector.fromList [FOUND] + } + | CHILDREN {keys, children} => 0 (* todo *) + | FOUND_WITH_CHILDREN {keys, children} => 0 (* todo *) +end