From 88ec026921aabcdc14c971284a7a0564870fe927 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Tue, 3 Sep 2024 22:09:31 +0100 Subject: [PATCH] progress with coding insert function --- src/string-set.sml | 76 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/src/string-set.sml b/src/string-set.sml index d1c35d1..a8e88a6 100644 --- a/src/string-set.sml +++ b/src/string-set.sml @@ -58,10 +58,9 @@ struct else SEARCH_KEY_CONTAINS_TRIE_KEY else (* implicit: keyPos > String.size searchKey *) - if keyPos <= String.size trieKey then - TRIE_KEY_CONTAINS_SEARCH_KEY - else - NO_SEARCH_MATCH + if keyPos <= String.size trieKey + then TRIE_KEY_CONTAINS_SEARCH_KEY + else NO_SEARCH_MATCH fun isFoundNode node = case node of @@ -123,14 +122,14 @@ struct datatype insert_string_match = NO_INSERT_MATCH - (* may need to split string if difference found but prefix matched *) + (* may need to split string if difference found but prefix matched *) | DIFFERENCE_FOUND_AT of int - (* no need to do anything if insert key matched, - * as this is a set where only strings are stored *) + (* no need to do anything if insert key matched, + * as this is a set where only strings are stored *) | FULL_INSERT_MATCH - (* if insert key contains trie key, may need to recurse down node *) + (* if insert key contains trie key, may need to recurse down node *) | INSERT_KEY_CONTAINS_TRIE_KEY - (* if trie key contains insert key, need to split node *) + (* if trie key contains insert key, need to split node *) | TRIE_KEY_CONTAINS_INSERT_KEY fun insertKeyMatch (insertKey, trieKey, keyPos) = @@ -157,20 +156,59 @@ struct else INSERT_KEY_CONTAINS_TRIE_KEY else (* implicit: keyPos > String.size insertKey *) - if keyPos <= String.size trieKey then - TRIE_KEY_CONTAINS_INSERT_KEY + if keyPos <= String.size trieKey + then TRIE_KEY_CONTAINS_INSERT_KEY + else NO_INSERT_MATCH + + datatype insert_bin_search_result = + INSERT_NEW_CHILD of int + | FOUND_INSERT_POS of 
int
+  | APPEND_NEW_CHILD
+
+  (* Scans children from index idx upwards, comparing the character at keyPos
+   * of each entry with findChr.
+   * Returns INSERT_NEW_CHILD i for the first entry i whose character is
+   * greater than findChr, or APPEND_NEW_CHILD if idx reaches the end of the
+   * vector first.
+   * NOTE(review): String.sub raises Subscript if an entry is not longer than
+   * keyPos; presumably callers guarantee this - confirm. *)
+  fun linearSearch (findChr, keyPos, idx, children) =
+    if idx = Vector.length children then
+      APPEND_NEW_CHILD
+    else
+      let
+        val curStr = Vector.sub (children, idx)
+        val curChr = String.sub (curStr, keyPos)
+      in
+        if curChr > findChr then INSERT_NEW_CHILD idx
+        else linearSearch (findChr, keyPos, idx + 1, children)
+      end
+
+  (* Binary search over children[low..high] (inclusive) for an entry whose
+   * character at keyPos equals findChr.
+   * Returns FOUND_INSERT_POS i when entry i matches; otherwise returns
+   * whatever linearSearch reports for the position where a new entry
+   * would go (INSERT_NEW_CHILD i or APPEND_NEW_CHILD).
+   * Assumes children is ordered by the character at keyPos - TODO confirm
+   * against the code that builds the vector. *)
+  fun helpInsertBinSearch (findChr, keyPos, children, low, high) =
+    let
+      val mid = low + ((high - low) div 2)
+    in
+      if high >= low then
+        let
+          val midStr = Vector.sub (children, mid)
+          val midChr = String.sub (midStr, keyPos)
+        in
+          if midChr = findChr then
+            FOUND_INSERT_POS mid
+          else if midChr < findChr then
+            helpInsertBinSearch (findChr, keyPos, children, mid + 1, high)
+          else
+            helpInsertBinSearch (findChr, keyPos, children, low, mid - 1)
+        end
       else
-        NO_INSERT_MATCH
+        (* The range is empty, so high = low - 1 and low is the insertion
+         * point. mid must not be used here: div rounds toward negative
+         * infinity, so mid = low - 1, which is -1 when findChr sorts before
+         * every child (or children is empty) and would make Vector.sub
+         * raise Subscript. *)
+        linearSearch (findChr, keyPos, low, children)
+    end
+
+  (* Entry point: searches the whole children vector for findChr at keyPos.
+   * An empty vector gives the range (0, -1), which falls straight through
+   * to the empty-range case and yields APPEND_NEW_CHILD. *)
+  fun insertBinSearch (findChr, keyPos, children) =
+    helpInsertBinSearch
+      (findChr, keyPos, children, 0, Vector.length children - 1)
 
   (*
    * todo:
-   * Code another function for binary search; this time for insert.
-   * The difference between findBinSearch and insertBinSearch
-   * should be that, while findBinSearch returns NONE if the value is not found,
-   * insertBinSearch should return the appropriate index to insert at
-   * if the value is not found.
-   * The main insert function can then create new key/children vectors,
-   * containing the insertKey and FOUND at the appropriate index.
+   * Complete code for CHILDREN and FOUND_WITH_CHILDREN cases in insert function.
+   * The start will be similar to the exists function,
+   * with binary search possibly followed by a string matching comparison.
+   * No string matching comparison is needed if the current char is not found
+   * in the binary search stage, however;
+   * instead, we will insert a new element into the array when that happens.
    *)
   fun insert (insKey, keyPos, trie) =