From fa3245b47cba9fc6b8b08390eec06b12152f9282 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Wed, 4 Sep 2024 07:54:35 +0100 Subject: [PATCH] progress coding insert function --- src/string-set.sml | 243 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 218 insertions(+), 25 deletions(-) diff --git a/src/string-set.sml b/src/string-set.sml index a8e88a6..3634002 100644 --- a/src/string-set.sml +++ b/src/string-set.sml @@ -56,11 +56,12 @@ struct if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_SEARCH_KEY else if keyPos = String.size trieKey then FULL_SEARCH_MATCH else SEARCH_KEY_CONTAINS_TRIE_KEY - else - (* implicit: keyPos > String.size searchKey *) - if keyPos <= String.size trieKey - then TRIE_KEY_CONTAINS_SEARCH_KEY - else NO_SEARCH_MATCH + else (* implicit: keyPos > String.size searchKey *) if + keyPos <= String.size trieKey + then + TRIE_KEY_CONTAINS_SEARCH_KEY + else + NO_SEARCH_MATCH fun isFoundNode node = case node of @@ -68,7 +69,7 @@ struct | FOUND_WITH_CHILDREN _ => true | CHILDREN _ => false - fun exists (searchKey, keyPos, trie) = + fun helpExists (searchKey, keyPos, trie) = case trie of CHILDREN {keys, children} => let @@ -88,7 +89,7 @@ struct end | SEARCH_KEY_CONTAINS_TRIE_KEY => let val trieChild = Vector.sub (children, idx) - in exists (searchKey, nextKeyPos, trieChild) + in helpExists (searchKey, nextKeyPos, trieChild) end | TRIE_KEY_CONTAINS_SEARCH_KEY => false) end @@ -112,7 +113,7 @@ struct end | SEARCH_KEY_CONTAINS_TRIE_KEY => let val trieChild = Vector.sub (children, idx) - in exists (searchKey, nextKeyPos, trieChild) + in helpExists (searchKey, nextKeyPos, trieChild) end | TRIE_KEY_CONTAINS_SEARCH_KEY => false) end @@ -120,12 +121,16 @@ struct end | FOUND => keyPos = String.size searchKey + fun exists (searchKey, trie) = helpExists (searchKey, 0, trie) + datatype insert_string_match = NO_INSERT_MATCH (* may need to split string if difference found but prefix matched *) | DIFFERENCE_FOUND_AT of int - (* no need to do anything if insert key matched, - * as this is a set where only strings are stored *) + (* may not need to do anything if insert key matched, + * as this is a set where only strings are stored. + * however, if this is a non-found node, then I need to change + * the tag/case. *) | FULL_INSERT_MATCH (* if insert key contains trie key, may need to recurse down node *) | INSERT_KEY_CONTAINS_TRIE_KEY @@ -154,11 +159,12 @@ struct if keyPos < String.size trieKey then TRIE_KEY_CONTAINS_INSERT_KEY else if keyPos = String.size trieKey then FULL_INSERT_MATCH else INSERT_KEY_CONTAINS_TRIE_KEY - else - (* implicit: keyPos > String.size insertKey *) - if keyPos <= String.size trieKey - then TRIE_KEY_CONTAINS_INSERT_KEY - else NO_INSERT_MATCH + else (* implicit: keyPos > String.size insertKey *) if + keyPos <= String.size trieKey + then + TRIE_KEY_CONTAINS_INSERT_KEY + else + NO_INSERT_MATCH datatype insert_bin_search_result = INSERT_NEW_CHILD of int @@ -211,16 +217,203 @@ struct * instead, we will insert a new element into the array when that happens. *) - fun insert (insKey, keyPos, trie) = + fun insertDifferenceFoundAt + ( insKey + , insIdx + , splitTrieKeyStart + , splitTrieKeyEnd + , trieChild + , childKeys + , childChildren + , parentKeys + , parentChildren + ) = + let + val childNode = CHILDREN {keys = childKeys, children = childChildren} + + val keys = + Vector.mapi + (fn (idx, key) => if idx <> insIdx then key else splitTrieKeyStart) + parentKeys + + val children = + Vector.mapi (fn (idx, elt) => if idx <> insIdx then elt else childNode) + parentChildren + + val node = CHILDREN {keys = keys, children = children} + in + SOME node + end + + + fun helpInsert (insKey, keyPos, trie) : t option = case trie of FOUND => - if keyPos = String.size insKey then - FOUND - else - FOUND_WITH_CHILDREN - { keys = Vector.fromList [insKey] - , children = Vector.fromList [FOUND] - } - | CHILDREN {keys, children} => 0 (* todo *) - | FOUND_WITH_CHILDREN {keys, children} => 0 (* todo *) + let + val node = + if keyPos = String.size insKey then + FOUND + else + FOUND_WITH_CHILDREN + { keys = Vector.fromList [insKey] + , children = Vector.fromList [FOUND] + } + in + SOME node + end + | CHILDREN {keys, children} => + let + val findChr = String.sub (insKey, keyPos) + in + (case insertBinSearch (findChr, keyPos, keys) of + INSERT_NEW_CHILD insIdx => + let + val newLength = Vector.length keys + 1 + val newKeys = Vector.tabulate (newLength, fn idx => + if idx < insIdx then Vector.sub (keys, idx) + else if idx > insIdx then Vector.sub (keys, idx - 1) + else insKey) + + val newChildren = Vector.tabulate (newLength, fn idx => + if idx < insIdx then Vector.sub (children, idx) + else if idx > insIdx then Vector.sub (children, idx - 1) + else FOUND) + + val node = CHILDREN {keys = newKeys, children = newChildren} + in + SOME node + end + | FOUND_INSERT_POS insIdx => + let + val trieKey = Vector.sub (keys, insIdx) + val nextKeyPos = keyPos + 1 + in + (case insertKeyMatch (insKey, trieKey, nextKeyPos) of + NO_INSERT_MATCH => SOME trie + (* may need to split string if difference found but prefix matched *) + | DIFFERENCE_FOUND_AT diffIdx => + let + val splitTrieKeyStart = + String.substring (trieKey, 0, diffIdx) + val splitTrieKeyEnd = String.substring + (trieKey, diffIdx, String.size trieKey - diffIdx) + val trieChild = Vector.sub (children, insIdx) + in + if + String.sub (trieKey, nextKeyPos) + > String.sub (insKey, nextKeyPos) + then + (* place insKey before trieKey *) + let + val childKeys = + Vector.fromList [insKey, splitTrieKeyEnd] + val childChildren = + Vector.fromList [FOUND, trieChild] + in + insertDifferenceFoundAt + ( insKey + , insIdx + , splitTrieKeyStart + , splitTrieKeyEnd + , trieChild + , childKeys + , childChildren + , keys + , children + ) + end + else + (* place trieKey before insKey *) + let + val childKeys = + Vector.fromList [splitTrieKeyEnd, insKey] + val childChildren = + Vector.fromList [trieChild, FOUND] + in + insertDifferenceFoundAt + ( insKey + , insIdx + , splitTrieKeyStart + , splitTrieKeyEnd + , trieChild + , childKeys + , childChildren + , keys + , children + ) + end + end + (* may not need to do anything if insert key matched, + * as this is a set where only strings are stored. + * however, if this is a non-found node, then I need to change + * the tag/case. *) + | FULL_INSERT_MATCH => + let + val node = + FOUND_WITH_CHILDREN {keys = keys, children = children} + in + SOME node + end + (* if insert key contains trie key, need to recurse down node *) + | INSERT_KEY_CONTAINS_TRIE_KEY => + let + val trieChild = Vector.sub (children, insIdx) + in + (case + helpInsert + (insKey, keyPos + String.size trieKey, trieChild) + of + (result as SOME _) => result + | NONE => SOME trie) + end + (* if trie key contains insert key, need to split node *) + | TRIE_KEY_CONTAINS_INSERT_KEY => + let + val trieChild = Vector.sub (children, insIdx) + val newKeys = + Vector.mapi + (fn (idx, key) => + if idx <> insIdx then insKey else key) keys + + val splitTrieKeyEnd = String.substring + ( trieKey + , String.size insKey + , String.size trieKey - String.size insKey + ) + val newTrieChild = FOUND_WITH_CHILDREN + { keys = Vector.fromList [splitTrieKeyEnd] + , children = Vector.fromList [trieChild] + } + + val newChildren = + Vector.mapi + (fn (idx, elt) => + if idx <> insIdx then elt else newTrieChild) + children + + val node = + CHILDREN {keys = newKeys, children = newChildren} + in + SOME node + end) + end + | APPEND_NEW_CHILD => + let + val newKeys = Vector.concat [keys, Vector.fromList [insKey]] + val newChildren = Vector.concat + [children, Vector.fromList [FOUND]] + val node = CHILDREN {keys = newKeys, children = newChildren} + in + SOME node + end) + end + | FOUND_WITH_CHILDREN {keys, children} => raise Empty (* todo *) + + fun insert (insKey, trie) = + if String.size insKey > 0 then + case helpInsert (insKey, 0, trie) of + SOME trie => trie + | NONE => trie + else + trie end