delete insert function and helper functions in persistent-vector.sml, due to a simpler approach: for insertion, we will split the vector into left and right halves, then append the new element to the left half, then concat both halves together again. This is similar to the approach for RRB trees.

This commit is contained in:
2025-12-06 02:19:42 +00:00
parent ca5baf8fd7
commit 470d36f82b

View File

@@ -347,556 +347,103 @@ struct
loopPrevMatch (start, finish, tree, count - 1) loopPrevMatch (start, finish, tree, count - 1)
end end
(* todo: modify below functions so that they also
* use rope-like metadata *)
datatype insert_result =
INSERT_UPDATE of t
| INSERT_SPLIT of t * t
fun getMaxSize tree = fun getMaxSize tree =
case tree of case tree of
LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
| BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) | BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
fun insertSplitSizesLeft (idx, sizes, left, right, prevSize) = fun splitLeft (offset, tree) =
if idx < halfSize then
(* left-split node is in left vector *)
if idx + 1 < halfSize then
(* right-split node is also in left vector *)
let
val ls = getMaxSize left + prevSize
val rs = getMaxSize right + ls
val sizeDiff = rs - prevSize
in
Vector.tabulate (halfSize,
fn i =>
if i = idx then
ls
else if i = idx + 1 then
rs
else if i < idx then
Vector.sub (sizes, i)
else
Vector.sub (sizes, i - 1) + sizeDiff)
end
else
(* left-split node is in left vector
* but right-split node is not *)
let
val ls = getMaxSize left + prevSize
in
Vector.tabulate (halfSize,
fn i =>
if i = idx then
ls
else
Vector.sub (sizes, i))
end
else
(* neither left-split node, nor right-split node,
* are in the left-split vector. *)
let
val slice = VectorSlice.slice (sizes, 0, SOME halfSize)
in
VectorSlice.vector slice
end
fun insertSplitSizesRight (idx, sizes, left, right, leftSizes) =
if idx + 1 >= halfSize then
(* right-split node is in right vector *)
if idx >= halfSize then
(* left-split node is in right vector as well *)
let
val relativeIdx = idx - Vector.length leftSizes
val maxLeftSize = Vector.sub (leftSizes, Vector.length leftSizes - 1)
val prevSize =
if relativeIdx = 0 then
0
else
Vector.sub (sizes, idx - 1)
val ls = getMaxSize left + prevSize
val rs = getMaxSize right + ls
val len = Vector.length sizes - Vector.length leftSizes
val sizeDiff = rs - maxLeftSize
in
Vector.tabulate (len,
fn i =>
if i < relativeIdx then
Vector.sub (sizes, i + relativeIdx) - maxLeftSize
else if i = relativeIdx then
ls
else if i = relativeIdx + 1 then
rs
else
(* i > relativeIdx *)
Vector.sub (sizes, i + relativeIdx - 1) + sizeDiff
)
end
else
(* only right-split node is in right vector *)
let
val maxLeft = Vector.sub (sizes, halfSize - 1)
val len = Vector.length sizes - Vector.length leftSizes
in
Vector.tabulate (len,
fn i =>
if i = 0 then
getMaxSize right
else
Vector.sub (sizes, i + len) - maxLeft
)
end
else
(* neither left-split or right-split node are in right vector *)
let
val len = SOME (Vector.length sizes - Vector.length leftSizes)
val rightSizesSlice = VectorSlice.slice (sizes, halfSize, len)
val maxLeft = Vector.sub (sizes, halfSize - 1)
in
VectorSlice.map (fn sz => sz - maxLeft) rightSizesSlice
end
fun helpInsert (start, finish, tree) =
case tree of case tree of
BRANCH (nodes, sizes) => BRANCH (nodes, sizes) =>
if finish >= Vector.sub (sizes, Vector.length sizes - 1) then if offset <= Vector.sub (sizes, 0) then
(* we want to append *) splitLeft (offset, Vector.sub (nodes, 0))
let else if offset >= Vector.sub (sizes, Vector.length sizes - 1) then
val prevSize = let
if Vector.length sizes > 1 then val prevSize =
Vector.sub (sizes, Vector.length sizes - 1) if Vector.length sizes > 1 then
else Vector.sub (sizes, Vector.length sizes - 1)
0
val descendStart = start - prevSize
val descendFinish = finish - prevSize
val descendNode = Vector.sub (nodes, Vector.length nodes - 1)
in
case helpAppend (descendStart, descendFinish, descendNode) of
UPDATE newLast =>
let
val sizes = Vector.update (sizes, Vector.length sizes - 1, finish)
val nodes = Vector.update (nodes, Vector.length nodes - 1, newLast)
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
| APPEND newLast =>
if Vector.length nodes > maxSize then
(* we have to split *)
let
val leftLen = SOME halfSize
val rightLen = SOME (Vector.length nodes - halfSize)
val leftNodeSlice = VectorSlice.slice (nodes, 0, leftLen)
val leftSizeSlice = VectorSlice.slice (sizes, 0, leftLen)
val leftNodes = VectorSlice.vector leftNodeSlice
val leftSizes = VectorSlice.vector leftSizeSlice
val rightNodeSlice =
VectorSlice.slice (nodes, halfSize, rightLen)
val rightSizeSlice =
VectorSlice.slice (sizes, halfSize, rightLen)
val rightNodes =
VectorSlice.concat [rightNodeSlice, VectorSlice.full #[newLast]]
val rightSizes =
let
(* we want to maintain relative indexing metadata
* so that each vector only considers the metadata of its
* own nodes.
* So, we need to subtract the maximum sizes of the leftSizes
* from every size on the right node, to maintain relative
* indexing metadata *)
val maxLeftSize = Vector.sub (leftSizes, Vector.length leftSizes - 1)
val maxRightSize = Vector.sub (sizes, Vector.length sizes - 1)
val finish = finish + maxRightSize
in
Vector.tabulate (VectorSlice.length rightSizeSlice + 1,
fn i =>
if i < VectorSlice.length rightSizeSlice then
VectorSlice.sub (rightSizeSlice, i) - maxLeftSize
else
finish)
end
val left = BRANCH (leftNodes, leftSizes)
val right = BRANCH (rightNodes, rightSizes)
in
INSERT_SPLIT (left, right)
end
else else
(* append newLast to current node *) 0
let val result =
val newLast = #[newLast] splitLeft
val finish = (offset - prevSize, Vector.sub (nodes, Vector.length nodes - 1))
#[Vector.sub (sizes, Vector.length sizes - 1) + finish] in
if isEmpty result then
val nodes = Vector.concat [nodes, newLast]
val sizes = Vector.concat [sizes, finish]
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
end
else
let
val idx = BinSearch.equalOrMore (finish, sizes)
val idx = if idx = ~1 then 0 else idx
val prevSize =
if idx > 0 then
Vector.sub (sizes, idx - 1)
else
0
val descendStart = start - prevSize
val descendFinish = finish - prevSize
val descendNode = Vector.sub (nodes, idx)
in
case helpInsert (descendStart, descendFinish, descendNode) of
INSERT_UPDATE newNode =>
let
val sizes =
if finish > Vector.sub (sizes, idx) then
Vector.update (sizes, idx, finish)
else
sizes
val nodes = Vector.update (nodes, idx, newNode)
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
| INSERT_SPLIT (left, right) =>
if Vector.length nodes = maxSize then
(* have to split *)
let let
val leftSizes = val len = SOME (Vector.length sizes - 1)
insertSplitSizesLeft (idx, sizes, left, right, prevSize) val sizeSlice = VectorSlice.slice (sizes, 0, len)
val rightSizes = val sizes = VectorSlice.vector sizeSlice
insertSplitSizesRight (idx, sizes, left, right, leftSizes) val nodeSlice = VectorSlice.slice (nodes, 0, len)
val nodes = VectorSlice.vector nodeSlice
val leftNodes =
VectorSlice.slice (nodes, 0, SOME halfSize)
val leftNodes = VectorSlice.vector leftNodes
val rightNodes =
VectorSlice.slice (nodes, halfSize, SOME (Vector.length rightSizes))
val rightNodes = VectorSlice.vector rightNodes
val left = BRANCH (leftNodes, leftSizes)
val right = BRANCH (rightNodes, rightSizes)
in in
INSERT_SPLIT (left, right) BRANCH (nodes, sizes)
end end
else else
(* can join into parent *)
let let
(* join size/metadata table, changing metadata where needed *) val newChildSize = getMaxSize result + prevSize
val sizes = val sizes = Vector.update (sizes, Vector.length sizes - 1, newChildSize)
let val nodes = Vector.update (nodes, Vector.length nodes - 1, result)
val midLeftSize = getMaxSize left + prevSize
val midRightSize = getMaxSize right + midLeftSize
val origIdxSize = Vector.sub (sizes, idx)
val sizeDiff = midRightSize - origIdxSize
in
Vector.tabulate (Vector.length sizes + 1,
fn i =>
if i = idx then
midLeftSize
else if i = idx + 1 then
midRightSize
else if i < idx then
Vector.sub (sizes, i)
else
(* i > idx *)
Vector.sub (sizes, i - 1) + sizeDiff
)
end
val midNodes = #[left, right]
val midNodes = VectorSlice.full midNodes
val leftLen = SOME idx
val rightLen = SOME (Vector.length sizes - idx)
val leftNodes = VectorSlice.slice (nodes, 0, leftLen)
val rightNodes = VectorSlice.slice (nodes, idx, rightLen)
val nodes =
VectorSlice.concat [leftNodes, midNodes, rightNodes]
in in
INSERT_UPDATE (BRANCH (nodes, sizes)) BRANCH (nodes, sizes)
end end
end end
fun helpInsert (start, finish, tree) =
case tree of
BRANCH (nodes, sizes) =>
if finish >= Vector.sub (sizes, Vector.length sizes - 1) then
(* if we want to append *)
case
helpAppend (start, finish, Vector.sub
(nodes, Vector.length sizes - 1))
of
UPDATE newLast =>
let
val sizes = Vector.update
(sizes, Vector.length sizes - 1, finish)
val nodes = Vector.update
(nodes, Vector.length nodes - 1, newLast)
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
| APPEND newLast =>
if Vector.length nodes = maxSize then
(* have to split *)
let
val leftLen = SOME halfSize
val rightLen = SOME (Vector.length nodes - halfSize)
val leftNodeSlice = VectorSlice.slice (nodes, 0, leftLen)
val rightNodeSlice =
VectorSlice.slice (nodes, halfSize, rightLen)
val leftSizeSlice = VectorSlice.slice (sizes, 0, leftLen)
val rightSizeSlice =
VectorSlice.slice (sizes, halfSize, rightLen)
val leftNodes = VectorSlice.vector leftNodeSlice
val leftSizes = VectorSlice.vector leftSizeSlice
val newLast = VectorSlice.full (#[newLast])
val finish = VectorSlice.full (#[finish])
val rightNodes = VectorSlice.concat [rightNodeSlice, newLast]
val rightSizes = VectorSlice.concat [rightSizeSlice, finish]
val left = BRANCH (leftNodes, leftSizes)
val right = BRANCH (rightNodes, rightSizes)
in
INSERT_SPLIT (left, right)
end
else
(* append newLast to current node *)
let
val newLast = #[newLast]
val finish = #[finish]
val nodes = Vector.concat [nodes, newLast]
val sizes = Vector.concat [sizes, finish]
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
else else
let let
val idx = BinSearch.equalOrMore (finish, sizes) val idx = BinSearch.equalOrMore (offset, sizes)
val idx = if idx = ~1 then 0 else idx val prevSize =
if idx > 1 then
Vector.sub (sizes, idx - 1)
else
0
val result = splitLeft (offset - prevSize, Vector.sub (nodes, idx))
in in
case helpInsert (start, finish, tree) of if isEmpty result then
INSERT_UPDATE newNode => let
let val len = SOME idx
val sizes = val sizeSlice = VectorSlice.slice (sizes, 0, len)
if finish > Vector.sub (sizes, idx) then val sizes = VectorSlice.vector sizeSlice
Vector.update (sizes, idx, finish) val nodeSlice = VectorSlice.slice (nodes, 0, len)
val nodes = VectorSlice.vector nodeSlice
in
BRANCH (nodes, sizes)
end
else
let
val len = idx + 1
val sizes = Vector.tabulate (len,
fn i =>
if i = idx then
getMaxSize result + prevSize
else else
sizes Vector.sub (sizes, i)
val nodes = Vector.update (nodes, idx, newNode) )
in val nodes = Vector.tabulate (lane,
INSERT_UPDATE (BRANCH (nodes, sizes)) fn i =>
end if i = idx then
| INSERT_SPLIT (left, right) => result
if Vector.length nodes = maxSize then else
(* have to split this node too *) Vector.sub (nodes, i)
let )
(* slice sizes *) in
val leftSize = BRANCH (nodes, sizes)
VectorSlice.full #[getMaxSize left] end
val rightSize =
VectorSlice.full #[getMaxSize right]
val leftLen = SOME idx
val rightLen = SOME (Vector.length nodes - idx - 1)
val leftSizeSlice = VectorSlice.slice (sizes, 0, leftLen)
val rightSizeSlice =
VectorSlice.slice (sizes, idx + 1, rightLen)
val leftSizes = VectorSlice.concat [leftSizeSlice, leftSize]
val rightSizes =
VectorSlice.concat [rightSizeSlice, rightSize]
(* slice nodes *)
val left = VectorSlice.full #[left]
val right = VectorSlice.full #[right]
val leftNodesSlice = VectorSlice.slice (nodes, 0, leftLen)
val rightNodesSlice =
VectorSlice.slice (nodes, idx + 1, rightLen)
val leftNodes = VectorSlice.concat [leftNodesSlice, left]
val rightNodes = VectorSlice.concat [right, rightNodesSlice]
(* join sizes and nodes *)
val left = BRANCH (leftNodes, leftSizes)
val right = BRANCH (rightNodes, rightSizes)
in
INSERT_SPLIT (left, right)
end
else
(* can join children into parent *)
let
val midSizes =
#[getMaxSize left, getMaxSize right]
val midSizes = VectorSlice.full midSizes
val midNodes = #[left, right]
val midNodes = VectorSlice.full midNodes
val leftLen = SOME idx
val rightLen = SOME (Vector.length sizes - idx)
val leftSizes = VectorSlice.slice (sizes, 0, leftLen)
val rightSizes = VectorSlice.slice (sizes, idx, rightLen)
val leftNodes = VectorSlice.slice (nodes, 0, leftLen)
val rightNodes = VectorSlice.slice (nodes, idx, rightLen)
val sizes =
VectorSlice.concat [leftSizes, midSizes, rightSizes]
val nodes =
VectorSlice.concat [leftNodes, midNodes, rightNodes]
in
INSERT_UPDATE (BRANCH (nodes, sizes))
end
end end
| LEAF (items, sizes) => | LEAF (items, sizes) =>
if Vector.length items = 0 then if Vector.length sizes > 0 then
(* leaf is empty, so return leaf containing one item *) if offset > Vector.sub (sizes, Vector.length sizes - 1) then
let tree
val item = #[{start = start, finish = finish}] else if offset < Vector.sub (sizes, 0) then
val size = #[finish] LEAF (#[], #[])
in else
INSERT_UPDATE (LEAF (item, size)) let
end val len = BinSearch.equalOrMore (offset, sizes)
else if finish > Vector.sub (sizes, Vector.length sizes - 1) then val sizes = VectorSlice.slice (sizes, 0, SOME len)
if Vector.length sizes = maxSize then val sizes = VectorSlice.vector sizes
(* have to split *) val nodes = VectorSlice.slice (nodes, 0, SOME len)
let val nodes = VectorSlice.vector nodes
val startLen = SOME halfSize in
val midLen = SOME (Vector.length items - halfSize) LEAF (nodes, sizes)
end
val leftSizes = VectorSlice.slice (sizes, 0, startLen)
val leftItems = VectorSlice.slice (items, 0, startLen)
val midSizes = VectorSlice.slice (sizes, halfSize, midLen)
val midItems = VectorSlice.slice (items, halfSize, midLen)
val rightSizes = VectorSlice.full #[finish]
val rightItems =
VectorSlice.full #[{start = start, finish = finish}]
val rightItems = VectorSlice.concat [midItems, rightItems]
val leftItems = VectorSlice.vector leftItems
val rightSizes = VectorSlice.concat [midSizes, rightSizes]
val leftSizes = VectorSlice.vector leftSizes
val left = LEAF (leftItems, leftSizes)
val right = LEAF (rightItems, rightSizes)
in
INSERT_SPLIT (left, right)
end
else
(* can just append *)
let
val sizes = Vector.concat [sizes, #[finish]]
val item = #[{start = start, finish = finish}]
val items = Vector.concat [items, item]
in
INSERT_UPDATE (LEAF (items, sizes))
end
else if finish < #start (Vector.sub (items, 0)) then
(* prepend *)
if Vector.length sizes = maxSize then
(* have to split *)
let
val leftSizes = VectorSlice.full #[finish]
val leftItems =
VectorSlice.full #[{start = start, finish = finish}]
val midLen = SOME halfSize
val rightLen = SOME (Vector.length items - halfSize)
val midSizes = VectorSlice.slice (sizes, 0, midLen)
val midItems = VectorSlice.slice (items, 0, midLen)
val rightSizes = VectorSlice.slice (sizes, halfSize, rightLen)
val rightItems = VectorSlice.slice (items, halfSize, rightLen)
val leftSizes = VectorSlice.concat [leftSizes, midSizes]
val rightSizes = VectorSlice.vector rightSizes
val leftItems = VectorSlice.concat [leftItems, midItems]
val rightItems = VectorSlice.vector rightItems
val left = LEAF (leftItems, leftSizes)
val right = LEAF (rightItems, rightSizes)
in
INSERT_SPLIT (left, right)
end
else
(* just prepend *)
let
val sizes = Vector.concat [#[finish], sizes]
val item = {start = start, finish = finish}
val items = Vector.concat [#[item], items]
in
INSERT_UPDATE (LEAF (items, sizes))
end
else else
(* insert into middle *) tree
let
val idx = BinSearch.equalOrMore (finish, sizes)
val leftLen = SOME idx
val rightLen = SOME (Vector.length sizes - idx)
val leftSizes = VectorSlice.slice (sizes, 0, leftLen)
val rightSizes = VectorSlice.slice (sizes, idx, rightLen)
val leftItems = VectorSlice.slice (items, 0, leftLen)
val rightItems = VectorSlice.slice (items, idx, rightLen)
val midSize = VectorSlice.full #[finish]
val midItem =
VectorSlice.full #[{start = start, finish = finish}]
in
if Vector.length items = maxSize then
(* have to return split *)
let
val leftSizes = VectorSlice.concat [leftSizes, midSize]
val rightSizes = VectorSlice.vector rightSizes
val leftItems = VectorSlice.concat [leftItems, midItem]
val rightItems = VectorSlice.vector rightItems
val left = LEAF (leftItems, leftSizes)
val right = LEAF (rightItems, rightSizes)
in
INSERT_SPLIT (left, right)
end
else
(* have to return update *)
let
val sizes = VectorSlice.concat [leftSizes, midSize, rightSizes]
val items = VectorSlice.concat [leftItems, midItem, rightItems]
in
INSERT_UPDATE (LEAF (items, sizes))
end
end
fun insert (start, finish, tree) =
case helpInsert (start, finish, tree) of
INSERT_UPDATE tree => tree
| INSERT_SPLIT (left, right) =>
let
val leftSize = getMaxSize left
val sizes = #[leftSize, leftSize + getMaxSize right]
val nodes = #[left, right]
in
BRANCH (nodes, sizes)
end
end end