Refactor persistent-vector.sml so that its data representation holds less redundant data, addressing a note from the to-do list. (All tests still pass after this refactoring.)

This commit is contained in:
2026-05-01 14:00:19 +01:00
parent e1203cb199
commit 3e8161990b
2 changed files with 89 additions and 91 deletions

View File

@@ -7,14 +7,14 @@ struct
* *)
datatype t =
BRANCH of t vector * int vector
| LEAF of {start: int, finish: int} vector * int vector
| LEAF of int vector * int vector
val maxSize = 32
val halfSize = 16
fun isEmpty t =
case t of
LEAF (_, sizes) => Vector.length sizes = 0
LEAF (_, endSizes) => Vector.length endSizes = 0
| BRANCH (_, sizes) => Vector.length sizes = 0
val empty = LEAF (#[], #[])
@@ -38,15 +38,16 @@ struct
isInRange (nextCheckIdx, Vector.sub (nodes, searchIdx))
end
end
| LEAF (values, sizes) =>
| LEAF (startSizes, endSizes) =>
let
val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
val searchIdx = BinSearch.equalOrMore (checkIdx, endSizes)
in
if searchIdx = ~1 then
false
else
let
val {start, finish} = Vector.sub (values, searchIdx)
val start = Vector.sub (startSizes, searchIdx)
val finish = Vector.sub (endSizes, searchIdx)
in
checkIdx >= start andalso checkIdx <= finish
end
@@ -55,16 +56,16 @@ struct
fun getFinishIdx t =
case t of
BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
| LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
| LEAF (_, endSizes) => Vector.sub (endSizes, Vector.length endSizes - 1)
fun getStartIdx t =
case t of
BRANCH (nodes, _) => getStartIdx (Vector.sub (nodes, 0))
| LEAF (items, _) =>
if Vector.length items = 0 then
| LEAF (startSizes, _) =>
if Vector.length startSizes = 0 then
0
else
#start (Vector.sub (items, 0))
Vector.sub (startSizes, 0)
fun helpAppend (start, finish, tree) =
case tree of
@@ -106,26 +107,25 @@ struct
UPDATE newNodes
end
end
| LEAF (values, sizes) =>
if Vector.length values + 1 > maxSize then
| LEAF (startSizes, endSizes) =>
if Vector.length endSizes + 1 > maxSize then
(* when we split a leaf into two vectors,
* we want to adjust the start and finish parameters
* so that they don't contain the offset relevant to the
* "lower" vector, which was split from *)
let
val prevFinish = Vector.sub (sizes, Vector.length sizes - 1)
val prevFinish = Vector.sub (endSizes, Vector.length endSizes - 1)
val start = start - prevFinish
val finish = finish - prevFinish
val newNode = LEAF (#[{start = start, finish = finish}], #[finish])
val newNode = LEAF (#[start], #[finish])
in
APPEND newNode
end
else
let
val newNode = Vector.concat
[values, #[{start = start, finish = finish}]]
val newSizes = Vector.concat [sizes, #[finish]]
val newNode = LEAF (newNode, newSizes)
val newStartSizes = Vector.concat [startSizes, #[start]]
val newEndSizes = Vector.concat [endSizes, #[finish]]
val newNode = LEAF (newStartSizes, newEndSizes)
in
UPDATE newNode
end
@@ -142,19 +142,21 @@ struct
fun getStart tree =
case tree of
LEAF (values, _) => Vector.sub (values, 0)
LEAF (startSizes, endSizes) =>
{start = Vector.sub (startSizes, 0), finish = Vector.sub (endSizes, 0)}
| BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0))
fun helpNextMatch (cursorIdx, tree, absOffset) =
case tree of
LEAF (values, sizes) =>
LEAF (startSizes, endSizes) =>
let
val idx = BinSearch.equalOrMore (cursorIdx, sizes)
val idx = BinSearch.equalOrMore (cursorIdx, endSizes)
in
if idx = ~1 then {start = ~1, finish = ~1}
else
let
val {start, finish} = Vector.sub (values, idx)
val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
in
{start = start + absOffset, finish = finish + absOffset}
end
@@ -211,9 +213,10 @@ struct
fun getLast (tree, absOffset) =
case tree of
LEAF (values, _) =>
LEAF (startSizes, endSizes) =>
let
val {start, finish} = Vector.sub (values, Vector.length values - 1)
val start = Vector.sub (startSizes, Vector.length startSizes - 1)
val finish = Vector.sub (endSizes, Vector.length endSizes - 1)
in
{start = start + absOffset, finish = finish + absOffset}
end
@@ -246,15 +249,16 @@ struct
* or the caller to `helpPrevMatch` will. *)
fun helpPrevMatch (cursorIdx, tree, absOffset) =
case tree of
LEAF (values, sizes) =>
LEAF (startSizes, endSizes) =>
let
val idx = BinSearch.equalOrMore (cursorIdx, sizes)
val idx = BinSearch.equalOrMore (cursorIdx, endSizes)
in
if idx < 0 then
{start = ~1, finish = ~1}
else if idx = 0 then
let
val {start, finish} = Vector.sub (values, 0)
val start = Vector.sub (startSizes, 0)
val finish = Vector.sub (endSizes, 0)
in
if start < cursorIdx then
{start = start + absOffset, finish = finish + absOffset}
@@ -263,13 +267,15 @@ struct
end
else
let
val {start, finish} = Vector.sub (values, idx)
val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
in
if cursorIdx > start then
{start = start + absOffset, finish = finish + absOffset}
else
let
val {start, finish} = Vector.sub (values, idx - 1)
val start = Vector.sub (startSizes, idx - 1)
val finish = Vector.sub (endSizes, idx - 1)
in
{start = start + absOffset, finish = finish + absOffset}
end
@@ -339,37 +345,37 @@ struct
fun splitLeft (splitIdx, tree) =
case tree of
LEAF (items, sizes) =>
if Vector.length items = 0 then
LEAF (startSizes, endSizes) =>
if Vector.length startSizes = 0 then
(* if tree is empty, then just return tree *)
tree
else
let
val {start, ...} = Vector.sub (items, 0)
val start = Vector.sub (startSizes, 0)
in
(* if all items are after splitIdx,
(* if all intervals are after splitIdx,
* then we want to return an empty tree,
* splitting everything *)
if splitIdx < start then
empty
else if splitIdx > Vector.sub (sizes, Vector.length sizes - 1) then
(* if all items are before splitIdx,
else if splitIdx > Vector.sub (endSizes, Vector.length endSizes - 1) then
(* if all intervals are before splitIdx,
* then we want to return the same tree,
* splitting nothing *)
tree
else
(* we want to split from somewhere in middle, keeping left *)
let
val idx = BinSearch.equalOrMore (splitIdx, sizes)
val idx = BinSearch.equalOrMore (splitIdx, endSizes)
val idx = SOME idx
val items = VectorSlice.slice (items, 0, idx)
val items = VectorSlice.vector items
val startSizes = VectorSlice.slice (startSizes, 0, idx)
val startSizes = VectorSlice.vector startSizes
val sizes = VectorSlice.slice (sizes, 0, idx)
val sizes = VectorSlice.vector sizes
val endSizes = VectorSlice.slice (endSizes, 0, idx)
val endSizes = VectorSlice.vector endSizes
in
LEAF (items, sizes)
LEAF (startSizes, endSizes)
end
end
| BRANCH (nodes, sizes) =>
@@ -477,43 +483,42 @@ struct
BRANCH (nodes, sizes)
end
end
| LEAF (items, sizes) =>
if Vector.length items = 0 then
| LEAF (startSizes, endSizes) =>
if Vector.length endSizes = 0 then
tree
else
if splitIdx > Vector.sub (sizes, Vector.length sizes - 1) then
if splitIdx > Vector.sub (endSizes, Vector.length endSizes - 1) then
empty
else if splitIdx < #start (Vector.sub (items, 0)) then
else if splitIdx < Vector.sub (startSizes, 0) then
tree
else
let
val idx = BinSearch.equalOrMore (splitIdx, sizes)
val {start, finish} = Vector.sub (items, idx)
val idx = BinSearch.equalOrMore (splitIdx, endSizes)
val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
val idx =
if splitIdx >= start then
idx + 1
else
idx
in
if idx >= Vector.length items then
if idx >= Vector.length endSizes then
empty
else
let
val prevSize =
if idx > 0 then
Vector.sub (sizes, idx - 1)
Vector.sub (endSizes, idx - 1)
else
0
val len = Vector.length items - idx
val itemsSlice = VectorSlice.slice (items, idx, SOME len)
val items = VectorSlice.map
(fn {start, finish} =>
{start = start - prevSize, finish = finish - prevSize}
)
itemsSlice
val sizes = Vector.map #finish items
val len = Vector.length startSizes - idx
val startSlice = VectorSlice.slice (startSizes, idx, SOME len)
val startSizes = VectorSlice.map (fn i => i - prevSize) startSlice
val endSlice = VectorSlice.slice (endSizes, idx, SOME len)
val endSizes = VectorSlice.map (fn i => i - prevSize) endSlice
in
LEAF (items, sizes)
LEAF (startSizes, endSizes)
end
end
@@ -527,15 +532,12 @@ struct
in
BRANCH (nodes, sizes)
end
| LEAF (items, sizes) =>
| LEAF (startSizes, endSizes) =>
let
val items = Vector.map
(fn {start, finish} =>
{start = start - decBy, finish = finish - decBy}
) items
val sizes = Vector.map #finish items
val startSizes = Vector.map (fn i => i - decBy) startSizes
val endSizes = Vector.map (fn i => i - decBy) endSizes
in
LEAF (items, sizes)
LEAF (startSizes, endSizes)
end
fun incrementBy (incBy, tree) =
@@ -548,15 +550,12 @@ struct
in
BRANCH (nodes, sizes)
end
| LEAF (items, sizes) =>
| LEAF (startSizes, endSizes) =>
let
val items = Vector.map
(fn {start, finish} =>
{start = start + incBy, finish = finish + incBy}
) items
val sizes = Vector.map #finish items
val startSizes = Vector.map (fn i => i + incBy) startSizes
val endSizes = Vector.map (fn i => i + incBy) endSizes
in
LEAF (items, sizes)
LEAF (startSizes, endSizes)
end
fun countDepthLoop (counter, tree) =
@@ -572,26 +571,25 @@ struct
fun mergeSameDepth (left, right) =
case (left, right) of
(LEAF (leftItems, leftSizes), LEAF (rightItems, rightSizes)) =>
if Vector.length leftItems + Vector.length rightItems <= maxSize then
(LEAF (leftStartSizes, leftEndSizes), LEAF (rightStartSizes, rightEndSizes)) =>
if Vector.length leftStartSizes + Vector.length rightStartSizes <= maxSize then
let
val offset = Vector.sub (leftSizes, Vector.length leftSizes - 1)
val newVecLen = Vector.length leftItems + Vector.length rightItems
val items = Vector.tabulate (newVecLen,
val offset = Vector.sub (leftEndSizes, Vector.length leftEndSizes - 1)
val newVecLen = Vector.length leftStartSizes + Vector.length rightStartSizes
val startSizes = Vector.tabulate (newVecLen,
fn i =>
if i < Vector.length leftItems then
Vector.sub (leftItems, i)
if i < Vector.length leftStartSizes then
Vector.sub (leftStartSizes, i)
else
let
val {start, finish} =
Vector.sub (rightItems, i - Vector.length leftItems)
in
{start = start + offset, finish = finish + offset}
end
)
val sizes = Vector.map #finish items
Vector.sub (rightStartSizes, i - Vector.length leftStartSizes) + offset)
val endSizes = Vector.tabulate (newVecLen,
fn i =>
if i < Vector.length leftEndSizes then
Vector.sub (leftEndSizes, i)
else
Vector.sub (rightEndSizes, i - Vector.length leftEndSizes) + offset)
in
MERGE_SAME_DEPTH_UPDATE (LEAF (items, sizes))
MERGE_SAME_DEPTH_UPDATE (LEAF (startSizes, endSizes))
end
else
MERGE_SAME_DEPTH_FULL
@@ -907,14 +905,15 @@ struct
in
branchLoop (0, acc)
end
| LEAF (items, _) =>
| LEAF (startSizes, endSizes) =>
let
fun itemLoop (pos, acc, offset) =
if pos = Vector.length items then
if pos = Vector.length startSizes then
acc
else
let
val {start, finish} = Vector.sub (items, pos)
val start = Vector.sub (startSizes, pos)
val finish = Vector.sub (endSizes, pos)
val item = {start = start + offset, finish = finish + offset}
in
itemLoop (pos + 1, item :: acc, offset)

View File

@@ -1,5 +1,4 @@
# To-do list
- Refactor PersistentVector so that the LEAF node only contains integer vectors (one for the finishIdx, which we already have, and one for the startIdx)
- Add tests for indent, dedent and remove-line-break motions
- Add tests that searchList updates as expected too
- Add tests for other yank motions