Refactor persistent-vector.sml so that its data representation has less redundant data, addressing a note from the to-do list. (All tests still pass after this refactoring.)

This commit is contained in:
2026-05-01 14:00:19 +01:00
parent e1203cb199
commit 3e8161990b
2 changed files with 89 additions and 91 deletions

View File

@@ -7,14 +7,14 @@ struct
* *) * *)
datatype t = datatype t =
BRANCH of t vector * int vector BRANCH of t vector * int vector
| LEAF of {start: int, finish: int} vector * int vector | LEAF of int vector * int vector
val maxSize = 32 val maxSize = 32
val halfSize = 16 val halfSize = 16
fun isEmpty t = fun isEmpty t =
case t of case t of
LEAF (_, sizes) => Vector.length sizes = 0 LEAF (_, endSizes) => Vector.length endSizes = 0
| BRANCH (_, sizes) => Vector.length sizes = 0 | BRANCH (_, sizes) => Vector.length sizes = 0
val empty = LEAF (#[], #[]) val empty = LEAF (#[], #[])
@@ -38,15 +38,16 @@ struct
isInRange (nextCheckIdx, Vector.sub (nodes, searchIdx)) isInRange (nextCheckIdx, Vector.sub (nodes, searchIdx))
end end
end end
| LEAF (values, sizes) => | LEAF (startSizes, endSizes) =>
let let
val searchIdx = BinSearch.equalOrMore (checkIdx, sizes) val searchIdx = BinSearch.equalOrMore (checkIdx, endSizes)
in in
if searchIdx = ~1 then if searchIdx = ~1 then
false false
else else
let let
val {start, finish} = Vector.sub (values, searchIdx) val start = Vector.sub (startSizes, searchIdx)
val finish = Vector.sub (endSizes, searchIdx)
in in
checkIdx >= start andalso checkIdx <= finish checkIdx >= start andalso checkIdx <= finish
end end
@@ -55,16 +56,16 @@ struct
fun getFinishIdx t = fun getFinishIdx t =
case t of case t of
BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
| LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) | LEAF (_, endSizes) => Vector.sub (endSizes, Vector.length endSizes - 1)
fun getStartIdx t = fun getStartIdx t =
case t of case t of
BRANCH (nodes, _) => getStartIdx (Vector.sub (nodes, 0)) BRANCH (nodes, _) => getStartIdx (Vector.sub (nodes, 0))
| LEAF (items, _) => | LEAF (startSizes, _) =>
if Vector.length items = 0 then if Vector.length startSizes = 0 then
0 0
else else
#start (Vector.sub (items, 0)) Vector.sub (startSizes, 0)
fun helpAppend (start, finish, tree) = fun helpAppend (start, finish, tree) =
case tree of case tree of
@@ -106,26 +107,25 @@ struct
UPDATE newNodes UPDATE newNodes
end end
end end
| LEAF (values, sizes) => | LEAF (startSizes, endSizes) =>
if Vector.length values + 1 > maxSize then if Vector.length endSizes + 1 > maxSize then
(* when we split a leaf into two vectors, (* when we split a leaf into two vectors,
* we want to adjust the start and finish parameters * we want to adjust the start and finish parameters
* so that they don't contain the offset relevant to the * so that they don't contain the offset relevant to the
* "lower" vector, which was split from *) * "lower" vector, which was split from *)
let let
val prevFinish = Vector.sub (sizes, Vector.length sizes - 1) val prevFinish = Vector.sub (endSizes, Vector.length endSizes - 1)
val start = start - prevFinish val start = start - prevFinish
val finish = finish - prevFinish val finish = finish - prevFinish
val newNode = LEAF (#[{start = start, finish = finish}], #[finish]) val newNode = LEAF (#[start], #[finish])
in in
APPEND newNode APPEND newNode
end end
else else
let let
val newNode = Vector.concat val newStartSizes = Vector.concat [startSizes, #[start]]
[values, #[{start = start, finish = finish}]] val newEndSizes = Vector.concat [endSizes, #[finish]]
val newSizes = Vector.concat [sizes, #[finish]] val newNode = LEAF (newStartSizes, newEndSizes)
val newNode = LEAF (newNode, newSizes)
in in
UPDATE newNode UPDATE newNode
end end
@@ -142,19 +142,21 @@ struct
fun getStart tree = fun getStart tree =
case tree of case tree of
LEAF (values, _) => Vector.sub (values, 0) LEAF (startSizes, endSizes) =>
{start = Vector.sub (startSizes, 0), finish = Vector.sub (endSizes, 0)}
| BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0)) | BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0))
fun helpNextMatch (cursorIdx, tree, absOffset) = fun helpNextMatch (cursorIdx, tree, absOffset) =
case tree of case tree of
LEAF (values, sizes) => LEAF (startSizes, endSizes) =>
let let
val idx = BinSearch.equalOrMore (cursorIdx, sizes) val idx = BinSearch.equalOrMore (cursorIdx, endSizes)
in in
if idx = ~1 then {start = ~1, finish = ~1} if idx = ~1 then {start = ~1, finish = ~1}
else else
let let
val {start, finish} = Vector.sub (values, idx) val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
in in
{start = start + absOffset, finish = finish + absOffset} {start = start + absOffset, finish = finish + absOffset}
end end
@@ -211,9 +213,10 @@ struct
fun getLast (tree, absOffset) = fun getLast (tree, absOffset) =
case tree of case tree of
LEAF (values, _) => LEAF (startSizes, endSizes) =>
let let
val {start, finish} = Vector.sub (values, Vector.length values - 1) val start = Vector.sub (startSizes, Vector.length startSizes - 1)
val finish = Vector.sub (endSizes, Vector.length endSizes - 1)
in in
{start = start + absOffset, finish = finish + absOffset} {start = start + absOffset, finish = finish + absOffset}
end end
@@ -246,15 +249,16 @@ struct
* or the caller to `helpPrevMatch` will. *) * or the caller to `helpPrevMatch` will. *)
fun helpPrevMatch (cursorIdx, tree, absOffset) = fun helpPrevMatch (cursorIdx, tree, absOffset) =
case tree of case tree of
LEAF (values, sizes) => LEAF (startSizes, endSizes) =>
let let
val idx = BinSearch.equalOrMore (cursorIdx, sizes) val idx = BinSearch.equalOrMore (cursorIdx, endSizes)
in in
if idx < 0 then if idx < 0 then
{start = ~1, finish = ~1} {start = ~1, finish = ~1}
else if idx = 0 then else if idx = 0 then
let let
val {start, finish} = Vector.sub (values, 0) val start = Vector.sub (startSizes, 0)
val finish = Vector.sub (endSizes, 0)
in in
if start < cursorIdx then if start < cursorIdx then
{start = start + absOffset, finish = finish + absOffset} {start = start + absOffset, finish = finish + absOffset}
@@ -263,13 +267,15 @@ struct
end end
else else
let let
val {start, finish} = Vector.sub (values, idx) val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
in in
if cursorIdx > start then if cursorIdx > start then
{start = start + absOffset, finish = finish + absOffset} {start = start + absOffset, finish = finish + absOffset}
else else
let let
val {start, finish} = Vector.sub (values, idx - 1) val start = Vector.sub (startSizes, idx - 1)
val finish = Vector.sub (endSizes, idx - 1)
in in
{start = start + absOffset, finish = finish + absOffset} {start = start + absOffset, finish = finish + absOffset}
end end
@@ -339,37 +345,37 @@ struct
fun splitLeft (splitIdx, tree) = fun splitLeft (splitIdx, tree) =
case tree of case tree of
LEAF (items, sizes) => LEAF (startSizes, endSizes) =>
if Vector.length items = 0 then if Vector.length startSizes = 0 then
(* if tree is empty, then just return tree *) (* if tree is empty, then just return tree *)
tree tree
else else
let let
val {start, ...} = Vector.sub (items, 0) val start = Vector.sub (startSizes, 0)
in in
(* if all items are after splitIdx, (* if all intervals are after splitIdx,
* then we want to return an empty tree, * then we want to return an empty tree,
* splitting everything *) * splitting everything *)
if splitIdx < start then if splitIdx < start then
empty empty
else if splitIdx > Vector.sub (sizes, Vector.length sizes - 1) then else if splitIdx > Vector.sub (endSizes, Vector.length endSizes - 1) then
(* if all items are before splitIdx, (* if all intervals are before splitIdx,
* then we want to return the same tree, * then we want to return the same tree,
* splitting nothing *) * splitting nothing *)
tree tree
else else
(* we want to split from somewhere in middle, keeping left *) (* we want to split from somewhere in middle, keeping left *)
let let
val idx = BinSearch.equalOrMore (splitIdx, sizes) val idx = BinSearch.equalOrMore (splitIdx, endSizes)
val idx = SOME idx val idx = SOME idx
val items = VectorSlice.slice (items, 0, idx) val startSizes = VectorSlice.slice (startSizes, 0, idx)
val items = VectorSlice.vector items val startSizes = VectorSlice.vector startSizes
val sizes = VectorSlice.slice (sizes, 0, idx) val endSizes = VectorSlice.slice (endSizes, 0, idx)
val sizes = VectorSlice.vector sizes val endSizes = VectorSlice.vector endSizes
in in
LEAF (items, sizes) LEAF (startSizes, endSizes)
end end
end end
| BRANCH (nodes, sizes) => | BRANCH (nodes, sizes) =>
@@ -477,43 +483,42 @@ struct
BRANCH (nodes, sizes) BRANCH (nodes, sizes)
end end
end end
| LEAF (items, sizes) => | LEAF (startSizes, endSizes) =>
if Vector.length items = 0 then if Vector.length endSizes = 0 then
tree tree
else else
if splitIdx > Vector.sub (sizes, Vector.length sizes - 1) then if splitIdx > Vector.sub (endSizes, Vector.length endSizes - 1) then
empty empty
else if splitIdx < #start (Vector.sub (items, 0)) then else if splitIdx < Vector.sub (startSizes, 0) then
tree tree
else else
let let
val idx = BinSearch.equalOrMore (splitIdx, sizes) val idx = BinSearch.equalOrMore (splitIdx, endSizes)
val {start, finish} = Vector.sub (items, idx) val start = Vector.sub (startSizes, idx)
val finish = Vector.sub (endSizes, idx)
val idx = val idx =
if splitIdx >= start then if splitIdx >= start then
idx + 1 idx + 1
else else
idx idx
in in
if idx >= Vector.length items then if idx >= Vector.length endSizes then
empty empty
else else
let let
val prevSize = val prevSize =
if idx > 0 then if idx > 0 then
Vector.sub (sizes, idx - 1) Vector.sub (endSizes, idx - 1)
else else
0 0
val len = Vector.length items - idx val len = Vector.length startSizes - idx
val itemsSlice = VectorSlice.slice (items, idx, SOME len) val startSlice = VectorSlice.slice (startSizes, idx, SOME len)
val items = VectorSlice.map val startSizes = VectorSlice.map (fn i => i - prevSize) startSlice
(fn {start, finish} =>
{start = start - prevSize, finish = finish - prevSize} val endSlice = VectorSlice.slice (endSizes, idx, SOME len)
) val endSizes = VectorSlice.map (fn i => i - prevSize) endSlice
itemsSlice
val sizes = Vector.map #finish items
in in
LEAF (items, sizes) LEAF (startSizes, endSizes)
end end
end end
@@ -527,15 +532,12 @@ struct
in in
BRANCH (nodes, sizes) BRANCH (nodes, sizes)
end end
| LEAF (items, sizes) => | LEAF (startSizes, endSizes) =>
let let
val items = Vector.map val startSizes = Vector.map (fn i => i - decBy) startSizes
(fn {start, finish} => val endSizes = Vector.map (fn i => i - decBy) endSizes
{start = start - decBy, finish = finish - decBy}
) items
val sizes = Vector.map #finish items
in in
LEAF (items, sizes) LEAF (startSizes, endSizes)
end end
fun incrementBy (incBy, tree) = fun incrementBy (incBy, tree) =
@@ -548,15 +550,12 @@ struct
in in
BRANCH (nodes, sizes) BRANCH (nodes, sizes)
end end
| LEAF (items, sizes) => | LEAF (startSizes, endSizes) =>
let let
val items = Vector.map val startSizes = Vector.map (fn i => i + incBy) startSizes
(fn {start, finish} => val endSizes = Vector.map (fn i => i + incBy) endSizes
{start = start + incBy, finish = finish + incBy}
) items
val sizes = Vector.map #finish items
in in
LEAF (items, sizes) LEAF (startSizes, endSizes)
end end
fun countDepthLoop (counter, tree) = fun countDepthLoop (counter, tree) =
@@ -572,26 +571,25 @@ struct
fun mergeSameDepth (left, right) = fun mergeSameDepth (left, right) =
case (left, right) of case (left, right) of
(LEAF (leftItems, leftSizes), LEAF (rightItems, rightSizes)) => (LEAF (leftStartSizes, leftEndSizes), LEAF (rightStartSizes, rightEndSizes)) =>
if Vector.length leftItems + Vector.length rightItems <= maxSize then if Vector.length leftStartSizes + Vector.length rightStartSizes <= maxSize then
let let
val offset = Vector.sub (leftSizes, Vector.length leftSizes - 1) val offset = Vector.sub (leftEndSizes, Vector.length leftEndSizes - 1)
val newVecLen = Vector.length leftItems + Vector.length rightItems val newVecLen = Vector.length leftStartSizes + Vector.length rightStartSizes
val items = Vector.tabulate (newVecLen, val startSizes = Vector.tabulate (newVecLen,
fn i => fn i =>
if i < Vector.length leftItems then if i < Vector.length leftStartSizes then
Vector.sub (leftItems, i) Vector.sub (leftStartSizes, i)
else else
let Vector.sub (rightStartSizes, i - Vector.length leftStartSizes) + offset)
val {start, finish} = val endSizes = Vector.tabulate (newVecLen,
Vector.sub (rightItems, i - Vector.length leftItems) fn i =>
in if i < Vector.length leftEndSizes then
{start = start + offset, finish = finish + offset} Vector.sub (leftEndSizes, i)
end else
) Vector.sub (rightEndSizes, i - Vector.length leftEndSizes) + offset)
val sizes = Vector.map #finish items
in in
MERGE_SAME_DEPTH_UPDATE (LEAF (items, sizes)) MERGE_SAME_DEPTH_UPDATE (LEAF (startSizes, endSizes))
end end
else else
MERGE_SAME_DEPTH_FULL MERGE_SAME_DEPTH_FULL
@@ -907,14 +905,15 @@ struct
in in
branchLoop (0, acc) branchLoop (0, acc)
end end
| LEAF (items, _) => | LEAF (startSizes, endSizes) =>
let let
fun itemLoop (pos, acc, offset) = fun itemLoop (pos, acc, offset) =
if pos = Vector.length items then if pos = Vector.length startSizes then
acc acc
else else
let let
val {start, finish} = Vector.sub (items, pos) val start = Vector.sub (startSizes, pos)
val finish = Vector.sub (endSizes, pos)
val item = {start = start + offset, finish = finish + offset} val item = {start = start + offset, finish = finish + offset}
in in
itemLoop (pos + 1, item :: acc, offset) itemLoop (pos + 1, item :: acc, offset)

View File

@@ -1,5 +1,4 @@
# To-do list # To-do list
- Refactor PersistentVector so that the LEAF node only contains integer vectors (one for the finishIdx, which we already have, and one for the startIdx)
- Add tests for indent, dedent and remove-line-break motions - Add tests for indent, dedent and remove-line-break motions
- Add tests that searchList updates as expected too - Add tests that searchList updates as expected too
- Add tests for other yank motions - Add tests for other yank motions