Return PersistentVector.t when building the search list or executing the NFA, because we no longer want to use a simple flat vector for the search list

This commit is contained in:
2025-09-29 14:02:07 +01:00
parent 6d2b43606f
commit 13ccdbb202
3 changed files with 53 additions and 58 deletions

View File

@@ -1,82 +1,76 @@
structure PersistentVector =
struct
  (* Clojure-style persistent vector used to build the search list.
   *
   * Every node carries a second vector, the "finish table", which is
   * parallel to its first vector:
   *  - in a LEAF, entry i is the `finish` of the match stored at slot i;
   *  - in a BRANCH, entry i is the `finish` of the match most recently
   *    appended underneath child i.
   * The finish table is intended for binary search over the matches.
   * NOTE(review): that presumably relies on callers appending matches in
   * ascending `finish` order; nothing in this structure enforces it. *)
  datatype t =
    BRANCH of t vector * int vector
  | LEAF of {start: int, finish: int} vector * int vector

  (* Maximum number of slots in a single node. *)
  val maxSize = 32

  val empty = LEAF (#[], #[])

  (* Result of appending into a subtree:
   *  - UPDATE node: the item fitted; `node` replaces the subtree.
   *  - APPEND node: the subtree was full; `node` is a fresh sibling that
   *    holds only the new item and must be attached by the caller. *)
  datatype append_result = APPEND of t | UPDATE of t

  (* Returns the last entry of the node's finish table.
   * Raises Subscript when the node is empty (as `empty` is), so callers
   * must only use it on nodes known to hold at least one match. *)
  fun getFinishIdx t =
    case t of
      BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
    | LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)

  (* Appends the match {start, finish} to the right edge of `tree`.
   * See append_result for the meaning of the two possible results. *)
  fun helpAppend (start, finish, tree) =
    case tree of
      BRANCH (nodes, sizes) =>
        let
          val lastPos = Vector.length nodes - 1
          val lastNode = Vector.sub (nodes, lastPos)
        in
          case helpAppend (start, finish, lastNode) of
            UPDATE newLast =>
              (* The last child absorbed the item: swap it in and
               * refresh that child's finish-table entry. *)
              UPDATE (BRANCH
                ( Vector.update (nodes, lastPos, newLast)
                , Vector.update (sizes, lastPos, finish)
                ))
          | APPEND newChild =>
              if Vector.length nodes + 1 > maxSize then
                (* No free slot here either: wrap the new child in a
                 * fresh branch and pass it up to the caller. *)
                APPEND (BRANCH (#[newChild], #[finish]))
              else
                UPDATE (BRANCH
                  ( Vector.concat [nodes, #[newChild]]
                  , Vector.concat [sizes, #[finish]]
                  ))
        end
    | LEAF (values, sizes) =>
        let
          val item = {start = start, finish = finish}
        in
          if Vector.length values + 1 > maxSize then
            APPEND (LEAF (#[item], #[finish]))
          else
            UPDATE (LEAF
              ( Vector.concat [values, #[item]]
              , Vector.concat [sizes, #[finish]]
              ))
        end

  (* Appends the match {start, finish} to `tree` and returns the new tree.
   *
   * When the root is full, a new root is created whose two children are
   * the old root and the freshly created sibling.  The old root's finish
   * index is read only in that case: a full root is never empty, whereas
   * reading it unconditionally would raise Subscript on the very first
   * append into `empty`. *)
  fun append (start, finish, tree) =
    case helpAppend (start, finish, tree) of
      UPDATE newTree => newTree
    | APPEND sibling =>
        BRANCH (#[tree, sibling], #[getFinishIdx tree, finish])
end

View File

@@ -2,7 +2,7 @@ structure SearchList =
struct
type t = int vector
val empty = Vector.fromList []
val empty = PersistentVector.empty
fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) =
if pos < 0 then
@@ -47,7 +47,7 @@ struct
, searchString
, hd :: prevTl
)
| [] => PersistentVector.toVector acc
| [] => acc
else
let
val bufferChr = String.sub (hd, pos)
@@ -58,7 +58,7 @@ struct
(* we fully matched the search string *)
let
val foundIdx = absIdx - String.size searchString + 1
val acc = PersistentVector.append (foundIdx, acc)
val acc = PersistentVector.append (foundIdx, absIdx, acc)
in
loopSearch
(pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl)
@@ -152,9 +152,9 @@ struct
, prevTl
)
end
| [] => PersistentVector.toVector acc
| [] => acc
else if absIdx = finish then
PersistentVector.toVector acc
acc
else
let
val bufferChr = String.sub (hd, pos)
@@ -165,7 +165,7 @@ struct
(* full match *)
let
val foundIdx = absIdx - String.size searchString + 1
val acc = PersistentVector.append (foundIdx, acc)
val acc = PersistentVector.append (foundIdx, absIdx, acc)
in
loopRange
( pos + 1
@@ -242,7 +242,8 @@ struct
if String.size searchString > 0 then
case Nfa.parse searchString of
SOME nfa =>
Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa)
Nfa.getMatchesInRange
(#idx buffer, finishIdx, buffer : LineGap.t, nfa)
| NONE => empty
else
empty

View File

@@ -141,7 +141,7 @@ struct
local
fun loop (pos, str, nfa, origNfa, startPos, acc) =
if pos = String.size str then
PersistentVector.toVector acc
acc
else
let
val chr = String.sub (str, pos)
@@ -149,7 +149,7 @@ struct
in
case state of
VALID finishIdx =>
let val acc = PersistentVector.append (pos, acc)
let val acc = PersistentVector.append (pos, finishIdx, acc)
in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc)
end
| INVALID =>
@@ -232,9 +232,9 @@ struct
, startIdx
, finishIdx
)
| [] => PersistentVector.toVector acc
| [] => acc
else if absIdx > finishIdx then
PersistentVector.toVector acc
acc
else
let
val chr = String.sub (hd, strIdx)
@@ -254,9 +254,9 @@ struct
, startIdx
, finishIdx
)
| VALID _ =>
| VALID finishIdx =>
let
val acc = PersistentVector.append (startIdx, acc)
val acc = PersistentVector.append (startIdx, finishIdx, acc)
in
loop
( strIdx + 1
@@ -349,8 +349,8 @@ struct
, finishIdx
)
end
| [] => Vector.fromList [])
| [] => Vector.fromList []
| [] => PersistentVector.empty)
| [] => PersistentVector.empty
end
end
end