Return PersistentVector.t when building the search list and when executing the NFA, because we no longer want to use a simple flat vector for the search list

This commit is contained in:
2025-09-29 14:02:07 +01:00
parent 6d2b43606f
commit 13ccdbb202
3 changed files with 53 additions and 58 deletions

View File

@@ -1,82 +1,76 @@
structure PersistentVector = structure PersistentVector =
struct struct
(* Clojure-style persistent vector, (* Clojure-style persistent vector, for building search list.
* as intermediary data structure * There is an "int table" too, which stores the last index
* for building search list *) * at the node with the same index.
* We can use the size table for binary search.
* *)
datatype t = datatype t =
BRANCH of t vector BRANCH of t vector * int vector
| LEAF of int vector | LEAF of {start: int, finish: int} vector * int vector
val maxSize = 32 val maxSize = 32
val empty = LEAF #[] val empty = LEAF (#[], #[])
datatype append_result = APPEND of t | UPDATE of t datatype append_result = APPEND of t | UPDATE of t
fun helpAppend (key, tree) = fun getFinishIdx t =
case t of
BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
| LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
fun helpAppend (start, finish, tree) =
case tree of case tree of
BRANCH nodes => BRANCH (nodes, sizes) =>
let let
val lastNode = Vector.sub (nodes, Vector.length nodes - 1) val lastNode = Vector.sub (nodes, Vector.length nodes - 1)
in in
case helpAppend (key, lastNode) of case helpAppend (start, finish, lastNode) of
UPDATE newLast => UPDATE newLast =>
let let
val newNode = Vector.update val lastPos = Vector.length nodes - 1
(nodes, Vector.length nodes - 1, newLast) val newNode = Vector.update (nodes, lastPos, newLast)
val newNode = BRANCH newNode val newSizes = Vector.update (sizes, lastPos, finish)
val newNode = BRANCH (newNode, newSizes)
in in
UPDATE newNode UPDATE newNode
end end
| APPEND newVec => | APPEND newVec =>
if Vector.length nodes + 1 > maxSize then if Vector.length nodes + 1 > maxSize then
let val newNode = BRANCH #[newVec] let val newNode = BRANCH (#[newVec], #[finish])
in APPEND newNode in APPEND newNode
end end
else else
let let
val newNodes = Vector.concat [nodes, #[newVec]] val newNodes = Vector.concat [nodes, #[newVec]]
val newNodes = BRANCH newNodes val newSizes = Vector.concat [sizes, #[finish]]
val newNodes = BRANCH (newNodes, newSizes)
in in
UPDATE newNodes UPDATE newNodes
end end
end end
| LEAF vec => | LEAF (values, sizes) =>
if Vector.length vec + 1 > maxSize then if Vector.length values + 1 > maxSize then
let val newNode = LEAF #[key] let val newNode = LEAF (#[{start = start, finish = finish}], #[finish])
in APPEND newNode in APPEND newNode
end end
else else
let let
val newNode = Vector.concat [vec, #[key]] val newNode = Vector.concat
val newNode = LEAF newNode [values, #[{start = start, finish = finish}]]
val newSizes = Vector.concat [sizes, #[finish]]
val newNode = LEAF (newNode, newSizes)
in in
UPDATE newNode UPDATE newNode
end end
fun append (key, tree) = fun append (start, finish, tree) =
case helpAppend (key, tree) of
UPDATE t => t
| APPEND newNode => BRANCH #[tree, newNode]
fun branchToList (pos, nodes, acc) =
if pos < 0 then
acc
else
let let
val node = Vector.sub (nodes, pos) val maxSize = getFinishIdx tree
val acc = helpToVector (node, acc)
in in
branchToList (pos - 1, nodes, acc) case helpAppend (start, finish, tree) of
end UPDATE t => t
| APPEND newNode => BRANCH (#[tree, newNode], #[maxSize, finish])
and helpToVector (tree, acc) =
case tree of
BRANCH nodes => branchToList (Vector.length nodes - 1, nodes, acc)
| LEAF vec => vec :: acc
fun toVector tree =
let val lst = helpToVector (tree, [])
in Vector.concat lst
end end
end end

View File

@@ -2,7 +2,7 @@ structure SearchList =
struct struct
type t = int vector type t = int vector
val empty = Vector.fromList [] val empty = PersistentVector.empty
fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) = fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) =
if pos < 0 then if pos < 0 then
@@ -47,7 +47,7 @@ struct
, searchString , searchString
, hd :: prevTl , hd :: prevTl
) )
| [] => PersistentVector.toVector acc | [] => acc
else else
let let
val bufferChr = String.sub (hd, pos) val bufferChr = String.sub (hd, pos)
@@ -58,7 +58,7 @@ struct
(* we fully matched the search string *) (* we fully matched the search string *)
let let
val foundIdx = absIdx - String.size searchString + 1 val foundIdx = absIdx - String.size searchString + 1
val acc = PersistentVector.append (foundIdx, acc) val acc = PersistentVector.append (foundIdx, absIdx, acc)
in in
loopSearch loopSearch
(pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl) (pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl)
@@ -152,9 +152,9 @@ struct
, prevTl , prevTl
) )
end end
| [] => PersistentVector.toVector acc | [] => acc
else if absIdx = finish then else if absIdx = finish then
PersistentVector.toVector acc acc
else else
let let
val bufferChr = String.sub (hd, pos) val bufferChr = String.sub (hd, pos)
@@ -165,7 +165,7 @@ struct
(* full match *) (* full match *)
let let
val foundIdx = absIdx - String.size searchString + 1 val foundIdx = absIdx - String.size searchString + 1
val acc = PersistentVector.append (foundIdx, acc) val acc = PersistentVector.append (foundIdx, absIdx, acc)
in in
loopRange loopRange
( pos + 1 ( pos + 1
@@ -242,7 +242,8 @@ struct
if String.size searchString > 0 then if String.size searchString > 0 then
case Nfa.parse searchString of case Nfa.parse searchString of
SOME nfa => SOME nfa =>
Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa) Nfa.getMatchesInRange
(#idx buffer, finishIdx, buffer : LineGap.t, nfa)
| NONE => empty | NONE => empty
else else
empty empty

View File

@@ -141,7 +141,7 @@ struct
local local
fun loop (pos, str, nfa, origNfa, startPos, acc) = fun loop (pos, str, nfa, origNfa, startPos, acc) =
if pos = String.size str then if pos = String.size str then
PersistentVector.toVector acc acc
else else
let let
val chr = String.sub (str, pos) val chr = String.sub (str, pos)
@@ -149,7 +149,7 @@ struct
in in
case state of case state of
VALID finishIdx => VALID finishIdx =>
let val acc = PersistentVector.append (pos, acc) let val acc = PersistentVector.append (pos, finishIdx, acc)
in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc) in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc)
end end
| INVALID => | INVALID =>
@@ -232,9 +232,9 @@ struct
, startIdx , startIdx
, finishIdx , finishIdx
) )
| [] => PersistentVector.toVector acc | [] => acc
else if absIdx > finishIdx then else if absIdx > finishIdx then
PersistentVector.toVector acc acc
else else
let let
val chr = String.sub (hd, strIdx) val chr = String.sub (hd, strIdx)
@@ -254,9 +254,9 @@ struct
, startIdx , startIdx
, finishIdx , finishIdx
) )
| VALID _ => | VALID finishIdx =>
let let
val acc = PersistentVector.append (startIdx, acc) val acc = PersistentVector.append (startIdx, finishIdx, acc)
in in
loop loop
( strIdx + 1 ( strIdx + 1
@@ -349,8 +349,8 @@ struct
, finishIdx , finishIdx
) )
end end
| [] => Vector.fromList []) | [] => PersistentVector.empty)
| [] => Vector.fromList [] | [] => PersistentVector.empty
end end
end end
end end