Return PersistentVector.t when building the search list and when executing the NFA, because the search list should no longer be a simple flat vector
This commit is contained in:
@@ -1,82 +1,76 @@
|
|||||||
structure PersistentVector =
|
structure PersistentVector =
|
||||||
struct
|
struct
|
||||||
(* Clojure-style persistent vector,
|
(* Clojure-style persistent vector, for building search list.
|
||||||
* as intermediary data structure
|
* There is a "size table" (an int vector) too, which stores the last index
|
||||||
* for building search list *)
|
* at the node with the same index.
|
||||||
|
* We can use the size table for binary search.
|
||||||
|
* *)
|
||||||
datatype t =
|
datatype t =
|
||||||
BRANCH of t vector
|
BRANCH of t vector * int vector
|
||||||
| LEAF of int vector
|
| LEAF of {start: int, finish: int} vector * int vector
|
||||||
|
|
||||||
val maxSize = 32
|
val maxSize = 32
|
||||||
|
|
||||||
val empty = LEAF #[]
|
val empty = LEAF (#[], #[])
|
||||||
|
|
||||||
datatype append_result = APPEND of t | UPDATE of t
|
datatype append_result = APPEND of t | UPDATE of t
|
||||||
|
|
||||||
fun helpAppend (key, tree) =
|
fun getFinishIdx t =
|
||||||
|
case t of
|
||||||
|
BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
|
||||||
|
| LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
|
||||||
|
|
||||||
|
fun helpAppend (start, finish, tree) =
|
||||||
case tree of
|
case tree of
|
||||||
BRANCH nodes =>
|
BRANCH (nodes, sizes) =>
|
||||||
let
|
let
|
||||||
val lastNode = Vector.sub (nodes, Vector.length nodes - 1)
|
val lastNode = Vector.sub (nodes, Vector.length nodes - 1)
|
||||||
in
|
in
|
||||||
case helpAppend (key, lastNode) of
|
case helpAppend (start, finish, lastNode) of
|
||||||
UPDATE newLast =>
|
UPDATE newLast =>
|
||||||
let
|
let
|
||||||
val newNode = Vector.update
|
val lastPos = Vector.length nodes - 1
|
||||||
(nodes, Vector.length nodes - 1, newLast)
|
val newNode = Vector.update (nodes, lastPos, newLast)
|
||||||
val newNode = BRANCH newNode
|
val newSizes = Vector.update (sizes, lastPos, finish)
|
||||||
|
val newNode = BRANCH (newNode, newSizes)
|
||||||
in
|
in
|
||||||
UPDATE newNode
|
UPDATE newNode
|
||||||
end
|
end
|
||||||
| APPEND newVec =>
|
| APPEND newVec =>
|
||||||
if Vector.length nodes + 1 > maxSize then
|
if Vector.length nodes + 1 > maxSize then
|
||||||
let val newNode = BRANCH #[newVec]
|
let val newNode = BRANCH (#[newVec], #[finish])
|
||||||
in APPEND newNode
|
in APPEND newNode
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val newNodes = Vector.concat [nodes, #[newVec]]
|
val newNodes = Vector.concat [nodes, #[newVec]]
|
||||||
val newNodes = BRANCH newNodes
|
val newSizes = Vector.concat [sizes, #[finish]]
|
||||||
|
val newNodes = BRANCH (newNodes, newSizes)
|
||||||
in
|
in
|
||||||
UPDATE newNodes
|
UPDATE newNodes
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
| LEAF vec =>
|
| LEAF (values, sizes) =>
|
||||||
if Vector.length vec + 1 > maxSize then
|
if Vector.length values + 1 > maxSize then
|
||||||
let val newNode = LEAF #[key]
|
let val newNode = LEAF (#[{start = start, finish = finish}], #[finish])
|
||||||
in APPEND newNode
|
in APPEND newNode
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val newNode = Vector.concat [vec, #[key]]
|
val newNode = Vector.concat
|
||||||
val newNode = LEAF newNode
|
[values, #[{start = start, finish = finish}]]
|
||||||
|
val newSizes = Vector.concat [sizes, #[finish]]
|
||||||
|
val newNode = LEAF (newNode, newSizes)
|
||||||
in
|
in
|
||||||
UPDATE newNode
|
UPDATE newNode
|
||||||
end
|
end
|
||||||
|
|
||||||
fun append (key, tree) =
|
fun append (start, finish, tree) =
|
||||||
case helpAppend (key, tree) of
|
|
||||||
UPDATE t => t
|
|
||||||
| APPEND newNode => BRANCH #[tree, newNode]
|
|
||||||
|
|
||||||
fun branchToList (pos, nodes, acc) =
|
|
||||||
if pos < 0 then
|
|
||||||
acc
|
|
||||||
else
|
|
||||||
let
|
let
|
||||||
val node = Vector.sub (nodes, pos)
|
val maxSize = getFinishIdx tree
|
||||||
val acc = helpToVector (node, acc)
|
|
||||||
in
|
in
|
||||||
branchToList (pos - 1, nodes, acc)
|
case helpAppend (start, finish, tree) of
|
||||||
end
|
UPDATE t => t
|
||||||
|
| APPEND newNode => BRANCH (#[tree, newNode], #[maxSize, finish])
|
||||||
and helpToVector (tree, acc) =
|
|
||||||
case tree of
|
|
||||||
BRANCH nodes => branchToList (Vector.length nodes - 1, nodes, acc)
|
|
||||||
| LEAF vec => vec :: acc
|
|
||||||
|
|
||||||
fun toVector tree =
|
|
||||||
let val lst = helpToVector (tree, [])
|
|
||||||
in Vector.concat lst
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ structure SearchList =
|
|||||||
struct
|
struct
|
||||||
type t = int vector
|
type t = int vector
|
||||||
|
|
||||||
val empty = Vector.fromList []
|
val empty = PersistentVector.empty
|
||||||
|
|
||||||
fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) =
|
fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) =
|
||||||
if pos < 0 then
|
if pos < 0 then
|
||||||
@@ -47,7 +47,7 @@ struct
|
|||||||
, searchString
|
, searchString
|
||||||
, hd :: prevTl
|
, hd :: prevTl
|
||||||
)
|
)
|
||||||
| [] => PersistentVector.toVector acc
|
| [] => acc
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val bufferChr = String.sub (hd, pos)
|
val bufferChr = String.sub (hd, pos)
|
||||||
@@ -58,7 +58,7 @@ struct
|
|||||||
(* we fully matched the search string *)
|
(* we fully matched the search string *)
|
||||||
let
|
let
|
||||||
val foundIdx = absIdx - String.size searchString + 1
|
val foundIdx = absIdx - String.size searchString + 1
|
||||||
val acc = PersistentVector.append (foundIdx, acc)
|
val acc = PersistentVector.append (foundIdx, absIdx, acc)
|
||||||
in
|
in
|
||||||
loopSearch
|
loopSearch
|
||||||
(pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl)
|
(pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl)
|
||||||
@@ -152,9 +152,9 @@ struct
|
|||||||
, prevTl
|
, prevTl
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
| [] => PersistentVector.toVector acc
|
| [] => acc
|
||||||
else if absIdx = finish then
|
else if absIdx = finish then
|
||||||
PersistentVector.toVector acc
|
acc
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val bufferChr = String.sub (hd, pos)
|
val bufferChr = String.sub (hd, pos)
|
||||||
@@ -165,7 +165,7 @@ struct
|
|||||||
(* full match *)
|
(* full match *)
|
||||||
let
|
let
|
||||||
val foundIdx = absIdx - String.size searchString + 1
|
val foundIdx = absIdx - String.size searchString + 1
|
||||||
val acc = PersistentVector.append (foundIdx, acc)
|
val acc = PersistentVector.append (foundIdx, absIdx, acc)
|
||||||
in
|
in
|
||||||
loopRange
|
loopRange
|
||||||
( pos + 1
|
( pos + 1
|
||||||
@@ -242,7 +242,8 @@ struct
|
|||||||
if String.size searchString > 0 then
|
if String.size searchString > 0 then
|
||||||
case Nfa.parse searchString of
|
case Nfa.parse searchString of
|
||||||
SOME nfa =>
|
SOME nfa =>
|
||||||
Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa)
|
Nfa.getMatchesInRange
|
||||||
|
(#idx buffer, finishIdx, buffer : LineGap.t, nfa)
|
||||||
| NONE => empty
|
| NONE => empty
|
||||||
else
|
else
|
||||||
empty
|
empty
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ struct
|
|||||||
local
|
local
|
||||||
fun loop (pos, str, nfa, origNfa, startPos, acc) =
|
fun loop (pos, str, nfa, origNfa, startPos, acc) =
|
||||||
if pos = String.size str then
|
if pos = String.size str then
|
||||||
PersistentVector.toVector acc
|
acc
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val chr = String.sub (str, pos)
|
val chr = String.sub (str, pos)
|
||||||
@@ -149,7 +149,7 @@ struct
|
|||||||
in
|
in
|
||||||
case state of
|
case state of
|
||||||
VALID finishIdx =>
|
VALID finishIdx =>
|
||||||
let val acc = PersistentVector.append (pos, acc)
|
let val acc = PersistentVector.append (pos, finishIdx, acc)
|
||||||
in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc)
|
in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc)
|
||||||
end
|
end
|
||||||
| INVALID =>
|
| INVALID =>
|
||||||
@@ -232,9 +232,9 @@ struct
|
|||||||
, startIdx
|
, startIdx
|
||||||
, finishIdx
|
, finishIdx
|
||||||
)
|
)
|
||||||
| [] => PersistentVector.toVector acc
|
| [] => acc
|
||||||
else if absIdx > finishIdx then
|
else if absIdx > finishIdx then
|
||||||
PersistentVector.toVector acc
|
acc
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val chr = String.sub (hd, strIdx)
|
val chr = String.sub (hd, strIdx)
|
||||||
@@ -254,9 +254,9 @@ struct
|
|||||||
, startIdx
|
, startIdx
|
||||||
, finishIdx
|
, finishIdx
|
||||||
)
|
)
|
||||||
| VALID _ =>
|
| VALID finishIdx =>
|
||||||
let
|
let
|
||||||
val acc = PersistentVector.append (startIdx, acc)
|
val acc = PersistentVector.append (startIdx, finishIdx, acc)
|
||||||
in
|
in
|
||||||
loop
|
loop
|
||||||
( strIdx + 1
|
( strIdx + 1
|
||||||
@@ -349,8 +349,8 @@ struct
|
|||||||
, finishIdx
|
, finishIdx
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
| [] => Vector.fromList [])
|
| [] => PersistentVector.empty)
|
||||||
| [] => Vector.fromList []
|
| [] => PersistentVector.empty
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user