At the end of the char loop, check whether the length of dstates changed. If it did not, the only follows from this state loop back to the state itself, which means the state is at the end; thus, we should add the endMarker

This commit is contained in:
2025-10-11 13:39:28 +01:00
parent b2931753d0
commit 7f1f1f7bdc
3 changed files with 10 additions and 3581 deletions

View File

@@ -740,9 +740,14 @@ struct
, unmarkedState
, unmarkedIdx
, followSet
, prevDstateLength
) =
if char < 0 then
if Vector.length dtran = unmarkedIdx then
(* no follows from this state: insert endMarker to signal end *)
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
else if Vector.length dstates = prevDstateLength then
(* no follows, except looping back to itself. So insert endMarker *)
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
else
(dstates, dtran)
@@ -755,6 +760,7 @@ struct
, unmarkedState
, unmarkedIdx
, followSet
, prevDstateLength
)
else
let
@@ -771,6 +777,7 @@ struct
, unmarkedState
, unmarkedIdx
, followSet
, prevDstateLength
)
| _ =>
let
@@ -786,13 +793,11 @@ struct
, unmarkedState
, unmarkedIdx
, followSet
, prevDstateLength
)
end
end
fun makeEndmarkerVec i =
if i = Char.ord Fn.endMarker then Char.ord Fn.endMarker else ~1
fun convertLoop (regex, dstates, dtran, followSet) =
case getUnmarkedTransitionsIfExists (0, dstates) of
SOME (unmarkedIdx, unamarkedTransition) =>
@@ -813,6 +818,7 @@ struct
, unamarkedTransition
, unmarkedIdx
, followSet
, Vector.length dstates
)
in
convertLoop (regex, dstates, dtran, followSet)
@@ -926,6 +932,3 @@ structure CaseSensitiveDfa =
fun charIsEqual (a: char, b: char) = a = b
fun charIsNotEqual (a: char, b: char) = a <> b
end)
val fs = CaseSensitiveDfa.fromString
val s = "(a|b)*abb#"

3574
temp.txt

File diff suppressed because it is too large Load Diff

View File

@@ -541,7 +541,7 @@ struct
(* assert *)
val expectedSoccerMatches = [(0, 3)]
val expectedGPhraseMatches = [(0, 0)]
val expectedGPhraseMatches = [(0, 0), (10, 10)]
val expectedOPhraseMatches = []
val isExpected =