At the end of the char loop, check whether the length of dstates changed. If it did not, we have encountered a loop at the end (the state has no follows except looping back to itself), so we should add the endMarker.
This commit is contained in:
@@ -740,9 +740,14 @@ struct
|
|||||||
, unmarkedState
|
, unmarkedState
|
||||||
, unmarkedIdx
|
, unmarkedIdx
|
||||||
, followSet
|
, followSet
|
||||||
|
, prevDstateLength
|
||||||
) =
|
) =
|
||||||
if char < 0 then
|
if char < 0 then
|
||||||
if Vector.length dtran = unmarkedIdx then
|
if Vector.length dtran = unmarkedIdx then
|
||||||
|
(* no follows from this state: insert endMarker to signal end *)
|
||||||
|
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
|
||||||
|
else if Vector.length dstates = prevDstateLength then
|
||||||
|
(* no follows, except looping back to itself. So insert endMarker *)
|
||||||
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
|
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
|
||||||
else
|
else
|
||||||
(dstates, dtran)
|
(dstates, dtran)
|
||||||
@@ -755,6 +760,7 @@ struct
|
|||||||
, unmarkedState
|
, unmarkedState
|
||||||
, unmarkedIdx
|
, unmarkedIdx
|
||||||
, followSet
|
, followSet
|
||||||
|
, prevDstateLength
|
||||||
)
|
)
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
@@ -771,6 +777,7 @@ struct
|
|||||||
, unmarkedState
|
, unmarkedState
|
||||||
, unmarkedIdx
|
, unmarkedIdx
|
||||||
, followSet
|
, followSet
|
||||||
|
, prevDstateLength
|
||||||
)
|
)
|
||||||
| _ =>
|
| _ =>
|
||||||
let
|
let
|
||||||
@@ -786,13 +793,11 @@ struct
|
|||||||
, unmarkedState
|
, unmarkedState
|
||||||
, unmarkedIdx
|
, unmarkedIdx
|
||||||
, followSet
|
, followSet
|
||||||
|
, prevDstateLength
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
fun makeEndmarkerVec i =
|
|
||||||
if i = Char.ord Fn.endMarker then Char.ord Fn.endMarker else ~1
|
|
||||||
|
|
||||||
fun convertLoop (regex, dstates, dtran, followSet) =
|
fun convertLoop (regex, dstates, dtran, followSet) =
|
||||||
case getUnmarkedTransitionsIfExists (0, dstates) of
|
case getUnmarkedTransitionsIfExists (0, dstates) of
|
||||||
SOME (unmarkedIdx, unamarkedTransition) =>
|
SOME (unmarkedIdx, unamarkedTransition) =>
|
||||||
@@ -813,6 +818,7 @@ struct
|
|||||||
, unamarkedTransition
|
, unamarkedTransition
|
||||||
, unmarkedIdx
|
, unmarkedIdx
|
||||||
, followSet
|
, followSet
|
||||||
|
, Vector.length dstates
|
||||||
)
|
)
|
||||||
in
|
in
|
||||||
convertLoop (regex, dstates, dtran, followSet)
|
convertLoop (regex, dstates, dtran, followSet)
|
||||||
@@ -926,6 +932,3 @@ structure CaseSensitiveDfa =
|
|||||||
fun charIsEqual (a: char, b: char) = a = b
|
fun charIsEqual (a: char, b: char) = a = b
|
||||||
fun charIsNotEqual (a: char, b: char) = a <> b
|
fun charIsNotEqual (a: char, b: char) = a <> b
|
||||||
end)
|
end)
|
||||||
|
|
||||||
val fs = CaseSensitiveDfa.fromString
|
|
||||||
val s = "(a|b)*abb#"
|
|
||||||
|
|||||||
@@ -541,7 +541,7 @@ struct
|
|||||||
|
|
||||||
(* assert *)
|
(* assert *)
|
||||||
val expectedSoccerMatches = [(0, 3)]
|
val expectedSoccerMatches = [(0, 3)]
|
||||||
val expectedGPhraseMatches = [(0, 0)]
|
val expectedGPhraseMatches = [(0, 0), (10, 10)]
|
||||||
val expectedOPhraseMatches = []
|
val expectedOPhraseMatches = []
|
||||||
|
|
||||||
val isExpected =
|
val isExpected =
|
||||||
|
|||||||
Reference in New Issue
Block a user