Fix a bug in the implementation of the DFA algorithm: we need to add an end marker to the regex, which is then used to tell whether we have reached the final state in the DFA.

This commit is contained in:
2025-10-06 11:49:10 +01:00
parent 3f30d49420
commit cca2602429

View File

@@ -313,25 +313,25 @@ struct
| GROUP regex => getFollowsForPositionAndChar (regex, pos, curChr) | GROUP regex => getFollowsForPositionAndChar (regex, pos, curChr)
| CONCAT {l, r, leftMaxState, ...} => | CONCAT {l, r, leftMaxState, ...} =>
if pos <= leftMaxState then
let let
val nodeToFollow = if pos <= leftMaxState then l else r val result = getFollowsForPositionAndChar (l, pos, curChr)
val result =
getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
val {sawConcat, follows, charIsMatch} = result val {sawConcat, follows, charIsMatch} = result
in in
if charIsMatch then if charIsMatch then
if sawConcat then if sawConcat then
(* saw concat, so we got follow pos already *) (* we already saw a concat and got followpos *)
result result
else else
(* get followpos *)
let val fp = followpos (curChr, regex, follows) let val fp = followpos (curChr, regex, follows)
in {sawConcat = true, follows = fp, charIsMatch = true} in {sawConcat = true, follows = fp, charIsMatch = true}
end end
else else
(* char does not match, so don't get followpos *) (* char is not match, so don't get follow pos *)
result result
end end
else
getFollowsForPositionAndChar (r, pos, curChr)
| ZERO_OR_ONE child => | ZERO_OR_ONE child =>
getFollowsForPositionAndCharLoop (pos, regex, child, curChr) getFollowsForPositionAndCharLoop (pos, regex, child, curChr)
| ZERO_OR_MORE child => | ZERO_OR_MORE child =>
@@ -360,10 +360,15 @@ struct
hd :: tl => hd :: tl =>
let let
val fpList = getFollowsForPositionAndChar (regex, hd, char) val fpList = getFollowsForPositionAndChar (regex, hd, char)
val {sawConcat, follows, charIsMatch} = fpList
val follows =
if charIsMatch andalso not sawConcat then 0 :: follows
else follows
val followSet = val followSet =
List.foldl List.foldl
(fn (fp, followSet) => Set.insertOrReplace (fp, (), followSet)) (fn (fp, followSet) => Set.insertOrReplace (fp, (), followSet))
followSet (#follows fpList) followSet follows
in in
getFollowPositionsFromList (tl, regex, char, followSet) getFollowPositionsFromList (tl, regex, char, followSet)
end end
@@ -501,8 +506,19 @@ struct
end end
fun fromString str = fun fromString str =
case ParseDfa.parse (str ^ "\^@", 0) of case ParseDfa.parse (str, 0) of
SOME (ast, _) => ToDfa.convert ast SOME (ast, numStates) =>
let
val endMarker = CHAR_LITERAL {char = #"\^@", position = numStates + 1}
val ast = CONCAT
{ l = ast
, leftMaxState = numStates
, r = endMarker
, rightMaxState = numStates + 1
}
in
ToDfa.convert ast
end
| NONE => Vector.fromList [] | NONE => Vector.fromList []
type dfa = int vector vector type dfa = int vector vector