Fix bug in regex-test: dfa-gen.sml should also add the position of the endMarker to the followSet

This commit is contained in:
2025-10-12 00:22:14 +01:00
parent bc16421a24
commit ce3470e612
2 changed files with 16 additions and 31 deletions

View File

@@ -2,7 +2,6 @@ signature DFA_GEN_PARAMS =
sig sig
val endMarker: char val endMarker: char
val charIsEqual: char * char -> bool val charIsEqual: char * char -> bool
val charIsNotEqual: char * char -> bool
end end
signature DFA_GEN = signature DFA_GEN =
@@ -597,7 +596,12 @@ struct
fun addToFollowSet (tree, followSet) = fun addToFollowSet (tree, followSet) =
case tree of case tree of
WILDCARD _ => followSet WILDCARD _ => followSet
| CHAR_LITERAL _ => followSet | CHAR_LITERAL {char, position} =>
(* we add the endMarker and its position to the followSet *)
if char = Fn.endMarker then
Set.insertOrReplace (position, [Char.ord Fn.endMarker], followSet)
else
followSet
| IS_ANY_CHARACTER _ => followSet | IS_ANY_CHARACTER _ => followSet
| NOT_ANY_CHARACTER _ => followSet | NOT_ANY_CHARACTER _ => followSet
| CONCAT {l, r, ...} => | CONCAT {l, r, ...} =>
@@ -605,11 +609,11 @@ struct
val followSet = addToFollowSet (l, followSet) val followSet = addToFollowSet (l, followSet)
val followSet = addToFollowSet (r, followSet) val followSet = addToFollowSet (r, followSet)
val lp = lastpos (l, []) val lpOfLeft = lastpos (l, [])
val fp = firstpos (r, []) val fpOfRight = firstpos (r, [])
val fp = Set.addFromList (fp, Set.LEAF) val fpOfRight = Set.addFromList (fpOfRight, Set.LEAF)
in in
addKeysToFollowSet (lp, fp, followSet) addKeysToFollowSet (lpOfLeft, fpOfRight, followSet)
end end
| ALTERNATION {l, r, ...} => | ALTERNATION {l, r, ...} =>
let val followSet = addToFollowSet (l, followSet) let val followSet = addToFollowSet (l, followSet)
@@ -617,9 +621,10 @@ struct
end end
| ZERO_OR_MORE child => | ZERO_OR_MORE child =>
let let
val lp = lastpos (child, []) val followSet = addToFollowSet (child, followSet)
val fp = firstpos (child, []) val fp = firstpos (child, [])
val fp = Set.addFromList (fp, Set.LEAF) val fp = Set.addFromList (fp, Set.LEAF)
val lp = lastpos (child, [])
in in
addKeysToFollowSet (lp, fp, followSet) addKeysToFollowSet (lp, fp, followSet)
end end
@@ -666,11 +671,11 @@ struct
fun isCharMatch (regex, pos, curChr) = fun isCharMatch (regex, pos, curChr) =
case regex of case regex of
CHAR_LITERAL {char, ...} => Fn.charIsEqual (char, curChr) CHAR_LITERAL {char, ...} => Fn.charIsEqual (char, curChr)
| WILDCARD _ => Fn.charIsNotEqual (curChr, Fn.endMarker) | WILDCARD _ => true
| IS_ANY_CHARACTER {chars, ...} => chrExistsInVec (0, chars, curChr) | IS_ANY_CHARACTER {chars, ...} => chrExistsInVec (0, chars, curChr)
| NOT_ANY_CHARACTER {chars, ...} => | NOT_ANY_CHARACTER {chars, ...} =>
let val charIsValid = chrExistsInVec (0, chars, curChr) let val charIsValid = chrExistsInVec (0, chars, curChr)
in not charIsValid andalso Fn.charIsNotEqual (curChr, Fn.endMarker) in not charIsValid
end end
| ALTERNATION {l, r, leftMaxState, ...} => | ALTERNATION {l, r, leftMaxState, ...} =>
if pos > leftMaxState then isCharMatch (r, pos, curChr) if pos > leftMaxState then isCharMatch (r, pos, curChr)
@@ -743,25 +748,7 @@ struct
, prevDstateLength , prevDstateLength
) = ) =
if char < 0 then if char < 0 then
if Vector.length dtran = unmarkedIdx then (dstates, dtran)
(* no follows from this state: insert endMarker to signal end *)
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
else if Vector.length dstates = prevDstateLength then
(* no follows, except looping back to itself. So insert endMarker *)
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
else
(dstates, dtran)
else if Char.chr char = Fn.endMarker then
convertChar
( char - 1
, regex
, dstates
, dtran
, unmarkedState
, unmarkedIdx
, followSet
, prevDstateLength
)
else else
let let
val u = positionsThatCorrespondToChar val u = positionsThatCorrespondToChar
@@ -922,7 +909,6 @@ structure CaseInsensitiveDfa =
(struct (struct
val endMarker = #"\^@" val endMarker = #"\^@"
fun charIsEqual (a: char, b: char) = Char.toLower a = Char.toLower b fun charIsEqual (a: char, b: char) = Char.toLower a = Char.toLower b
fun charIsNotEqual (a: char, b: char) = a <> b
end) end)
structure CaseSensitiveDfa = structure CaseSensitiveDfa =
@@ -930,5 +916,4 @@ structure CaseSensitiveDfa =
(struct (struct
val endMarker = #"\^@" val endMarker = #"\^@"
fun charIsEqual (a: char, b: char) = a = b fun charIsEqual (a: char, b: char) = a = b
fun charIsNotEqual (a: char, b: char) = a <> b
end) end)

View File

@@ -123,7 +123,7 @@ struct
(* arrange *) (* arrange *)
val sentence = "favo" val sentence = "favo"
val regexString = "favou?" val regexString = "favou?"
val dfa = CsDfa.fromString "favorite" val dfa = CsDfa.fromString regexString
(* act *) (* act *)
val matches = CsDfa.matchString (dfa, sentence) val matches = CsDfa.matchString (dfa, sentence)