From ce3470e6120dd08fbf4eadf84d7da091fe1b61c8 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Sun, 12 Oct 2025 00:22:14 +0100 Subject: [PATCH] fix bug in regex-test: dfa-gen.sml should add the position of the endMarker to the followSet as well --- fcore/search-list/dfa-gen.sml | 45 ++++++++++++----------------------- test/regex-tests.sml | 2 +- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml index e0f5da1..f434c48 100644 --- a/fcore/search-list/dfa-gen.sml +++ b/fcore/search-list/dfa-gen.sml @@ -2,7 +2,6 @@ signature DFA_GEN_PARAMS = sig val endMarker: char val charIsEqual: char * char -> bool - val charIsNotEqual: char * char -> bool end signature DFA_GEN = @@ -597,7 +596,12 @@ struct fun addToFollowSet (tree, followSet) = case tree of WILDCARD _ => followSet - | CHAR_LITERAL _ => followSet + | CHAR_LITERAL {char, position} => + (* we add the endMarker and its position to the followSet *) + if char = Fn.endMarker then + Set.insertOrReplace (position, [Char.ord Fn.endMarker], followSet) + else + followSet | IS_ANY_CHARACTER _ => followSet | NOT_ANY_CHARACTER _ => followSet | CONCAT {l, r, ...} => @@ -605,11 +609,11 @@ struct val followSet = addToFollowSet (l, followSet) val followSet = addToFollowSet (r, followSet) - val lp = lastpos (l, []) - val fp = firstpos (r, []) - val fp = Set.addFromList (fp, Set.LEAF) + val lpOfLeft = lastpos (l, []) + val fpOfRight = firstpos (r, []) + val fpOfRight = Set.addFromList (fpOfRight, Set.LEAF) in - addKeysToFollowSet (lp, fp, followSet) + addKeysToFollowSet (lpOfLeft, fpOfRight, followSet) end | ALTERNATION {l, r, ...} => let val followSet = addToFollowSet (l, followSet) @@ -617,9 +621,10 @@ struct end | ZERO_OR_MORE child => let - val lp = lastpos (child, []) + val followSet = addToFollowSet (child, followSet) val fp = firstpos (child, []) val fp = Set.addFromList (fp, Set.LEAF) + val lp = lastpos (child, []) in addKeysToFollowSet (lp, fp, followSet) end @@ -666,11 +671,11 @@ struct fun isCharMatch (regex, pos, curChr) = case regex of CHAR_LITERAL {char, ...} => Fn.charIsEqual (char, curChr) - | WILDCARD _ => Fn.charIsNotEqual (curChr, Fn.endMarker) + | WILDCARD _ => true | IS_ANY_CHARACTER {chars, ...} => chrExistsInVec (0, chars, curChr) | NOT_ANY_CHARACTER {chars, ...} => let val charIsValid = chrExistsInVec (0, chars, curChr) - in not charIsValid andalso Fn.charIsNotEqual (curChr, Fn.endMarker) + in not charIsValid end | ALTERNATION {l, r, leftMaxState, ...} => if pos > leftMaxState then isCharMatch (r, pos, curChr) @@ -743,25 +748,7 @@ struct , prevDstateLength ) = if char < 0 then - if Vector.length dtran = unmarkedIdx then - (* no follows from this state: insert endMarker to signal end *) - (dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran)) - else if Vector.length dstates = prevDstateLength then - (* no follows, except looping back to itself. So insert endMarker *) - (dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran)) - else - (dstates, dtran) - else if Char.chr char = Fn.endMarker then - convertChar - ( char - 1 - , regex - , dstates - , dtran - , unmarkedState - , unmarkedIdx - , followSet - , prevDstateLength - ) + (dstates, dtran) else let val u = positionsThatCorrespondToChar @@ -922,7 +909,6 @@ structure CaseInsensitiveDfa = (struct val endMarker = #"\^@" fun charIsEqual (a: char, b: char) = Char.toLower a = Char.toLower b - fun charIsNotEqual (a: char, b: char) = a <> b end) structure CaseSensitiveDfa = @@ -930,5 +916,4 @@ structure CaseSensitiveDfa = (struct val endMarker = #"\^@" fun charIsEqual (a: char, b: char) = a = b - fun charIsNotEqual (a: char, b: char) = a <> b end) diff --git a/test/regex-tests.sml b/test/regex-tests.sml index f0c7373..eddb47f 100644 --- a/test/regex-tests.sml +++ b/test/regex-tests.sml @@ -123,7 +123,7 @@ struct (* arrange *) val sentence = "favo" val regexString = "favou?" - val dfa = CsDfa.fromString "favorite" + val dfa = CsDfa.fromString regexString (* act *) val matches = CsDfa.matchString (dfa, sentence)