From dcd930f855a22eabb2bbdf5ff6a041766189f686 Mon Sep 17 00:00:00 2001
From: Humza Shahid
Date: Mon, 6 Oct 2025 17:08:57 +0100
Subject: [PATCH] begin expanding ? and + regex symbols, which we can represent using a combination of the others

---
 fcore/search-list/dfa-gen.sml | 159 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 140 insertions(+), 19 deletions(-)

diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml
index e4d7f0a..bcce534 100644
--- a/fcore/search-list/dfa-gen.sml
+++ b/fcore/search-list/dfa-gen.sml
@@ -10,6 +10,142 @@ struct
   | GROUP of regex
   | WILDCARD of int
 
+  (* Returns a copy of `regex` with every leaf position moved up by `delta`,
+   * paired with the position of the copy's right-most leaf.
+   * `?` and `+` nodes must already be expanded; they raise Fail. *)
+  fun shiftStates (regex, delta) =
+    case regex of
+      CHAR_LITERAL {position, char} =>
+        let val i = position + delta
+        in (CHAR_LITERAL {position = i, char = char}, i)
+        end
+    | WILDCARD i => let val i = i + delta in (WILDCARD i, i) end
+    | CONCAT {l, r, ...} =>
+        let
+          val (l, leftMaxState) = shiftStates (l, delta)
+          val (r, rightMaxState) = shiftStates (r, delta)
+          val concat = CONCAT
+            { l = l
+            , leftMaxState = leftMaxState
+            , r = r
+            , rightMaxState = rightMaxState
+            }
+        in
+          (concat, rightMaxState)
+        end
+    | ALTERNATION {l, r, ...} =>
+        let
+          val (l, leftMaxState) = shiftStates (l, delta)
+          val (r, rightMaxState) = shiftStates (r, delta)
+          val alternation = ALTERNATION
+            { l = l
+            , leftMaxState = leftMaxState
+            , r = r
+            , rightMaxState = rightMaxState
+            }
+        in
+          (alternation, rightMaxState)
+        end
+    | GROUP child =>
+        let val (child, maxState) = shiftStates (child, delta)
+        in (GROUP child, maxState)
+        end
+    | ZERO_OR_MORE child =>
+        let val (child, maxState) = shiftStates (child, delta)
+        in (ZERO_OR_MORE child, maxState)
+        end
+    | _ => raise Fail "should not call increment on ? or +"
+
+  (* Moves every leaf position of `regex` up by one. *)
+  fun incrementStates regex = shiftStates (regex, 1)
+
+  (* Position of the left-most leaf of an already expanded regex. *)
+  fun firstState regex =
+    case regex of
+      CHAR_LITERAL {position, ...} => position
+    | WILDCARD i => i
+    | CONCAT {l, ...} => firstState l
+    | ALTERNATION {l, ...} => firstState l
+    | GROUP child => firstState child
+    | ZERO_OR_MORE child => firstState child
+    | _ => raise Fail "should not call firstState on ? or +"
+
+  (* Rewrites `+` using the other operators and returns the new regex
+   * together with its highest leaf position.
+   *
+   * `r+` becomes CONCAT (r, ZERO_OR_MORE r'), where r' is a copy of r whose
+   * leaves take the positions directly after r's. Positions identify
+   * leaves during the followpos computation, so they have to stay unique:
+   * every leaf to the right of an expansion is moved up by the number of
+   * positions the copy consumed, and `shift` carries that running total.
+   * Assumes positions increase from left to right (TODO confirm against
+   * ParseDfa). *)
+  fun expandRegex regex =
+    let
+      fun expand (regex, shift) =
+        case regex of
+          CHAR_LITERAL {position, char} =>
+            let val i = position + shift
+            in (CHAR_LITERAL {position = i, char = char}, i, shift)
+            end
+        | WILDCARD i => let val i = i + shift in (WILDCARD i, i, shift) end
+        | CONCAT {l, r, ...} =>
+            let
+              val (l, leftMaxState, shift) = expand (l, shift)
+              val (r, rightMaxState, shift) = expand (r, shift)
+              val node = CONCAT
+                { l = l
+                , r = r
+                , leftMaxState = leftMaxState
+                , rightMaxState = rightMaxState
+                }
+            in
+              (node, rightMaxState, shift)
+            end
+        | ALTERNATION {l, r, ...} =>
+            let
+              val (l, leftMaxState, shift) = expand (l, shift)
+              val (r, rightMaxState, shift) = expand (r, shift)
+              val node = ALTERNATION
+                { l = l
+                , r = r
+                , leftMaxState = leftMaxState
+                , rightMaxState = rightMaxState
+                }
+            in
+              (node, rightMaxState, shift)
+            end
+        | GROUP child =>
+            let val (child, maxState, shift) = expand (child, shift)
+            in (GROUP child, maxState, shift)
+            end
+        | ZERO_OR_MORE child =>
+            let val (child, maxState, shift) = expand (child, shift)
+            in (ZERO_OR_MORE child, maxState, shift)
+            end
+        | ONE_OR_MORE child =>
+            let
+              val (l, leftMaxState, shift) = expand (child, shift)
+              (* width of the position range that l occupies *)
+              val span = leftMaxState - firstState l + 1
+              val (r, rightMaxState) = shiftStates (l, span)
+              val node = CONCAT
+                { l = l
+                , leftMaxState = leftMaxState
+                , r = ZERO_OR_MORE r
+                , rightMaxState = rightMaxState
+                }
+            in
+              (node, rightMaxState, shift + span)
+            end
+        | ZERO_OR_ONE _ =>
+            raise Fail "expandRegex: expansion of ? is not implemented yet"
+
+      val (regex, maxState, _) = expand (regex, 0)
+    in
+      (regex, maxState)
+    end
+
   structure Set =
   struct
     datatype 'a set = BRANCH of 'a set * int * 'a * 'a set | LEAF
@@ -350,26 +486,10 @@ struct
           else
             result
         end
-    | ZERO_OR_ONE child =>
-        getFollowsForPositionAndCharLoop (pos, regex, child, curChr)
+    | ZERO_OR_ONE _ =>
+        raise Fail "dfa-gen.sml 490: should expand so we don't have ?"
     | ONE_OR_MORE child =>
-        getFollowsForPositionAndCharLoop (pos, regex, child, curChr)
-
-  and getFollowsForPositionAndCharLoop (pos, regex, child, curChr) =
-    let
-      val result = getFollowsForPositionAndChar (child, pos, curChr)
-      val {sawConcat, follows, charIsMatch} = result
-    in
-      if charIsMatch then
-        if sawConcat then
-          result
-        else
-          let val fp = followpos (curChr, regex, follows)
-          in {sawConcat = false, follows = fp, charIsMatch = true}
-          end
-      else
-        result
-    end
+        raise Fail "dfa-gen.sml 492: should expand so we don't have +"
 
   fun getFollowPositionsFromList (lst: int list, regex, char, followSet) =
     case lst of
@@ -525,6 +645,7 @@ struct
     case ParseDfa.parse (str, 0) of
       SOME (ast, numStates) =>
         let
+          val (ast, numStates) = expandRegex ast
           val endMarker = CHAR_LITERAL {char = #"\^@", position = numStates + 1}
           val ast = CONCAT
             { l = ast