From dfb9153896d422c423526d4100d9264091ee48fd Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Thu, 2 Oct 2025 04:34:16 +0100 Subject: [PATCH] annotate CONCAT and ALTERNATION nodes with max states of left and right position during parsing. This makes it easier to find a given state. --- fcore/search-list/nfa.sml | 108 ++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 29 deletions(-) diff --git a/fcore/search-list/nfa.sml b/fcore/search-list/nfa.sml index e1766b8..92695f7 100644 --- a/fcore/search-list/nfa.sml +++ b/fcore/search-list/nfa.sml @@ -2,14 +2,49 @@ structure Nfa = struct datatype regex = CHAR_LITERAL of {char: char, position: int} - | CONCAT of regex * regex - | ALTERNATION of regex * regex + | CONCAT of {l: regex, r: regex, leftMaxState: int, rightMaxState: int} + | ALTERNATION of {l: regex, r: regex, leftMaxState: int, rightMaxState: int} | ZERO_OR_ONE of regex | ZERO_OR_MORE of regex | ONE_OR_MORE of regex | GROUP of regex | WILDCARD of int + structure Set = + struct + datatype set = BRANCH of set * int * bool * set | LEAF + + fun insertIfNew (newKey, tree) = + case tree of + BRANCH (l, curKey, isMarked, r) => + if newKey > curKey then + let val r = insertIfNew (newKey, r) + in BRANCH (l, curKey, isMarked, r) + end + else if newKey < curKey then + let val l = insertIfNew (newKey, l) + in BRANCH (l, curKey, isMarked, r) + end + else + tree + | LEAF => BRANCH (LEAF, newKey, false, LEAF) + + fun setMarked (findKey, tree) = + case tree of + BRANCH (l, curKey, isMarked, r) => + if findKey > curKey then + let val r = setMarked (findKey, r) + in BRANCH (l, curKey, isMarked, r) + end + else if findKey < curKey then + let val l = setMarked (findKey, l) + in BRANCH (l, curKey, isMarked, r) + end + else + BRANCH (l, curKey, true, r) + | LEAF => (* this case should not occur *) LEAF + end + structure ParseNfa = struct (* parsing through precedence climbing algorithm. *) @@ -78,9 +113,16 @@ struct val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} in case climb (pos + 2, str, chr, altLevel, stateNum + 1) of - SOME (pos, rhs, stateNum) => - let val result = ALTERNATION (lhs, rhs) - in SOME (pos, result, stateNum) + SOME (pos, rhs, rightStateNum) => + let + val result = ALTERNATION + { l = lhs + , r = rhs + , leftMaxState = stateNum + , rightMaxState = rightStateNum + } + in + SOME (pos, result, rightStateNum) end | NONE => NONE end @@ -112,11 +154,18 @@ struct SOME (pos, lhs, stateNum) else case computeAtom (pos, str, stateNum) of - SOME (nextPos, curAtom, stateNum) => - (case climb (nextPos, str, curAtom, concatLevel, stateNum) of - SOME (pos, rhs, stateNum) => - let val result = CONCAT (lhs, rhs) - in SOME (pos, result, stateNum) + SOME (nextPos, curAtom, atomStateNum) => + (case climb (nextPos, str, curAtom, concatLevel, atomStateNum) of + SOME (pos, rhs, rightStateNum) => + let + val result = CONCAT + { l = lhs + , r = rhs + , leftMaxState = stateNum + , rightMaxState = rightStateNum + } + in + SOME (pos, result, rightStateNum) end | NONE => NONE) | NONE => NONE @@ -145,8 +194,8 @@ struct CHAR_LITERAL _ => false | WILDCARD _ => false - | CONCAT (r1, r2) => isNullable r1 andalso isNullable r2 - | ALTERNATION (r1, r2) => isNullable r1 orelse isNullable r2 + | CONCAT {l, r, ...} => isNullable l andalso isNullable r + | ALTERNATION {l, r, ...} => isNullable l orelse isNullable r | ZERO_OR_ONE _ => true | ZERO_OR_MORE _ => true @@ -159,16 +208,16 @@ struct CHAR_LITERAL {position, ...} => position :: acc | WILDCARD i => i :: acc - | CONCAT (r1, r2) => - if isNullable r1 then - let val acc = firstpos (r1, acc) - in firstpos (r2, acc) + | CONCAT {l, r, ...} => + if isNullable l then + let val acc = firstpos (l, acc) + in firstpos (r, acc) end else - firstpos (r1, acc) - | ALTERNATION (r1, r2) => - let val acc = firstpos (r1, acc) - in firstpos (r2, acc) + firstpos (l, acc) + | ALTERNATION {l, r, ...} => + let val acc = firstpos (l, acc) + in firstpos (r, acc) end | ZERO_OR_ONE regex => firstpos (regex, acc) @@ -181,16 +230,16 @@ struct CHAR_LITERAL {position, ...} => position :: acc | WILDCARD i => i :: acc - | CONCAT (r1, r2) => - if isNullable r2 then - let val acc = lastpos (r1, acc) - in lastpos (r2, acc) + | CONCAT {l, r, ...} => + if isNullable r then + let val acc = lastpos (l, acc) + in lastpos (r, acc) end else - lastpos (r2, acc) - | ALTERNATION (r1, r2) => - let val acc = lastpos (r1, acc) - in lastpos (r2, acc) + lastpos (l, acc) + | ALTERNATION {l, r, ...} => + let val acc = lastpos (l, acc) + in lastpos (r, acc) end | ZERO_OR_ONE regex => lastpos (regex, acc) @@ -200,7 +249,7 @@ struct fun followpos tree = case tree of - CONCAT (_, r2) => firstpos (r2, []) + CONCAT {r, ...} => firstpos (r, []) | ZERO_OR_MORE r => firstpos (r, []) | ZERO_OR_ONE r => firstpos (r, []) | ONE_OR_MORE r => firstpos (r, []) @@ -213,5 +262,6 @@ struct | NONE => NONE fun firstpos regex = ToDfa.firstpos (regex, []) + fun lastpos regex = ToDfa.lastpos (regex, []) end