Add a list of follows to each leaf of the regex parse tree (this commit only changes the data type; the follows lists still need to be populated later)

This commit is contained in:
2025-10-10 03:49:09 +01:00
parent 108a30ea79
commit 58c3e65fdd

View File

@@ -22,10 +22,10 @@ end
functor MakeDfaGen(Fn: DFA_GEN_PARAMS): DFA_GEN = functor MakeDfaGen(Fn: DFA_GEN_PARAMS): DFA_GEN =
struct struct
datatype parse_tree = datatype parse_tree =
CHAR_LITERAL of {char: char, position: int} CHAR_LITERAL of {char: char, position: int, follows: int list}
| WILDCARD of int | WILDCARD of {position: int, follows: int list}
| IS_ANY_CHARACTER of {chars: char vector, position: int} | IS_ANY_CHARACTER of {chars: char vector, position: int, follows: int list}
| NOT_ANY_CHARACTER of {chars: char vector, position: int} | NOT_ANY_CHARACTER of {chars: char vector, position: int, follows: int list}
| CONCAT of | CONCAT of
{ l: parse_tree { l: parse_tree
, r: parse_tree , r: parse_tree
@@ -66,7 +66,7 @@ struct
fun firstpos (tree, acc) = fun firstpos (tree, acc) =
case tree of case tree of
CHAR_LITERAL {position, ...} => position :: acc CHAR_LITERAL {position, ...} => position :: acc
| WILDCARD i => i :: acc | WILDCARD {position, ...} => position :: acc
| IS_ANY_CHARACTER {position, ...} => position :: acc | IS_ANY_CHARACTER {position, ...} => position :: acc
| NOT_ANY_CHARACTER {position, ...} => position :: acc | NOT_ANY_CHARACTER {position, ...} => position :: acc
| CONCAT {firstpos = fp, ...} => fp @ acc | CONCAT {firstpos = fp, ...} => fp @ acc
@@ -79,7 +79,7 @@ struct
fun lastpos (tree, acc) = fun lastpos (tree, acc) =
case tree of case tree of
CHAR_LITERAL {position, ...} => position :: acc CHAR_LITERAL {position, ...} => position :: acc
| WILDCARD i => i :: acc | WILDCARD {position, ...} => position :: acc
| IS_ANY_CHARACTER {position, ...} => position :: acc | IS_ANY_CHARACTER {position, ...} => position :: acc
| NOT_ANY_CHARACTER {position, ...} => position :: acc | NOT_ANY_CHARACTER {position, ...} => position :: acc
| CONCAT {lastpos = lp, ...} => lp @ acc | CONCAT {lastpos = lp, ...} => lp @ acc
@@ -113,10 +113,8 @@ struct
case lst of case lst of
[] => tree [] => tree
| (k, v) :: tl => | (k, v) :: tl =>
let let val tree = insertOrReplace (k, v, tree)
val tree = insertOrReplace (k, v, tree) in addFromList (tl, tree)
in
addFromList (tl, tree)
end end
fun getOrDefault (findKey, tree, default) = fun getOrDefault (findKey, tree, default) =
@@ -360,7 +358,9 @@ struct
case getCharsInBrackets (pos, str, []) of case getCharsInBrackets (pos, str, []) of
SOME (pos, chars) => SOME (pos, chars) =>
let let
val node = IS_ANY_CHARACTER {chars = chars, position = stateNum + 1} val node =
IS_ANY_CHARACTER
{chars = chars, position = stateNum + 1, follows = []}
in in
SOME (pos, node, stateNum + 1) SOME (pos, node, stateNum + 1)
end end
@@ -371,7 +371,8 @@ struct
SOME (pos, chars) => SOME (pos, chars) =>
let let
val node = val node =
NOT_ANY_CHARACTER {chars = chars, position = stateNum + 1} NOT_ANY_CHARACTER
{chars = chars, position = stateNum + 1, follows = []}
in in
SOME (pos, node, stateNum + 1) SOME (pos, node, stateNum + 1)
end end
@@ -408,14 +409,19 @@ struct
NONE NONE
else if isValid then else if isValid then
let let
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} val chr =
CHAR_LITERAL
{char = chr, position = stateNum + 1, follows = []}
in in
SOME (pos + 2, chr, stateNum + 1) SOME (pos + 2, chr, stateNum + 1)
end end
else else
NONE NONE
end end
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1) | #"." =>
let val w = WILDCARD {position = stateNum + 1, follows = []}
in SOME (pos + 1, w, stateNum + 1)
end
| #"[" => | #"[" =>
if pos + 1 = String.size str then if pos + 1 = String.size str then
NONE NONE
@@ -434,8 +440,12 @@ struct
if Fn.charIsEqual (chr, Fn.endMarker) then if Fn.charIsEqual (chr, Fn.endMarker) then
NONE NONE
else else
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} let
in SOME (pos + 1, chr, stateNum + 1) val chr =
CHAR_LITERAL
{char = chr, position = stateNum + 1, follows = []}
in
SOME (pos + 1, chr, stateNum + 1)
end end
and climb (pos, str, lhs, level, stateNum) : (int * parse_tree * int) option = and climb (pos, str, lhs, level, stateNum) : (int * parse_tree * int) option =
@@ -449,7 +459,9 @@ struct
else if pos + 1 < String.size str then else if pos + 1 < String.size str then
let let
val chr = String.sub (str, pos + 1) val chr = String.sub (str, pos + 1)
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} val chr =
CHAR_LITERAL
{char = chr, position = stateNum + 1, follows = []}
in in
case climb (pos + 2, str, chr, altLevel, stateNum + 1) of case climb (pos + 2, str, chr, altLevel, stateNum + 1) of
SOME (pos, rhs, rightStateNum) => SOME (pos, rhs, rightStateNum) =>
@@ -583,7 +595,7 @@ struct
* even if the curChr is the endmarker. *) * even if the curChr is the endmarker. *)
fun getFollowsForPositionAndChar (regex: parse_tree, pos, curChr) = fun getFollowsForPositionAndChar (regex: parse_tree, pos, curChr) =
case regex of case regex of
CHAR_LITERAL {char, position = _} => CHAR_LITERAL {char, ...} =>
let val charIsMatch = Fn.charIsEqual (char, curChr) let val charIsMatch = Fn.charIsEqual (char, curChr)
in {sawConcat = false, follows = [], charIsMatch = charIsMatch} in {sawConcat = false, follows = [], charIsMatch = charIsMatch}
end end
@@ -817,7 +829,8 @@ struct
let let
val fp = firstpos (ast, []) val fp = firstpos (ast, [])
val endMarker = val endMarker =
CHAR_LITERAL {char = Fn.endMarker, position = numStates + 1} CHAR_LITERAL
{char = Fn.endMarker, position = numStates + 1, follows = []}
val ast = CONCAT val ast = CONCAT
{ l = ast { l = ast
, leftMaxState = numStates , leftMaxState = numStates