refactor nfa.sml so that lists in CONCAT and ALTERNATION cases don't need the state to be tupled with the regex

This commit is contained in:
2025-09-30 13:52:35 +01:00
parent 45fbd85183
commit 5fa784b4c6

View File

@@ -4,13 +4,23 @@ struct
datatype regex = datatype regex =
CHAR_LITERAL of char * state CHAR_LITERAL of char * state
| CONCAT of (regex * state) list * state | CONCAT of regex list * state
| ALTERNATION of (regex * state) list * state | ALTERNATION of regex list * state
| ZERO_OR_ONE of regex * state | ZERO_OR_ONE of regex * state
| ZERO_OR_MORE of regex * state | ZERO_OR_MORE of regex * state
| ONE_OR_MORE of regex * state | ONE_OR_MORE of regex * state
| GROUP of regex * state | GROUP of regex * state
(* getState node
 * Every regex constructor carries a `state` as the second component of
 * its payload; this projects that component out and ignores the rest of
 * the node. One clause per constructor keeps the match exhaustive. *)
fun getState (CHAR_LITERAL (_, st)) = st
  | getState (CONCAT (_, st)) = st
  | getState (ALTERNATION (_, st)) = st
  | getState (ZERO_OR_ONE (_, st)) = st
  | getState (ZERO_OR_MORE (_, st)) = st
  | getState (ONE_OR_MORE (_, st)) = st
  | getState (GROUP (_, st)) = st
structure NfaMatch = structure NfaMatch =
struct struct
(* test to see if NFA matches. (* test to see if NFA matches.
@@ -38,42 +48,46 @@ struct
local local
fun loop (tl, maxValid) = fun loop (tl, maxValid) =
case tl of case tl of
(_, VALID curValid) :: tl => loop (tl, Int.max (maxValid, curValid)) hd :: tl =>
| (_, UNTESTED) :: _ => UNTESTED (case getState hd of
| (_, INVALID) :: _ => VALID curValid => loop (tl, Int.max (curValid, maxValid))
raise Fail | UNTESTED => UNTESTED
"nfa.sml 24: \ | INVALID =>
\should not have INVALID state in acc" raise Fail
"nfa.sml 24: \
\should not have INVALID state in acc")
| [] => VALID maxValid | [] => VALID maxValid
in in
fun getAlternationState acc = fun getAlternationState acc =
case acc of case acc of
(_, VALID maxValid) :: tl => loop (tl, maxValid) hd :: tl =>
| (_, UNTESTED) :: _ => UNTESTED (case getState hd of
| (_, INVALID) :: _ => VALID maxValid => loop (tl, maxValid)
raise Fail | UNTESTED => UNTESTED
"nfa.sml 26: \ | INVALID =>
\should not have INVALID state in acc" raise Fail
"nfa.sml 65: \
\should not have INVALID state in acc")
| [] => UNTESTED | [] => UNTESTED
end end
fun rebuildConcat (lst, chr, idx) = fun rebuildConcat (lst, chr, idx) =
case lst of case lst of
[(hd, _)] => [hd] =>
let let
val (hd: regex, state: state) = rebuild (hd, chr, idx) val (hd: regex, state: state) = rebuild (hd, chr, idx)
val result = [(hd, state)] val result = [hd]
val concat = CONCAT (result, state) val concat = CONCAT (result, state)
in in
(concat, state) (concat, state)
end end
| (hd, _) :: tl => | hd :: tl =>
let let
val (hd, state) = rebuild (hd, chr, idx) val (hd, state) = rebuild (hd, chr, idx)
in in
case state of case state of
UNTESTED => UNTESTED =>
let val concat = CONCAT ((hd, state) :: tl, UNTESTED) let val concat = CONCAT (hd :: tl, UNTESTED)
in (concat, UNTESTED) in (concat, UNTESTED)
end end
| INVALID => | INVALID =>
@@ -93,25 +107,25 @@ struct
and rebuildAlternation (lst, chr, idx, acc) = and rebuildAlternation (lst, chr, idx, acc) =
case lst of case lst of
[(hd, _)] => [hd] =>
let let
val (hd, state) = rebuild (hd, chr, idx) val (hd, state) = rebuild (hd, chr, idx)
val acc = val acc =
case state of case state of
VALID _ => (hd, state) :: acc VALID _ => hd :: acc
| UNTESTED => (hd, state) :: acc | UNTESTED => hd :: acc
| INVALID => acc | INVALID => acc
val state = getAlternationState acc val state = getAlternationState acc
in in
(ALTERNATION (acc, state), state) (ALTERNATION (acc, state), state)
end end
| (hd, _) :: tl => | hd :: tl =>
let let
val (hd, state) = rebuild (hd, chr, idx) val (hd, state) = rebuild (hd, chr, idx)
val acc = val acc =
case state of case state of
VALID _ => (hd, state) :: acc VALID _ => hd :: acc
| UNTESTED => (hd, state) :: acc | UNTESTED => hd :: acc
| INVALID => acc | INVALID => acc
in in
rebuildAlternation (tl, chr, idx, acc) rebuildAlternation (tl, chr, idx, acc)
@@ -252,8 +266,7 @@ struct
SOME rhs => SOME rhs =>
let let
val rhs = GROUP (rhs, UNTESTED) val rhs = GROUP (rhs, UNTESTED)
val result = CONCAT val result = CONCAT ([lhs, rhs], UNTESTED)
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in in
climb (groupEndIdx + 1, str, result, groupLevel) climb (groupEndIdx + 1, str, result, groupLevel)
end end
@@ -274,10 +287,8 @@ struct
val result = val result =
case rhs of case rhs of
ALTERNATION (lst, state) => ALTERNATION (lst, state) =>
ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED) ALTERNATION (lhs :: lst, UNTESTED)
| _ => | _ => ALTERNATION ([lhs, rhs], UNTESTED)
ALTERNATION
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in in
SOME (pos, result) SOME (pos, result)
end end
@@ -317,10 +328,8 @@ struct
let let
val result = val result =
case rhs of case rhs of
CONCAT (lst, _) => CONCAT (lst, _) => CONCAT (lhs :: lst, UNTESTED)
CONCAT ((lhs, UNTESTED) :: lst, UNTESTED) | _ => CONCAT ([lhs, rhs], UNTESTED)
| _ =>
CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in in
SOME (pos, result) SOME (pos, result)
end end