Flatten repeated concatenations and alternations into a single list when possible (i.e. when the right-hand operand is already a CONCAT or ALTERNATION of the same kind)

This commit is contained in:
2025-09-28 22:23:48 +01:00
parent 032ca56bbf
commit d75b1a18ff

View File

@@ -1,9 +1,9 @@
structure Nfa = structure Nfa =
struct struct
datatype regex = datatype regex =
CONCAT of regex * regex CHAR_LITERAL of char
| CHAR_LITERAL of char | CONCAT of regex list
| ALTERNATION of regex * regex | ALTERNATION of regex list
| ZERO_OR_ONE of regex | ZERO_OR_ONE of regex
| ZERO_OR_MORE of regex | ZERO_OR_MORE of regex
| ONE_OR_MORE of regex | ONE_OR_MORE of regex
@@ -45,7 +45,7 @@ struct
(str, pos + 1, groupEndIdx - pos - 1) (str, pos + 1, groupEndIdx - pos - 1)
val rhs = climb substr val rhs = climb substr
val rhs = GROUP rhs val rhs = GROUP rhs
val result = CONCAT (lhs, rhs) val result = CONCAT [lhs, rhs]
in in
helpClimb (groupEndIdx + 1, str, result, groupLevel) helpClimb (groupEndIdx + 1, str, result, groupLevel)
end end
@@ -57,7 +57,10 @@ struct
val chr = String.sub (str, pos + 1) val chr = String.sub (str, pos + 1)
val chr = CHAR_LITERAL chr val chr = CHAR_LITERAL chr
val (pos, rhs) = helpClimb (pos + 2, str, chr, altLevel) val (pos, rhs) = helpClimb (pos + 2, str, chr, altLevel)
val result = ALTERNATION (lhs, rhs) val result =
case rhs of
ALTERNATION lst => ALTERNATION (lhs :: lst)
| _ => ALTERNATION [lhs, rhs]
in in
(pos, result) (pos, result)
end end
@@ -89,7 +92,10 @@ struct
let let
val chr = CHAR_LITERAL chr val chr = CHAR_LITERAL chr
val (pos, rhs) = helpClimb (pos + 1, str, chr, concatLevel) val (pos, rhs) = helpClimb (pos + 1, str, chr, concatLevel)
val result = CONCAT (lhs, rhs) val result =
case rhs of
CONCAT lst => CONCAT (lhs :: lst)
| _ => CONCAT [lhs, rhs]
in in
(pos, result) (pos, result)
end end