Refactor nfa.sml so that the regex lists in the CONCAT and ALTERNATION constructors no longer tuple each regex with its state; a new getState function reads the state from the regex itself
This commit is contained in:
@@ -4,13 +4,23 @@ struct
|
|||||||
|
|
||||||
datatype regex =
|
datatype regex =
|
||||||
CHAR_LITERAL of char * state
|
CHAR_LITERAL of char * state
|
||||||
| CONCAT of (regex * state) list * state
|
| CONCAT of regex list * state
|
||||||
| ALTERNATION of (regex * state) list * state
|
| ALTERNATION of regex list * state
|
||||||
| ZERO_OR_ONE of regex * state
|
| ZERO_OR_ONE of regex * state
|
||||||
| ZERO_OR_MORE of regex * state
|
| ZERO_OR_MORE of regex * state
|
||||||
| ONE_OR_MORE of regex * state
|
| ONE_OR_MORE of regex * state
|
||||||
| GROUP of regex * state
|
| GROUP of regex * state
|
||||||
|
|
||||||
|
fun getState regex =
|
||||||
|
case regex of
|
||||||
|
CHAR_LITERAL (_, state) => state
|
||||||
|
| CONCAT (_, state) => state
|
||||||
|
| ALTERNATION (_, state) => state
|
||||||
|
| ZERO_OR_ONE (_, state) => state
|
||||||
|
| ZERO_OR_MORE (_, state) => state
|
||||||
|
| ONE_OR_MORE (_, state) => state
|
||||||
|
| GROUP (_, state) => state
|
||||||
|
|
||||||
structure NfaMatch =
|
structure NfaMatch =
|
||||||
struct
|
struct
|
||||||
(* test to see if NFA matches.
|
(* test to see if NFA matches.
|
||||||
@@ -38,42 +48,46 @@ struct
|
|||||||
local
|
local
|
||||||
fun loop (tl, maxValid) =
|
fun loop (tl, maxValid) =
|
||||||
case tl of
|
case tl of
|
||||||
(_, VALID curValid) :: tl => loop (tl, Int.max (maxValid, curValid))
|
hd :: tl =>
|
||||||
| (_, UNTESTED) :: _ => UNTESTED
|
(case getState hd of
|
||||||
| (_, INVALID) :: _ =>
|
VALID curValid => loop (tl, Int.max (curValid, maxValid))
|
||||||
|
| UNTESTED => UNTESTED
|
||||||
|
| INVALID =>
|
||||||
raise Fail
|
raise Fail
|
||||||
"nfa.sml 24: \
|
"nfa.sml 24: \
|
||||||
\should not have INVALID state in acc"
|
\should not have INVALID state in acc")
|
||||||
| [] => VALID maxValid
|
| [] => VALID maxValid
|
||||||
in
|
in
|
||||||
fun getAlternationState acc =
|
fun getAlternationState acc =
|
||||||
case acc of
|
case acc of
|
||||||
(_, VALID maxValid) :: tl => loop (tl, maxValid)
|
hd :: tl =>
|
||||||
| (_, UNTESTED) :: _ => UNTESTED
|
(case getState hd of
|
||||||
| (_, INVALID) :: _ =>
|
VALID maxValid => loop (tl, maxValid)
|
||||||
|
| UNTESTED => UNTESTED
|
||||||
|
| INVALID =>
|
||||||
raise Fail
|
raise Fail
|
||||||
"nfa.sml 26: \
|
"nfa.sml 65: \
|
||||||
\should not have INVALID state in acc"
|
\should not have INVALID state in acc")
|
||||||
| [] => UNTESTED
|
| [] => UNTESTED
|
||||||
end
|
end
|
||||||
|
|
||||||
fun rebuildConcat (lst, chr, idx) =
|
fun rebuildConcat (lst, chr, idx) =
|
||||||
case lst of
|
case lst of
|
||||||
[(hd, _)] =>
|
[hd] =>
|
||||||
let
|
let
|
||||||
val (hd: regex, state: state) = rebuild (hd, chr, idx)
|
val (hd: regex, state: state) = rebuild (hd, chr, idx)
|
||||||
val result = [(hd, state)]
|
val result = [hd]
|
||||||
val concat = CONCAT (result, state)
|
val concat = CONCAT (result, state)
|
||||||
in
|
in
|
||||||
(concat, state)
|
(concat, state)
|
||||||
end
|
end
|
||||||
| (hd, _) :: tl =>
|
| hd :: tl =>
|
||||||
let
|
let
|
||||||
val (hd, state) = rebuild (hd, chr, idx)
|
val (hd, state) = rebuild (hd, chr, idx)
|
||||||
in
|
in
|
||||||
case state of
|
case state of
|
||||||
UNTESTED =>
|
UNTESTED =>
|
||||||
let val concat = CONCAT ((hd, state) :: tl, UNTESTED)
|
let val concat = CONCAT (hd :: tl, UNTESTED)
|
||||||
in (concat, UNTESTED)
|
in (concat, UNTESTED)
|
||||||
end
|
end
|
||||||
| INVALID =>
|
| INVALID =>
|
||||||
@@ -93,25 +107,25 @@ struct
|
|||||||
|
|
||||||
and rebuildAlternation (lst, chr, idx, acc) =
|
and rebuildAlternation (lst, chr, idx, acc) =
|
||||||
case lst of
|
case lst of
|
||||||
[(hd, _)] =>
|
[hd] =>
|
||||||
let
|
let
|
||||||
val (hd, state) = rebuild (hd, chr, idx)
|
val (hd, state) = rebuild (hd, chr, idx)
|
||||||
val acc =
|
val acc =
|
||||||
case state of
|
case state of
|
||||||
VALID _ => (hd, state) :: acc
|
VALID _ => hd :: acc
|
||||||
| UNTESTED => (hd, state) :: acc
|
| UNTESTED => hd :: acc
|
||||||
| INVALID => acc
|
| INVALID => acc
|
||||||
val state = getAlternationState acc
|
val state = getAlternationState acc
|
||||||
in
|
in
|
||||||
(ALTERNATION (acc, state), state)
|
(ALTERNATION (acc, state), state)
|
||||||
end
|
end
|
||||||
| (hd, _) :: tl =>
|
| hd :: tl =>
|
||||||
let
|
let
|
||||||
val (hd, state) = rebuild (hd, chr, idx)
|
val (hd, state) = rebuild (hd, chr, idx)
|
||||||
val acc =
|
val acc =
|
||||||
case state of
|
case state of
|
||||||
VALID _ => (hd, state) :: acc
|
VALID _ => hd :: acc
|
||||||
| UNTESTED => (hd, state) :: acc
|
| UNTESTED => hd :: acc
|
||||||
| INVALID => acc
|
| INVALID => acc
|
||||||
in
|
in
|
||||||
rebuildAlternation (tl, chr, idx, acc)
|
rebuildAlternation (tl, chr, idx, acc)
|
||||||
@@ -252,8 +266,7 @@ struct
|
|||||||
SOME rhs =>
|
SOME rhs =>
|
||||||
let
|
let
|
||||||
val rhs = GROUP (rhs, UNTESTED)
|
val rhs = GROUP (rhs, UNTESTED)
|
||||||
val result = CONCAT
|
val result = CONCAT ([lhs, rhs], UNTESTED)
|
||||||
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
|
|
||||||
in
|
in
|
||||||
climb (groupEndIdx + 1, str, result, groupLevel)
|
climb (groupEndIdx + 1, str, result, groupLevel)
|
||||||
end
|
end
|
||||||
@@ -274,10 +287,8 @@ struct
|
|||||||
val result =
|
val result =
|
||||||
case rhs of
|
case rhs of
|
||||||
ALTERNATION (lst, state) =>
|
ALTERNATION (lst, state) =>
|
||||||
ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED)
|
ALTERNATION (lhs :: lst, UNTESTED)
|
||||||
| _ =>
|
| _ => ALTERNATION ([lhs, rhs], UNTESTED)
|
||||||
ALTERNATION
|
|
||||||
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
|
|
||||||
in
|
in
|
||||||
SOME (pos, result)
|
SOME (pos, result)
|
||||||
end
|
end
|
||||||
@@ -317,10 +328,8 @@ struct
|
|||||||
let
|
let
|
||||||
val result =
|
val result =
|
||||||
case rhs of
|
case rhs of
|
||||||
CONCAT (lst, _) =>
|
CONCAT (lst, _) => CONCAT (lhs :: lst, UNTESTED)
|
||||||
CONCAT ((lhs, UNTESTED) :: lst, UNTESTED)
|
| _ => CONCAT ([lhs, rhs], UNTESTED)
|
||||||
| _ =>
|
|
||||||
CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
|
|
||||||
in
|
in
|
||||||
SOME (pos, result)
|
SOME (pos, result)
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user