when parsing a string into an NFA, return an option type if the syntax is invalid

This commit is contained in:
2025-09-29 13:34:55 +01:00
parent 7dc94632d6
commit 6d2b43606f
2 changed files with 79 additions and 60 deletions

View File

@@ -240,12 +240,10 @@ struct
fun buildRange (buffer, searchString, finishIdx) = fun buildRange (buffer, searchString, finishIdx) =
if String.size searchString > 0 then if String.size searchString > 0 then
let case Nfa.parse searchString of
val nfa = Nfa.parse searchString SOME nfa =>
val startIdx = #idx buffer Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa)
in | NONE => empty
Nfa.getMatchesInRange (startIdx, finishIdx, buffer : LineGap.t, nfa)
end
else else
empty empty

View File

@@ -357,9 +357,7 @@ struct
structure ParseNfa = structure ParseNfa =
struct struct
(* parsing through precedence climbing algorithm. (* parsing through precedence climbing algorithm. *)
* Todo: return a a `regex option`, and use bounds-checking
* to ensure we don't raise an exception. *)
val groupLevel = 1 val groupLevel = 1
val postfixLevel = 2 val postfixLevel = 2
@@ -369,107 +367,130 @@ struct
local local
fun loop (pos, str, openParens, closeParens) = fun loop (pos, str, openParens, closeParens) =
if pos = String.size str then if pos = String.size str then
pos NONE
else else
case String.sub (str, pos) of case String.sub (str, pos) of
#"(" => loop (pos + 1, str, openParens + 1, closeParens) #"(" => loop (pos + 1, str, openParens + 1, closeParens)
| #")" => | #")" =>
if closeParens + 1 = openParens then pos if closeParens + 1 = openParens then SOME pos
else loop (pos + 1, str, openParens, closeParens + 1) else loop (pos + 1, str, openParens, closeParens + 1)
| _ => loop (pos + 1, str, openParens, closeParens) | _ => loop (pos + 1, str, openParens, closeParens)
in in
fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0) fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0)
end end
fun climb (pos, str, lhs, level) = fun climb (pos, str, lhs, level) : (int * regex) option =
if pos = String.size str then if pos = String.size str then
(pos, lhs) SOME (pos, lhs)
else else
case String.sub (str, pos) of case String.sub (str, pos) of
#")" => (pos + 1, lhs) #")" => SOME (pos + 1, lhs)
| #"(" => | #"(" =>
if level < groupLevel then if level < groupLevel then
(pos, lhs) SOME (pos, lhs)
else else
let (case getRightParenIdx (pos + 1, str) of
val groupEndIdx = getRightParenIdx (pos + 1, str) SOME groupEndIdx =>
val substr = String.substring let
(str, pos + 1, groupEndIdx - pos - 1) val substr = String.substring
val rhs = parse substr (str, pos + 1, groupEndIdx - pos - 1)
val rhs = GROUP (rhs, UNTESTED) in
val result = CONCAT (case parse substr of
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) SOME rhs =>
in let
climb (groupEndIdx + 1, str, result, groupLevel) val rhs = GROUP (rhs, UNTESTED)
end val result = CONCAT
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in
climb (groupEndIdx + 1, str, result, groupLevel)
end
| NONE => NONE)
end
| NONE => NONE)
| #"|" => | #"|" =>
if level < altLevel then if level < altLevel then
(pos, lhs) SOME (pos, lhs)
else else if pos + 1 < String.size str then
let let
val chr = String.sub (str, pos + 1) val chr = String.sub (str, pos + 1)
val chr = CHAR_LITERAL (chr, UNTESTED) val chr = CHAR_LITERAL (chr, UNTESTED)
val (pos, rhs) = climb (pos + 2, str, chr, altLevel)
val result =
case rhs of
ALTERNATION (lst, state) =>
ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED)
| _ =>
ALTERNATION ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in in
(pos, result) case climb (pos + 2, str, chr, altLevel) of
SOME (pos, rhs) =>
let
val result =
case rhs of
ALTERNATION (lst, state) =>
ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED)
| _ =>
ALTERNATION
([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in
SOME (pos, result)
end
| NONE => NONE
end end
else
NONE
| #"?" => | #"?" =>
if level < postfixLevel then if level < postfixLevel then
(pos, lhs) SOME (pos, lhs)
else else
let val lhs = ZERO_OR_ONE (lhs, UNTESTED) let val lhs = ZERO_OR_ONE (lhs, UNTESTED)
in climb (pos + 1, str, lhs, postfixLevel) in climb (pos + 1, str, lhs, postfixLevel)
end end
| #"*" => | #"*" =>
if level < postfixLevel then if level < postfixLevel then
(pos, lhs) SOME (pos, lhs)
else else
let val lhs = ZERO_OR_MORE (lhs, UNTESTED) let val lhs = ZERO_OR_MORE (lhs, UNTESTED)
in climb (pos + 1, str, lhs, postfixLevel) in climb (pos + 1, str, lhs, postfixLevel)
end end
| #"+" => | #"+" =>
if level < postfixLevel then if level < postfixLevel then
(pos, lhs) SOME (pos, lhs)
else else
let val lhs = ONE_OR_MORE (lhs, UNTESTED) let val lhs = ONE_OR_MORE (lhs, UNTESTED)
in climb (pos + 1, str, lhs, postfixLevel) in climb (pos + 1, str, lhs, postfixLevel)
end end
| chr => | chr =>
if level < concatLevel then if level < concatLevel then
(pos, lhs) SOME (pos, lhs)
else else
let case
val chr = CHAR_LITERAL (chr, UNTESTED) climb (pos + 1, str, CHAR_LITERAL (chr, UNTESTED), concatLevel)
val (pos, rhs) = climb (pos + 1, str, chr, concatLevel) of
val result = SOME (pos, rhs) =>
case rhs of let
CONCAT (lst, _) => CONCAT ((lhs, UNTESTED) :: lst, UNTESTED) val result =
| _ => CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) case rhs of
in CONCAT (lst, _) =>
(pos, result) CONCAT ((lhs, UNTESTED) :: lst, UNTESTED)
end | _ =>
CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED)
in
SOME (pos, result)
end
| NONE => NONE
and loop (pos, str, ast) = and loop (pos, str, ast) =
if pos = String.size str then if pos = String.size str then
ast SOME ast
else else
let val (pos, ast) = climb (pos, str, ast, altLevel) case climb (pos, str, ast, altLevel) of
in loop (pos, str, ast) SOME (pos, ast) => loop (pos, str, ast)
end | NONE => NONE
and parse str = and parse str =
let if String.size str > 0 then
val chr = String.sub (str, 0) let
val chr = CHAR_LITERAL (chr, UNTESTED) val chr = String.sub (str, 0)
in val chr = CHAR_LITERAL (chr, UNTESTED)
loop (1, str, chr) in
end loop (1, str, chr)
end
else
NONE
end end
val parse = ParseNfa.parse val parse = ParseNfa.parse