From 6d2b43606f269301498ba85a1765c1282d77b7c3 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Mon, 29 Sep 2025 13:34:55 +0100 Subject: [PATCH] when parsing a string into an NFA, return an option type if the syntax is invalid --- fcore/search-list.sml | 10 ++- fcore/search-list/nfa.sml | 129 ++++++++++++++++++++++---------------- 2 files changed, 79 insertions(+), 60 deletions(-) diff --git a/fcore/search-list.sml b/fcore/search-list.sml index 2a0f752..401abbd 100644 --- a/fcore/search-list.sml +++ b/fcore/search-list.sml @@ -240,12 +240,10 @@ struct fun buildRange (buffer, searchString, finishIdx) = if String.size searchString > 0 then - let - val nfa = Nfa.parse searchString - val startIdx = #idx buffer - in - Nfa.getMatchesInRange (startIdx, finishIdx, buffer : LineGap.t, nfa) - end + case Nfa.parse searchString of + SOME nfa => + Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa) + | NONE => empty else empty diff --git a/fcore/search-list/nfa.sml b/fcore/search-list/nfa.sml index d054df3..a6ba84e 100644 --- a/fcore/search-list/nfa.sml +++ b/fcore/search-list/nfa.sml @@ -357,9 +357,7 @@ struct structure ParseNfa = struct - (* parsing through precedence climbing algorithm. - * Todo: return a a `regex option`, and use bounds-checking - * to ensure we don't raise an exception. *) + (* parsing through precedence climbing algorithm. *) val groupLevel = 1 val postfixLevel = 2 @@ -369,107 +367,130 @@ struct local fun loop (pos, str, openParens, closeParens) = if pos = String.size str then - pos + NONE else case String.sub (str, pos) of #"(" => loop (pos + 1, str, openParens + 1, closeParens) | #")" => - if closeParens + 1 = openParens then pos + if closeParens + 1 = openParens then SOME pos else loop (pos + 1, str, openParens, closeParens + 1) | _ => loop (pos + 1, str, openParens, closeParens) in fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0) end - fun climb (pos, str, lhs, level) = + fun climb (pos, str, lhs, level) : (int * regex) option = if pos = String.size str then - (pos, lhs) + SOME (pos, lhs) else case String.sub (str, pos) of - #")" => (pos + 1, lhs) + #")" => SOME (pos + 1, lhs) | #"(" => if level < groupLevel then - (pos, lhs) + SOME (pos, lhs) else - let - val groupEndIdx = getRightParenIdx (pos + 1, str) - val substr = String.substring - (str, pos + 1, groupEndIdx - pos - 1) - val rhs = parse substr - val rhs = GROUP (rhs, UNTESTED) - val result = CONCAT - ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) - in - climb (groupEndIdx + 1, str, result, groupLevel) - end + (case getRightParenIdx (pos + 1, str) of + SOME groupEndIdx => + let + val substr = String.substring + (str, pos + 1, groupEndIdx - pos - 1) + in + (case parse substr of + SOME rhs => + let + val rhs = GROUP (rhs, UNTESTED) + val result = CONCAT + ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) + in + climb (groupEndIdx + 1, str, result, groupLevel) + end + | NONE => NONE) + end + | NONE => NONE) | #"|" => if level < altLevel then - (pos, lhs) - else + SOME (pos, lhs) + else if pos + 1 < String.size str then let val chr = String.sub (str, pos + 1) val chr = CHAR_LITERAL (chr, UNTESTED) - val (pos, rhs) = climb (pos + 2, str, chr, altLevel) - val result = - case rhs of - ALTERNATION (lst, state) => - ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED) - | _ => - ALTERNATION ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) in - (pos, result) + case climb (pos + 2, str, chr, altLevel) of + SOME (pos, rhs) => + let + val result = + case rhs of + ALTERNATION (lst, state) => + ALTERNATION ((lhs, UNTESTED) :: lst, UNTESTED) + | _ => + ALTERNATION + ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) + in + SOME (pos, result) + end + | NONE => NONE end + else + NONE | #"?" => if level < postfixLevel then - (pos, lhs) + SOME (pos, lhs) else let val lhs = ZERO_OR_ONE (lhs, UNTESTED) in climb (pos + 1, str, lhs, postfixLevel) end | #"*" => if level < postfixLevel then - (pos, lhs) + SOME (pos, lhs) else let val lhs = ZERO_OR_MORE (lhs, UNTESTED) in climb (pos + 1, str, lhs, postfixLevel) end | #"+" => if level < postfixLevel then - (pos, lhs) + SOME (pos, lhs) else let val lhs = ONE_OR_MORE (lhs, UNTESTED) in climb (pos + 1, str, lhs, postfixLevel) end | chr => if level < concatLevel then - (pos, lhs) + SOME (pos, lhs) else - let - val chr = CHAR_LITERAL (chr, UNTESTED) - val (pos, rhs) = climb (pos + 1, str, chr, concatLevel) - val result = - case rhs of - CONCAT (lst, _) => CONCAT ((lhs, UNTESTED) :: lst, UNTESTED) - | _ => CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) - in - (pos, result) - end + case + climb (pos + 1, str, CHAR_LITERAL (chr, UNTESTED), concatLevel) + of + SOME (pos, rhs) => + let + val result = + case rhs of + CONCAT (lst, _) => + CONCAT ((lhs, UNTESTED) :: lst, UNTESTED) + | _ => + CONCAT ([(lhs, UNTESTED), (rhs, UNTESTED)], UNTESTED) + in + SOME (pos, result) + end + | NONE => NONE and loop (pos, str, ast) = if pos = String.size str then - ast + SOME ast else - let val (pos, ast) = climb (pos, str, ast, altLevel) - in loop (pos, str, ast) - end + case climb (pos, str, ast, altLevel) of + SOME (pos, ast) => loop (pos, str, ast) + | NONE => NONE and parse str = - let - val chr = String.sub (str, 0) - val chr = CHAR_LITERAL (chr, UNTESTED) - in - loop (1, str, chr) - end + if String.size str > 0 then + let + val chr = String.sub (str, 0) + val chr = CHAR_LITERAL (chr, UNTESTED) + in + loop (1, str, chr) + end + else + NONE end val parse = ParseNfa.parse