From 4dfee016ebb21b3c59fe6041935a14ab0751812a Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Tue, 7 Oct 2025 12:13:41 +0100 Subject: [PATCH] handle edge case in char-range: in a range like a-z, the second character may be an escape sequence, and we need to handle that case if so --- fcore/search-list/dfa-gen.sml | 51 ++++++++++++++++++++++++++++------- temp.txt | 2 +- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml index 483ef54..80dce99 100644 --- a/fcore/search-list/dfa-gen.sml +++ b/fcore/search-list/dfa-gen.sml @@ -177,15 +177,48 @@ struct pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-" andalso pos + 2 < String.size str then - (* handle character ranges like a-z *) - let - val chr2 = String.sub (str, pos + 2) - val lowChr = if chr < chr2 then chr else chr2 - val highChr = if chr > chr2 then chr else chr2 - val acc = getCharsBetween (lowChr, highChr, acc) - in - getCharsInBrackets (pos + 3, str, acc) - end + (* handle character ranges like a-z. + * There are edge cases regarding + * the second character in the range. + * We have to check that any unescaped metacharacters + * return an invalid parse state. + * We also have to unescape any escape sequences. + * *) + case String.sub (str, pos + 2) of + #"\\" => + (* second char contains an escape sequence *) + if pos + 3 < String.size str then + let + val chr2 = String.sub (str, pos + 3) + val (isValid, chr2) = isValidEscapeSequence chr2 + val acc = + if chr < chr2 then getCharsBetween (chr, chr2, acc) + else getCharsBetween (chr2, chr, acc) + in + if isValid then getCharsInBrackets (pos + 4, str, acc) + else NONE + end + else + NONE + | #"(" => NONE + | #")" => NONE + | #"[" => NONE + | #"]" => NONE + | #"+" => NONE + | #"*" => NONE + | #"|" => NONE + | #"?" => NONE + | #"." => NONE + | #"-" => NONE + | chr2 => + (* valid char range *) + let + val acc = + if chr < chr2 then getCharsBetween (chr, chr2, acc) + else getCharsBetween (chr2, chr, acc) + in + getCharsInBrackets (pos + 3, str, acc) + end else getCharsInBrackets (pos + 1, str, chr :: acc) diff --git a/temp.txt b/temp.txt index ad50577..65624dc 100644 --- a/temp.txt +++ b/temp.txt @@ -1 +1 @@ -abcdefghijklmnopqrstuvwxyz + - * / +Hello, World! - + * / ?