From 44c2fbb3c73a295699e185e22871d2e4dcaad58a Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Tue, 7 Oct 2025 09:48:10 +0100 Subject: [PATCH] handle character ranges like a-z in character class and negated character class --- fcore/search-list/dfa-gen.sml | 30 +++++++++++++++++++++++++++++- temp.txt | 4 +--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml index c5f2c58..483ef54 100644 --- a/fcore/search-list/dfa-gen.sml +++ b/fcore/search-list/dfa-gen.sml @@ -126,6 +126,7 @@ struct | #"|" => (true, chr) | #"?" => (true, chr) | #"." => (true, chr) + | #"-" => (true, chr) (* standard escape sequences *) | #"a" => (true, #"\a") | #"b" => (true, #"\b") @@ -138,6 +139,17 @@ struct | #"\"" => (true, chr) | _ => (false, chr) + fun getCharsBetween (lowChr, highChr, acc) = + if lowChr = highChr then + highChr :: acc + else + let + val acc = lowChr :: acc + val lowChr = Char.succ lowChr + in + getCharsBetween (lowChr, highChr, acc) + end + fun getCharsInBrackets (pos, str, acc) = if pos = String.size str then NONE @@ -159,7 +171,23 @@ struct let val chars = Vector.fromList acc in SOME (pos + 1, chars) end - | chr => getCharsInBrackets (pos + 1, str, chr :: acc) + | #"-" => NONE + | chr => + if + pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-" + andalso pos + 2 < String.size str + then + (* handle character ranges like a-z *) + let + val chr2 = String.sub (str, pos + 2) + val lowChr = if chr < chr2 then chr else chr2 + val highChr = if chr > chr2 then chr else chr2 + val acc = getCharsBetween (lowChr, highChr, acc) + in + getCharsInBrackets (pos + 3, str, acc) + end + else + getCharsInBrackets (pos + 1, str, chr :: acc) fun parseCharacterClass (pos, str, stateNum) = case getCharsInBrackets (pos, str, []) of diff --git a/temp.txt b/temp.txt index c550179..ad50577 100644 --- a/temp.txt +++ b/temp.txt @@ -1,3 +1 @@ -hello( -fellow -yellow +abcdefghijklmnopqrstuvwxyz + - * /