From 71786a494c470b855c74a281ac445e135f41b151 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Mon, 6 Oct 2025 21:58:50 +0100 Subject: [PATCH] fix minor bug with escape sequences: we should pattern match on an unescaped char, and we should return an escaped char. For example, it makes sense to pattern match on plain unescaped #"n" and return #"\n". This is because the user inputs escape chars as a two-char sequence, prefixed by a backslash \ character --- fcore/search-list/dfa-gen.sml | 49 ++++++++++++++++++----------------- temp.txt | 2 +- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml index 1ba181c..29e5d7b 100644 --- a/fcore/search-list/dfa-gen.sml +++ b/fcore/search-list/dfa-gen.sml @@ -113,25 +113,25 @@ struct fun isValidEscapeSequence chr = case chr of (* regex metacharacters *) - #"(" => true - | #")" => true - | #"[" => true - | #"]" => true - | #"+" => true - | #"*" => true - | #"|" => true - | #"?" => true + #"(" => (true, chr) + | #")" => (true, chr) + | #"[" => (true, chr) + | #"]" => (true, chr) + | #"+" => (true, chr) + | #"*" => (true, chr) + | #"|" => (true, chr) + | #"?" => (true, chr) (* standard escape sequences *) - | #"\a" => true - | #"\b" => true - | #"\t" => true - | #"\n" => true - | #"\v" => true - | #"\f" => true - | #"\r" => true - | #"\\" => true - | #"\"" => true - | _ => false + | #"a" => (true, #"\a") + | #"b" => (true, #"\b") + | #"t" => (true, #"\t") + | #"n" => (true, #"\n") + | #"v" => (true, #"\v") + | #"f" => (true, #"\f") + | #"r" => (true, #"\r") + | #"\\" => (true, chr) + | #"\"" => (true, chr) + | _ => (false, chr) fun computeAtom (pos, str, stateNum) = if pos = String.size str then @@ -151,11 +151,6 @@ struct | NONE => NONE end | NONE => NONE) - | #")" => NONE - | #"?" => NONE - | #"*" => NONE - | #"+" => NONE - | #"." 
=> SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1) | #"\\" => (* escape sequences *) if pos + 1 = String.size str then @@ -163,8 +158,9 @@ struct else let val chr = String.sub (str, pos + 1) + val (isValid, chr) = isValidEscapeSequence chr in - if isValidEscapeSequence chr then + if isValid then let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} in @@ -173,6 +169,11 @@ struct else NONE end + | #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1) + | #")" => NONE + | #"?" => NONE + | #"*" => NONE + | #"+" => NONE (* todo: [character classes] *) | chr => let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} diff --git a/temp.txt b/temp.txt index cba94ed..c550179 100644 --- a/temp.txt +++ b/temp.txt @@ -1,3 +1,3 @@ -hello +hello( fellow yellow