fix minor bug with escape sequences: we should pattern match on an unescaped char, and we should return an escaped char. For example, it makes sense to pattern match on the plain unescaped #"n" and return #"\n". This is because the user inputs an escape char as a two-char sequence: a backslash \ followed by the unescaped char.

This commit is contained in:
2025-10-06 21:58:50 +01:00
parent cc5c0bf95c
commit 71786a494c
2 changed files with 26 additions and 25 deletions

View File

@@ -113,25 +113,25 @@ struct
fun isValidEscapeSequence chr =
case chr of
(* regex metacharacters *)
#"(" => true
| #")" => true
| #"[" => true
| #"]" => true
| #"+" => true
| #"*" => true
| #"|" => true
| #"?" => true
#"(" => (true, chr)
| #")" => (true, chr)
| #"[" => (true, chr)
| #"]" => (true, chr)
| #"+" => (true, chr)
| #"*" => (true, chr)
| #"|" => (true, chr)
| #"?" => (true, chr)
(* standard escape sequences *)
| #"\a" => true
| #"\b" => true
| #"\t" => true
| #"\n" => true
| #"\v" => true
| #"\f" => true
| #"\r" => true
| #"\\" => true
| #"\"" => true
| _ => false
| #"a" => (true, #"\a")
| #"b" => (true, #"\b")
| #"t" => (true, #"\t")
| #"n" => (true, #"\n")
| #"v" => (true, #"\v")
| #"f" => (true, #"\f")
| #"r" => (true, #"\r")
| #"\\" => (true, chr)
| #"\"" => (true, chr)
| _ => (false, chr)
fun computeAtom (pos, str, stateNum) =
if pos = String.size str then
@@ -151,11 +151,6 @@ struct
| NONE => NONE
end
| NONE => NONE)
| #")" => NONE
| #"?" => NONE
| #"*" => NONE
| #"+" => NONE
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #"\\" =>
(* escape sequences *)
if pos + 1 = String.size str then
@@ -163,8 +158,9 @@ struct
else
let
val chr = String.sub (str, pos + 1)
val (isValid, chr) = isValidEscapeSequence chr
in
if isValidEscapeSequence chr then
if isValid then
let
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
in
@@ -173,6 +169,11 @@ struct
else
NONE
end
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #")" => NONE
| #"?" => NONE
| #"*" => NONE
| #"+" => NONE
(* todo: [character classes] *)
| chr =>
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}

View File

@@ -1,3 +1,3 @@
hello
hello(
fellow
yellow