Fix minor bug with escape sequences: we should pattern match on the unescaped char and return the escaped char. For example, it makes sense to pattern match on the plain unescaped #"n" and return #"\n". This is because the user enters an escape sequence as two characters: a backslash \ followed by the character that identifies the escape.

This commit is contained in:
2025-10-06 21:58:50 +01:00
parent cc5c0bf95c
commit 71786a494c
2 changed files with 26 additions and 25 deletions

View File

@@ -113,25 +113,25 @@ struct
fun isValidEscapeSequence chr = fun isValidEscapeSequence chr =
case chr of case chr of
(* regex metacharacters *) (* regex metacharacters *)
#"(" => true #"(" => (true, chr)
| #")" => true | #")" => (true, chr)
| #"[" => true | #"[" => (true, chr)
| #"]" => true | #"]" => (true, chr)
| #"+" => true | #"+" => (true, chr)
| #"*" => true | #"*" => (true, chr)
| #"|" => true | #"|" => (true, chr)
| #"?" => true | #"?" => (true, chr)
(* standard escape sequences *) (* standard escape sequences *)
| #"\a" => true | #"a" => (true, #"\a")
| #"\b" => true | #"b" => (true, #"\b")
| #"\t" => true | #"t" => (true, #"\t")
| #"\n" => true | #"n" => (true, #"\n")
| #"\v" => true | #"v" => (true, #"\v")
| #"\f" => true | #"f" => (true, #"\f")
| #"\r" => true | #"r" => (true, #"\r")
| #"\\" => true | #"\\" => (true, chr)
| #"\"" => true | #"\"" => (true, chr)
| _ => false | _ => (false, chr)
fun computeAtom (pos, str, stateNum) = fun computeAtom (pos, str, stateNum) =
if pos = String.size str then if pos = String.size str then
@@ -151,11 +151,6 @@ struct
| NONE => NONE | NONE => NONE
end end
| NONE => NONE) | NONE => NONE)
| #")" => NONE
| #"?" => NONE
| #"*" => NONE
| #"+" => NONE
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #"\\" => | #"\\" =>
(* escape sequences *) (* escape sequences *)
if pos + 1 = String.size str then if pos + 1 = String.size str then
@@ -163,8 +158,9 @@ struct
else else
let let
val chr = String.sub (str, pos + 1) val chr = String.sub (str, pos + 1)
val (isValid, chr) = isValidEscapeSequence chr
in in
if isValidEscapeSequence chr then if isValid then
let let
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
in in
@@ -173,6 +169,11 @@ struct
else else
NONE NONE
end end
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #")" => NONE
| #"?" => NONE
| #"*" => NONE
| #"+" => NONE
(* todo: [character classes] *) (* todo: [character classes] *)
| chr => | chr =>
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1} let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}

View File

@@ -1,3 +1,3 @@
hello hello(
fellow fellow
yellow yellow