Fix a minor bug with escape sequences: we should pattern match on the unescaped char and return the escaped char. For example, it makes sense to pattern match on the plain, unescaped #"n" and return #"\n". This is because the user enters an escape character as a two-char sequence: a backslash \ followed by a plain character.
This commit is contained in:
@@ -113,25 +113,25 @@ struct
|
|||||||
fun isValidEscapeSequence chr =
|
fun isValidEscapeSequence chr =
|
||||||
case chr of
|
case chr of
|
||||||
(* regex metacharacters *)
|
(* regex metacharacters *)
|
||||||
#"(" => true
|
#"(" => (true, chr)
|
||||||
| #")" => true
|
| #")" => (true, chr)
|
||||||
| #"[" => true
|
| #"[" => (true, chr)
|
||||||
| #"]" => true
|
| #"]" => (true, chr)
|
||||||
| #"+" => true
|
| #"+" => (true, chr)
|
||||||
| #"*" => true
|
| #"*" => (true, chr)
|
||||||
| #"|" => true
|
| #"|" => (true, chr)
|
||||||
| #"?" => true
|
| #"?" => (true, chr)
|
||||||
(* standard escape sequences *)
|
(* standard escape sequences *)
|
||||||
| #"\a" => true
|
| #"a" => (true, #"\a")
|
||||||
| #"\b" => true
|
| #"b" => (true, #"\b")
|
||||||
| #"\t" => true
|
| #"t" => (true, #"\t")
|
||||||
| #"\n" => true
|
| #"n" => (true, #"\n")
|
||||||
| #"\v" => true
|
| #"v" => (true, #"\v")
|
||||||
| #"\f" => true
|
| #"f" => (true, #"\f")
|
||||||
| #"\r" => true
|
| #"r" => (true, #"\r")
|
||||||
| #"\\" => true
|
| #"\\" => (true, chr)
|
||||||
| #"\"" => true
|
| #"\"" => (true, chr)
|
||||||
| _ => false
|
| _ => (false, chr)
|
||||||
|
|
||||||
fun computeAtom (pos, str, stateNum) =
|
fun computeAtom (pos, str, stateNum) =
|
||||||
if pos = String.size str then
|
if pos = String.size str then
|
||||||
@@ -151,11 +151,6 @@ struct
|
|||||||
| NONE => NONE
|
| NONE => NONE
|
||||||
end
|
end
|
||||||
| NONE => NONE)
|
| NONE => NONE)
|
||||||
| #")" => NONE
|
|
||||||
| #"?" => NONE
|
|
||||||
| #"*" => NONE
|
|
||||||
| #"+" => NONE
|
|
||||||
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
|
||||||
| #"\\" =>
|
| #"\\" =>
|
||||||
(* escape sequences *)
|
(* escape sequences *)
|
||||||
if pos + 1 = String.size str then
|
if pos + 1 = String.size str then
|
||||||
@@ -163,8 +158,9 @@ struct
|
|||||||
else
|
else
|
||||||
let
|
let
|
||||||
val chr = String.sub (str, pos + 1)
|
val chr = String.sub (str, pos + 1)
|
||||||
|
val (isValid, chr) = isValidEscapeSequence chr
|
||||||
in
|
in
|
||||||
if isValidEscapeSequence chr then
|
if isValid then
|
||||||
let
|
let
|
||||||
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
||||||
in
|
in
|
||||||
@@ -173,6 +169,11 @@ struct
|
|||||||
else
|
else
|
||||||
NONE
|
NONE
|
||||||
end
|
end
|
||||||
|
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
||||||
|
| #")" => NONE
|
||||||
|
| #"?" => NONE
|
||||||
|
| #"*" => NONE
|
||||||
|
| #"+" => NONE
|
||||||
(* todo: [character classes] *)
|
(* todo: [character classes] *)
|
||||||
| chr =>
|
| chr =>
|
||||||
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
||||||
|
|||||||
Reference in New Issue
Block a user