implement escape sequences for regex
This commit is contained in:
@@ -1,63 +0,0 @@
|
|||||||
structure EscapeString =
struct
  (* Rewrites backslash escape sequences in a string into the characters
   * they denote.
   *
   * The escape sequences handled are the ones recognised in Standard ML
   * (see `simpleEscape` and `decodeEscape` below).
   * A reference is available here:
   * https://smlfamily.github.io/Basis/char.html#SIG:CHAR.fromCString:VAL
   *
   * However, three omissions have been made:
   * - \ddd denoting an integer in the range 0 - 255
   * - \uxxxx denoting the character whose code is the integer xxxx
   * - \f f\ denoting a sequence of characters to ignore
   *
   * In the first two cases, it is easier to type the character directly.
   * In the third case, there is little use for it.
   *
   * The input is scanned from left to right so that every backslash is
   * paired with the character that actually follows it. Scanning from the
   * right mis-pairs characters: in backslash-backslash-n the `n` would be
   * consumed by the second backslash even though that backslash is itself
   * the payload of the first one.
   *)

  (* Maps the character that follows a backslash to the character the
   * two-character escape denotes, or NONE when it is not a recognised
   * single-character escape. *)
  fun simpleEscape chr =
    case chr of
      #"a" => SOME #"\a"
    | #"b" => SOME #"\b"
    | #"t" => SOME #"\t"
    | #"n" => SOME #"\n"
    | #"v" => SOME #"\v"
    | #"f" => SOME #"\f"
    | #"r" => SOME #"\r"
    | #"?" => SOME #"?"
    | #"\\" => SOME #"\\"
    | #"\"" => SOME #"\""
    | #"'" => SOME #"'"
    | _ => NONE

  (* Tries to decode the escape whose first character after the backslash
   * is at index `pos`.
   * Returns SOME (decoded character, index just past the escape),
   * or NONE when the text at `pos` is not a valid escape
   * (this includes `pos` being past the end of the string). *)
  fun decodeEscape (pos, str) =
    if pos >= String.size str then
      NONE
    else
      case String.sub (str, pos) of
        #"^" =>
          (* control character: \^c where c has a code in 64 - 95 *)
          if pos + 1 < String.size str then
            let
              val code = Char.ord (String.sub (str, pos + 1))
            in
              if code >= 64 andalso code <= 95 then
                SOME (Char.chr (code - 64), pos + 2)
              else
                NONE
            end
          else
            NONE
      | chr =>
          (case simpleEscape chr of
             SOME decoded => SOME (decoded, pos + 1)
           | NONE => NONE)

  (* Walks `str` from index `pos` to the end, accumulating the output
   * characters in reverse order in `acc`. *)
  fun scan (pos, str, acc) =
    if pos >= String.size str then
      String.implode (List.rev acc)
    else
      case String.sub (str, pos) of
        #"\\" =>
          (case decodeEscape (pos + 1, str) of
             SOME (decoded, nextPos) => scan (nextPos, str, decoded :: acc)
           | NONE =>
               (* when there is no valid escape sequence,
                * just leave the slash in the output and carry on
                * with the following character as ordinary text *)
               scan (pos + 1, str, #"\\" :: acc))
      | chr => scan (pos + 1, str, chr :: acc)

  (* Returns `str` with every recognised escape sequence replaced by the
   * character it denotes. Unrecognised sequences, and a backslash at the
   * very end of the string, are copied through unchanged. *)
  fun unescape str = scan (0, str, [])
end
|
|
||||||
@@ -5,9 +5,7 @@ struct
|
|||||||
open MailboxType
|
open MailboxType
|
||||||
|
|
||||||
fun buildTempSearchList (searchString, buffer, cursorIdx) =
|
fun buildTempSearchList (searchString, buffer, cursorIdx) =
|
||||||
let val unescapedString = EscapeString.unescape searchString
|
SearchList.buildRange (buffer, searchString, cursorIdx + 1111)
|
||||||
in SearchList.buildRange (buffer, unescapedString, cursorIdx + 1111)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun addChr
|
fun addChr
|
||||||
(app: app_type, searchString, searchCursorIdx, searchScrollColumn, chr) =
|
(app: app_type, searchString, searchCursorIdx, searchScrollColumn, chr) =
|
||||||
@@ -66,7 +64,6 @@ struct
|
|||||||
} = app
|
} = app
|
||||||
|
|
||||||
val buffer = LineGap.goToStart buffer
|
val buffer = LineGap.goToStart buffer
|
||||||
val searchString = EscapeString.unescape searchString
|
|
||||||
val initialMsg = [SEARCH (buffer, searchString, time)]
|
val initialMsg = [SEARCH (buffer, searchString, time)]
|
||||||
|
|
||||||
(* move LineGap to first line displayed on screen *)
|
(* move LineGap to first line displayed on screen *)
|
||||||
|
|||||||
@@ -109,6 +109,30 @@ struct
|
|||||||
fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0)
|
fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
(* Reports whether `chr` may follow a backslash in a pattern.
 * Assumes the previous char is a backslash; the caller is responsible
 * for having checked that.
 *
 * Returns true for the regex metacharacters (so that they can be
 * matched literally) and for the characters listed under
 * "standard escape sequences" below; false for everything else. *)
fun isValidEscapeSequence chr =
  case chr of
  (* regex metacharacters *)
    #"(" => true
  | #")" => true
  | #"[" => true
  | #"]" => true
  | #"+" => true
  | #"*" => true
  | #"|" => true
  | #"?" => true
  (* the dot is the wildcard metacharacter, so it has to be escapable
   * too; otherwise a literal dot could never be written *)
  | #"." => true
  (* standard escape sequences *)
  (* NOTE(review): these arms match the control characters themselves
   * (bell, backspace, tab, ...), not the letters a, b, t, ...
   * This is only right if the pattern has already been unescaped
   * before it reaches this function - confirm against the caller. *)
  | #"\a" => true
  | #"\b" => true
  | #"\t" => true
  | #"\n" => true
  | #"\v" => true
  | #"\f" => true
  | #"\r" => true
  | #"\\" => true
  | #"\"" => true
  | _ => false
|
||||||
|
|
||||||
fun computeAtom (pos, str, stateNum) =
|
fun computeAtom (pos, str, stateNum) =
|
||||||
if pos = String.size str then
|
if pos = String.size str then
|
||||||
NONE
|
NONE
|
||||||
@@ -132,6 +156,24 @@ struct
|
|||||||
| #"*" => NONE
|
| #"*" => NONE
|
||||||
| #"+" => NONE
|
| #"+" => NONE
|
||||||
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
||||||
|
| #"\\" =>
|
||||||
|
(* escape sequences *)
|
||||||
|
if pos + 1 = String.size str then
|
||||||
|
NONE
|
||||||
|
else
|
||||||
|
let
|
||||||
|
val chr = String.sub (str, pos + 1)
|
||||||
|
in
|
||||||
|
if isValidEscapeSequence chr then
|
||||||
|
let
|
||||||
|
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
||||||
|
in
|
||||||
|
SOME (pos + 2, chr, stateNum + 1)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
NONE
|
||||||
|
end
|
||||||
|
(* todo: [character classes] *)
|
||||||
| chr =>
|
| chr =>
|
||||||
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
||||||
in SOME (pos + 1, chr, stateNum + 1)
|
in SOME (pos + 1, chr, stateNum + 1)
|
||||||
|
|||||||
Reference in New Issue
Block a user