implement escape sequences for regex

This commit is contained in:
2025-10-06 21:44:57 +01:00
parent 0bcb3e1dfc
commit cc5c0bf95c
4 changed files with 43 additions and 68 deletions

View File

@@ -1,63 +0,0 @@
structure EscapeString =
struct
  (* Rewrites backslash escape sequences in a string into the
   * characters they denote.
   *
   * The escape sequences handled are listed in `simpleEscape` and
   * `controlEscape` below.
   * They were chosen because they are the same escape sequences
   * recognised in Standard ML.
   * A reference is available here:
   * https://smlfamily.github.io/Basis/char.html#SIG:CHAR.fromCString:VAL
   *
   * However, three omissions have been made:
   * - \ddd denoting an integer in the range 0 - 255
   * - \uxxxx denoting the character whose code is the integer xxxx
   * - \f f\ denoting a sequence of characters to ignore
   *
   * In the first two cases, it is easier to type the character directly.
   * In the third case, there is little use for it.
   *
   * The input is scanned left to right so that every character belongs
   * to at most one escape sequence. Scanning from the right would let the
   * result of one escape be consumed again as the tail of another, e.g.
   * backslash-backslash-n would become backslash-newline instead of
   * backslash-n.
   * *)

  (* Maps the character that follows a backslash to the character the
   * two-character escape denotes, or NONE when it is not an escape. *)
  fun simpleEscape chr =
    case chr of
      #"a" => SOME #"\a"
    | #"b" => SOME #"\b"
    | #"t" => SOME #"\t"
    | #"n" => SOME #"\n"
    | #"v" => SOME #"\v"
    | #"f" => SOME #"\f"
    | #"r" => SOME #"\r"
    | #"?" => SOME #"?"
    | #"\\" => SOME #"\\"
    | #"\"" => SOME #"\""
    | #"'" => SOME #"'"
    | _ => NONE

  (* Maps the character that follows backslash-caret to a control
   * character: codes 64 - 95 map to codes 0 - 31.
   * Returns NONE for any other character. *)
  fun controlEscape chr =
    let
      val code = Char.ord chr
    in
      if code >= 64 andalso code <= 95 then SOME (Char.chr (code - 64))
      else NONE
    end

  (* Walks `chars` left to right, pushing output characters onto `acc`
   * in reverse order, and returns the finished string. *)
  fun rewrite (chars, acc) =
    case chars of
      [] => String.implode (List.rev acc)
    | #"\\" :: #"^" :: hd :: tl =>
        (case controlEscape hd of
           SOME chr => rewrite (tl, chr :: acc)
         | NONE =>
             (* invalid escape sequence: leave the slash in the output
              * and keep scanning from the caret *)
             rewrite (#"^" :: hd :: tl, #"\\" :: acc))
    | #"\\" :: hd :: tl =>
        (case simpleEscape hd of
           SOME chr => rewrite (tl, chr :: acc)
         | NONE =>
             (* when there is no valid escape sequence,
              * just leave the slash in the output *)
             rewrite (hd :: tl, #"\\" :: acc))
    | hd :: tl => rewrite (tl, hd :: acc)

  (* Returns `str` with every recognised escape sequence replaced by the
   * character it denotes. Unrecognised sequences, including a trailing
   * backslash, are copied through unchanged. *)
  fun unescape str =
    rewrite (String.explode str, [])
end

View File

@@ -5,9 +5,7 @@ struct
open MailboxType
fun buildTempSearchList (searchString, buffer, cursorIdx) =
let val unescapedString = EscapeString.unescape searchString
in SearchList.buildRange (buffer, unescapedString, cursorIdx + 1111)
end
SearchList.buildRange (buffer, searchString, cursorIdx + 1111)
fun addChr
(app: app_type, searchString, searchCursorIdx, searchScrollColumn, chr) =
@@ -66,7 +64,6 @@ struct
} = app
val buffer = LineGap.goToStart buffer
val searchString = EscapeString.unescape searchString
val initialMsg = [SEARCH (buffer, searchString, time)]
(* move LineGap to first line displayed on screen *)

View File

@@ -109,6 +109,30 @@ struct
fun getRightParenIdx (pos, str) = loop (pos, str, 1, 0)
end
(* Reports whether `chr` may follow a backslash in a regex pattern.
 * Assumes the previous char is a backslash.
 *
 * Returns true for the regex metacharacters (so that they can be
 * matched literally) and for the characters listed under
 * "standard escape sequences"; returns false for everything else.
 *
 * `.` is included because an unescaped `.` is parsed as a wildcard,
 * so escaping is the only way to match a literal dot.
 *
 * NOTE(review): the "standard escape sequences" arms match the control
 * characters themselves (e.g. an actual newline after the backslash),
 * not the letters `n`, `t`, etc. Confirm this is intended. *)
fun isValidEscapeSequence chr =
  case chr of
  (* regex metacharacters *)
    #"(" => true
  | #")" => true
  | #"[" => true
  | #"]" => true
  | #"+" => true
  | #"*" => true
  | #"|" => true
  | #"?" => true
  | #"." => true
  (* standard escape sequences *)
  | #"\a" => true
  | #"\b" => true
  | #"\t" => true
  | #"\n" => true
  | #"\v" => true
  | #"\f" => true
  | #"\r" => true
  | #"\\" => true
  | #"\"" => true
  | _ => false
fun computeAtom (pos, str, stateNum) =
if pos = String.size str then
NONE
@@ -132,6 +156,24 @@ struct
| #"*" => NONE
| #"+" => NONE
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #"\\" =>
(* escape sequences *)
if pos + 1 = String.size str then
NONE
else
let
val chr = String.sub (str, pos + 1)
in
if isValidEscapeSequence chr then
let
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
in
SOME (pos + 2, chr, stateNum + 1)
end
else
NONE
end
(* todo: [character classes] *)
| chr =>
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
in SOME (pos + 1, chr, stateNum + 1)

View File

@@ -6,7 +6,6 @@ lib/brolib-sml/src/gap_set.sml
lib/cozette-sml/fonts-with-z-index/cozette-ascii.mlb
(* FUNCTIONAL CORE *)
fcore/escape-string.sml
fcore/bin-search.sml
ann