From cc5c0bf95c2bf14f516e745eafc6307e23609cca Mon Sep 17 00:00:00 2001
From: Humza Shahid
Date: Mon, 6 Oct 2025 21:44:57 +0100
Subject: [PATCH] implement escape sequences for regex

---
Review notes:
 * dfa-gen.sml: the char after a backslash was compared against control
   characters (#"\n" is a newline, not the letter n) and was stored
   untranslated, so `\n`, `\t`, ... were rejected. It is now matched as
   typed and translated by unescapeChr.
 * dfa-gen.sml: `\.` is accepted so that a literal dot can be searched for.
 * Line breaks and indentation in this patch were reconstructed from a
   whitespace-collapsed copy and should be checked against the tree;
   the `index` hashes were not regenerated.

 fcore/escape-string.sml                  | 63 ------------------------
 fcore/normal-mode/normal-search-mode.sml |  5 +-
 fcore/search-list/dfa-gen.sml            | 48 ++++++++++++++++++
 shf.mlb                                  |  1 -
 4 files changed, 49 insertions(+), 68 deletions(-)
 delete mode 100644 fcore/escape-string.sml

diff --git a/fcore/escape-string.sml b/fcore/escape-string.sml
deleted file mode 100644
index ed6c687..0000000
--- a/fcore/escape-string.sml
+++ /dev/null
@@ -1,63 +0,0 @@
-structure EscapeString =
-struct
-  (* The specific escape sequences handled are in the
-   * `rewriteAcc` function below.
-   * These escape sequences were decided on because
-   * they are the same escape sequences recognised in Standard ML.
-   * A reference is available here:
-   * https://smlfamily.github.io/Basis/char.html#SIG:CHAR.fromCString:VAL
-   *
-   * However, thre omissions have been made:
-   * - \ddd denoting an integer in the range 0 - 255
-   * - \uxxxx denoting the character whose code is the integer xxxx
-   * - \f f\ denoting a sequence of characters to ignore
-   *
-   * In the first two cases, it is easier to type the character directly.
-   * In the third case, there is little use for it.
-   * *)
-  fun rewriteAcc (#"\\", acc) =
-        (case acc of
-           #"a" :: tl => #"\a" :: tl
-         | #"b" :: tl => #"\b" :: tl
-         | #"t" :: tl => #"\t" :: tl
-         | #"n" :: tl => #"\n" :: tl
-         | #"v" :: tl => #"\v" :: tl
-         | #"f" :: tl => #"\f" :: tl
-         | #"r" :: tl => #"\r" :: tl
-         | #"?" :: tl => #"?" :: tl
-         | #"\\" :: tl => #"\\" :: tl
-         | #"\"" :: tl => #"\"" :: tl
-         | #"'" :: tl => #"'" :: tl
-         | #"^" :: hd :: tl =>
-             (* handle control characters *)
-             let
-               val code = Char.ord hd
-             in
-               if code >= 64 andalso code <= 95 then
-                 let val chr = Char.chr (code - 64)
-                 in chr :: tl
-                 end
-               else
-                 (* invalid escape sequence: leave alone *)
-                 #"\\" :: acc
-             end
-         | _ =>
-             (* when there is no valid escape sequence,
-              * just leave slash in output *)
-             #"\\" :: acc)
-    | rewriteAcc (chr, acc) = chr :: acc
-
-  fun help (pos, str, acc) =
-    if pos < 0 then
-      String.implode acc
-    else
-      let
-        val chr = String.sub (str, pos)
-        val acc = rewriteAcc (chr, acc)
-      in
-        help (pos - 1, str, acc)
-      end
-
-  fun unescape str =
-    help (String.size str - 1, str, [])
-end
diff --git a/fcore/normal-mode/normal-search-mode.sml b/fcore/normal-mode/normal-search-mode.sml
index 610705f..0a7d0b1 100644
--- a/fcore/normal-mode/normal-search-mode.sml
+++ b/fcore/normal-mode/normal-search-mode.sml
@@ -5,9 +5,7 @@ struct
   open MailboxType
 
   fun buildTempSearchList (searchString, buffer, cursorIdx) =
-    let val unescapedString = EscapeString.unescape searchString
-    in SearchList.buildRange (buffer, unescapedString, cursorIdx + 1111)
-    end
+    SearchList.buildRange (buffer, searchString, cursorIdx + 1111)
 
   fun addChr
     (app: app_type, searchString, searchCursorIdx, searchScrollColumn, chr) =
@@ -66,7 +64,6 @@ struct
         } = app
 
     val buffer = LineGap.goToStart buffer
-    val searchString = EscapeString.unescape searchString
     val initialMsg = [SEARCH (buffer, searchString, time)]
 
     (* move LineGap to first line displayed on screen *)
diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml
index 5b62085..1ba181c 100644
--- a/fcore/search-list/dfa-gen.sml
+++ b/fcore/search-list/dfa-gen.sml
@@ -109,6 +109,38 @@ struct
     fun getRightParenIdx (pos, str) =
       loop (pos, str, 1, 0)
   end
+  (* Maps the char that follows a backslash to the char the escape sequence
+   * denotes, or NONE when the pair is not a recognised escape sequence.
+   * The search string is no longer passed through EscapeString.unescape,
+   * so `\n` is a backslash followed by the letter n: the letter is what
+   * has to be matched here, and it is then translated. *)
+  fun unescapeChr chr =
+    case chr of
+    (* standard escape sequences *)
+      #"a" => SOME #"\a"
+    | #"b" => SOME #"\b"
+    | #"t" => SOME #"\t"
+    | #"n" => SOME #"\n"
+    | #"v" => SOME #"\v"
+    | #"f" => SOME #"\f"
+    | #"r" => SOME #"\r"
+    | #"\\" => SOME chr
+    | #"\"" => SOME chr
+    (* regex metacharacters: escaping yields the literal char *)
+    | #"(" => SOME chr
+    | #")" => SOME chr
+    | #"[" => SOME chr
+    | #"]" => SOME chr
+    | #"+" => SOME chr
+    | #"*" => SOME chr
+    | #"|" => SOME chr
+    | #"?" => SOME chr
+    | #"." => SOME chr
+    | _ => NONE
+
+  (* assumes previous char is a backslash *)
+  fun isValidEscapeSequence chr =
+    Option.isSome (unescapeChr chr)
+
   fun computeAtom (pos, str, stateNum) =
     if pos = String.size str then
       NONE
@@ -132,6 +164,22 @@ struct
         | #"*" => NONE
         | #"+" => NONE
         | #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
+        | #"\\" =>
+            (* escape sequences: consume the backslash and the char after it *)
+            if pos + 1 = String.size str then
+              (* trailing backslash: nothing left to escape *)
+              NONE
+            else
+              (case unescapeChr (String.sub (str, pos + 1)) of
+                 SOME unescaped =>
+                   let
+                     val atom =
+                       CHAR_LITERAL {char = unescaped, position = stateNum + 1}
+                   in
+                     SOME (pos + 2, atom, stateNum + 1)
+                   end
+               | NONE => NONE)
+        (* todo: [character classes] *)
         | chr =>
             let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
             in SOME (pos + 1, chr, stateNum + 1)
diff --git a/shf.mlb b/shf.mlb
index af7e1e9..87b7585 100644
--- a/shf.mlb
+++ b/shf.mlb
@@ -6,7 +6,6 @@ lib/brolib-sml/src/gap_set.sml
 lib/cozette-sml/fonts-with-z-index/cozette-ascii.mlb
 
 (* FUNCTIONAL CORE *)
-fcore/escape-string.sml
 fcore/bin-search.sml
 ann