From 319731547816e2559b4bcda8be0e3ee9b548b2f2 Mon Sep 17 00:00:00 2001 From: Humza Shahid Date: Thu, 9 Oct 2025 06:22:21 +0100 Subject: [PATCH] fix failing tests for escaping regex metacharacters --- fcore/search-list/dfa-gen.sml | 12 ++- temp.txt | 188 +--------------------------------- test/regex-tests.sml | 4 +- 3 files changed, 9 insertions(+), 195 deletions(-) diff --git a/fcore/search-list/dfa-gen.sml b/fcore/search-list/dfa-gen.sml index 7551ee5..449cc5f 100644 --- a/fcore/search-list/dfa-gen.sml +++ b/fcore/search-list/dfa-gen.sml @@ -241,7 +241,6 @@ struct let val chars = Vector.fromList acc in SOME (pos + 1, chars) end - | #"-" => NONE | chr => if pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-" @@ -352,10 +351,6 @@ struct NONE end | #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1) - | #")" => NONE - | #"?" => NONE - | #"*" => NONE - | #"+" => NONE | #"[" => if pos + 1 = String.size str then NONE @@ -363,6 +358,13 @@ struct parseNegateCharacterClass (pos + 2, str, stateNum) else parseCharacterClass (pos + 1, str, stateNum) + | #")" => NONE + | #"]" => NONE + | #"+" => NONE + | #"*" => NONE + | #"|" => NONE + | #"?" => NONE + | #"-" => NONE | chr => if Fn.charIsEqual (chr, Fn.endMarker) then NONE diff --git a/temp.txt b/temp.txt index faf412c..9fd27b9 100644 --- a/temp.txt +++ b/temp.txt @@ -1,187 +1 @@ -structure PersistentVector = -struct - (* Clojure-style persistent vector, for building search list. - * There is an "int table" too, which stores the last index - * at the node with the same index. - * We can use the size table for binary search. - * *) - datatype t = - BRANCH of t vector * int vector - | LEAF of {start: int, finish: int} vector * int vector - - val maxSize = 32 - - fun isEmpty t = - case t of - LEAF (_, sizes) => Vector.length sizes = 0 - | _ => false - - val empty = LEAF (#[], #[]) - - datatype append_result = APPEND of t | UPDATE of t - - fun isInRange (checkIdx, t) = - case t of - BRANCH (nodes, sizes) => - let - val searchIdx = BinSearch.equalOrMore (checkIdx, sizes) - in - if searchIdx = ~1 then - false - else - isInRange (checkIdx, Vector.sub (nodes, searchIdx)) - end - | LEAF (values, sizes) => - let - val searchIdx = BinSearch.equalOrMore (checkIdx, sizes) - in - if searchIdx = ~1 then - false - else - let - val {start, finish} = Vector.sub (values, searchIdx) - in - checkIdx >= start andalso checkIdx <= finish - end - end - - fun getFinishIdx t = - case t of - BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) - | LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1) - - fun helpAppend (start, finish, tree) = - case tree of - BRANCH (nodes, sizes) => - let - val lastNode = Vector.sub (nodes, Vector.length nodes - 1) - in - case helpAppend (start, finish, lastNode) of - UPDATE newLast => - let - val lastPos = Vector.length nodes - 1 - val newNode = Vector.update (nodes, lastPos, newLast) - val newSizes = Vector.update (sizes, lastPos, finish) - val newNode = BRANCH (newNode, newSizes) - in - UPDATE newNode - end - | APPEND newVec => - if Vector.length nodes + 1 > maxSize then - let val newNode = BRANCH (#[newVec], #[finish]) - in APPEND newNode - end - else - let - val newNodes = Vector.concat [nodes, #[newVec]] - val newSizes = Vector.concat [sizes, #[finish]] - val newNodes = BRANCH (newNodes, newSizes) - in - UPDATE newNodes - end - end - | LEAF (values, sizes) => - if Vector.length values + 1 > maxSize then - let val newNode = LEAF (#[{start = start, finish = finish}], #[finish]) - in APPEND newNode - end - else - let - val newNode = Vector.concat - [values, #[{start = start, finish = finish}]] - val newSizes = Vector.concat [sizes, #[finish]] - val newNode = LEAF (newNode, newSizes) - in - UPDATE newNode - end - - fun append (start, finish, tree) = - case helpAppend (start, finish, tree) of - UPDATE t => t - | APPEND newNode => - let - val maxSize = getFinishIdx tree - in - BRANCH (#[tree, newNode], #[maxSize, finish]) - end - - fun getStart tree = - case tree of - LEAF (values, _) => Vector.sub (values, 0) - | BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0)) - - fun helpNextMatch (cusorIdx, tree) = - case tree of - LEAF (values, sizes) => - let - val idx = BinSearch.equalOrLess (cusorIdx, sizes) - in - if idx = ~1 then - {start = ~1, finish = ~1} - else - Vector.sub (values, idx) - end - | BRANCH (nodes, sizes) => - let - val idx = BinSearch.equalOrLess (cusorIdx, sizes) - in - if idx = ~1 then - {start = ~1, finish = ~1} - else - helpNextMatch (cusorIdx, Vector.sub (nodes, idx)) - end - - fun startNextMatch (cusorIdx, tree) = - case tree of - LEAF (values, sizes) => - if Vector.length sizes = 0 then - {start = ~1, finish = ~1} - else - let - val idx = BinSearch.equalOrLess (cusorIdx, sizes) - val idx = if idx = ~1 then 0 else idx - in - Vector.sub (values, idx) - end - | BRANCH (nodes, sizes) => - let - val idx = BinSearch.equalOrLess (cusorIdx, sizes) - in - if idx = ~1 then - {start = ~1, finish = ~1} - else - helpNextMatch (cusorIdx, Vector.sub (nodes, idx)) - end - - fun loopNextMatch (prevStart, prevFinish, tree, count) = - if count = 0 then - prevStart - else - let - val {start, finish} = startNextMatch (prevFinish + 1, tree) - in - if start = ~1 then - let - val {start, finish} = getStart tree - in - loopNextMatch (start, finish, tree, count - 1) - end - else - loopNextMatch (start, finish, tree, count - 1) - end - - fun nextMatch (cusorIdx, tree, count) = - if isEmpty tree then ~1 - else - let - val {start, finish} = startNextMatch (cusorIdx, tree) - val count = - if cusorIdx >= start andalso cusorIdx <= finish then - count - else - count - 1 - in - loopNextMatch (start, finish, tree, count); - getStart tree - end -end +hello | world diff --git a/test/regex-tests.sml b/test/regex-tests.sml index fdcab26..d3375d2 100644 --- a/test/regex-tests.sml +++ b/test/regex-tests.sml @@ -174,7 +174,7 @@ struct , test "recognises [" (fn _ => recogniseEscapeSequence ("\\[", "hello [ world")) , test "recognises ]" (fn _ => - recogniseEscapeSequence ("\\[", "hello ] world")) + recogniseEscapeSequence ("\\]", "hello ] world")) , test "recognises +" (fn _ => recogniseEscapeSequence ("\\+", "hello + world")) , test "recognises |" (fn _ => @@ -201,8 +201,6 @@ struct doesNotRecogniseUnescaped ("|", "hello | world")) , test "does not recognise ?" (fn _ => doesNotRecogniseUnescaped ("?", "hello ? world")) - , test "does not recognise ." (fn _ => - doesNotRecogniseUnescaped (".", "hello . world")) , test "does not recognise -" (fn _ => doesNotRecogniseUnescaped ("-", "hello - world")) ]