fix failing tests for escaping regex metacharacters

This commit is contained in:
2025-10-09 06:22:21 +01:00
parent 8bd8977027
commit 3197315478
3 changed files with 9 additions and 195 deletions

View File

@@ -241,7 +241,6 @@ struct
let val chars = Vector.fromList acc
in SOME (pos + 1, chars)
end
| #"-" => NONE
| chr =>
if
pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-"
@@ -352,10 +351,6 @@ struct
NONE
end
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
| #")" => NONE
| #"?" => NONE
| #"*" => NONE
| #"+" => NONE
| #"[" =>
if pos + 1 = String.size str then
NONE
@@ -363,6 +358,13 @@ struct
parseNegateCharacterClass (pos + 2, str, stateNum)
else
parseCharacterClass (pos + 1, str, stateNum)
| #")" => NONE
| #"]" => NONE
| #"+" => NONE
| #"*" => NONE
| #"|" => NONE
| #"?" => NONE
| #"-" => NONE
| chr =>
if Fn.charIsEqual (chr, Fn.endMarker) then
NONE

188
temp.txt
View File

@@ -1,187 +1 @@
structure PersistentVector =
struct
  (* Clojure-style persistent vector, used for building the search list.
   *
   * Every node carries, next to its payload vector, an int vector of the
   * same length. Slot i of that int vector holds the `finish` index that was
   * passed to the append which created or last updated slot i of the payload,
   * so the int vector can be handed to BinSearch to locate a slot by index.
   *
   * NOTE(review): the exact contracts of BinSearch.equalOrMore and
   * BinSearch.equalOrLess are defined outside this structure; the code below
   * only relies on them returning ~1 when no suitable slot exists. *)
  datatype t =
    BRANCH of t vector * int vector
  | LEAF of {start: int, finish: int} vector * int vector

  (* Maximum number of slots a single node may hold. *)
  val maxSize = 32

  (* True only for a leaf holding no entries; a BRANCH is never reported as
   * empty. *)
  fun isEmpty t =
    case t of
      LEAF (_, sizes) => Vector.length sizes = 0
    | BRANCH _ => false

  val empty = LEAF (#[], #[])

  (* Result of appending below a node:
   *   UPDATE node - the entry fitted; `node` replaces the node descended into.
   *   APPEND node - the node was full; `node` is a fresh sibling that the
   *                 caller has to place next to it. *)
  datatype append_result = APPEND of t | UPDATE of t

  (* Returns true when checkIdx lies inside the {start, finish} range (both
   * ends inclusive) of the entry selected by BinSearch.equalOrMore, and false
   * when the search reports ~1 at any level. *)
  fun isInRange (checkIdx, t) =
    case t of
      BRANCH (nodes, sizes) =>
        let
          val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
        in
          if searchIdx = ~1 then false
          else isInRange (checkIdx, Vector.sub (nodes, searchIdx))
        end
    | LEAF (values, sizes) =>
        let
          val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
        in
          if searchIdx = ~1 then
            false
          else
            let val {start, finish} = Vector.sub (values, searchIdx)
            in checkIdx >= start andalso checkIdx <= finish
            end
        end

  (* Last recorded finish index of the node.
   * Raises Subscript when the node has no slots, so callers must not pass an
   * empty tree. *)
  fun getFinishIdx t =
    case t of
      BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
    | LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)

  (* Appends the entry {start, finish} below `tree`, always descending into
   * the last child of a branch. See append_result for the meaning of the
   * returned constructor. *)
  fun helpAppend (start, finish, tree) =
    case tree of
      BRANCH (nodes, sizes) =>
        let
          val lastPos = Vector.length nodes - 1
          val lastNode = Vector.sub (nodes, lastPos)
        in
          case helpAppend (start, finish, lastNode) of
            UPDATE newLast =>
              (* The last child absorbed the entry: swap it in and record the
               * new finish index for that slot. *)
              let
                val newNodes = Vector.update (nodes, lastPos, newLast)
                val newSizes = Vector.update (sizes, lastPos, finish)
              in
                UPDATE (BRANCH (newNodes, newSizes))
              end
          | APPEND newVec =>
              if Vector.length nodes + 1 > maxSize then
                (* This branch is full as well: wrap the new child in a fresh
                 * branch and let the caller place it. *)
                APPEND (BRANCH (#[newVec], #[finish]))
              else
                let
                  val newNodes = Vector.concat [nodes, #[newVec]]
                  val newSizes = Vector.concat [sizes, #[finish]]
                in
                  UPDATE (BRANCH (newNodes, newSizes))
                end
        end
    | LEAF (values, sizes) =>
        if Vector.length values + 1 > maxSize then
          APPEND (LEAF (#[{start = start, finish = finish}], #[finish]))
        else
          let
            val newValues = Vector.concat
              [values, #[{start = start, finish = finish}]]
            val newSizes = Vector.concat [sizes, #[finish]]
          in
            UPDATE (LEAF (newValues, newSizes))
          end

  (* Appends the entry {start, finish} and returns the new tree. When the
   * current root cannot take the entry, a new root is created with the old
   * tree and the freshly created node as its two children. *)
  fun append (start, finish, tree) =
    case helpAppend (start, finish, tree) of
      UPDATE t => t
    | APPEND newNode =>
        let
          (* Finish index covered by the old root; deliberately not called
           * maxSize, which is the node capacity above. *)
          val prevFinish = getFinishIdx tree
        in
          BRANCH (#[tree, newNode], #[prevFinish, finish])
        end

  (* First entry of the tree, following slot 0 at every level.
   * Raises Subscript on an empty leaf. *)
  fun getStart tree =
    case tree of
      LEAF (values, _) => Vector.sub (values, 0)
    | BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0))

  (* Descends using BinSearch.equalOrLess at every level and returns the
   * entry it selects, or {start = ~1, finish = ~1} as soon as a search
   * reports ~1. *)
  fun helpNextMatch (cursorIdx, tree) =
    case tree of
      LEAF (values, sizes) =>
        let
          val idx = BinSearch.equalOrLess (cursorIdx, sizes)
        in
          if idx = ~1 then {start = ~1, finish = ~1}
          else Vector.sub (values, idx)
        end
    | BRANCH (nodes, sizes) =>
        let
          val idx = BinSearch.equalOrLess (cursorIdx, sizes)
        in
          if idx = ~1 then {start = ~1, finish = ~1}
          else helpNextMatch (cursorIdx, Vector.sub (nodes, idx))
        end

  (* Like helpNextMatch, except when the root itself is a leaf: an empty leaf
   * yields {start = ~1, finish = ~1}, and a search result of ~1 falls back
   * to the leaf's first entry instead of the sentinel. *)
  fun startNextMatch (cursorIdx, tree) =
    case tree of
      LEAF (values, sizes) =>
        if Vector.length sizes = 0 then
          {start = ~1, finish = ~1}
        else
          let
            val idx = BinSearch.equalOrLess (cursorIdx, sizes)
            val idx = if idx = ~1 then 0 else idx
          in
            Vector.sub (values, idx)
          end
      (* A branch root is handled exactly like helpNextMatch's branch case. *)
    | BRANCH _ => helpNextMatch (cursorIdx, tree)

  (* Steps `count` times from the match (prevStart, prevFinish): each step
   * looks up startNextMatch at prevFinish + 1 and wraps around to the first
   * entry of the tree when that lookup returns start = ~1. Returns the start
   * index of the match reached.
   *
   * The base case is `count <= 0` rather than `count = 0` because nextMatch
   * may pass count - 1; with a caller count of 0 that is negative and would
   * otherwise never reach the base case. *)
  fun loopNextMatch (prevStart, prevFinish, tree, count) =
    if count <= 0 then
      prevStart
    else
      let
        val {start, finish} = startNextMatch (prevFinish + 1, tree)
      in
        if start = ~1 then
          let val {start, finish} = getStart tree
          in loopNextMatch (start, finish, tree, count - 1)
          end
        else
          loopNextMatch (start, finish, tree, count - 1)
      end

  (* Returns the start index of the match reached by moving `count` matches
   * on from cursorIdx, or ~1 when the tree is empty.
   *
   * When the cursor is not inside the match found by startNextMatch, one
   * step is deducted from count before looping.
   *
   * The result of loopNextMatch is the return value. Previously it was
   * evaluated and discarded in favour of `getStart tree`, a record, which
   * did not agree with the int returned for the empty tree. *)
  fun nextMatch (cursorIdx, tree, count) =
    if isEmpty tree then
      ~1
    else
      let
        val {start, finish} = startNextMatch (cursorIdx, tree)
        val count =
          if cursorIdx >= start andalso cursorIdx <= finish then count
          else count - 1
      in
        loopNextMatch (start, finish, tree, count)
      end
end
hello | world

View File

@@ -174,7 +174,7 @@ struct
, test "recognises [" (fn _ =>
recogniseEscapeSequence ("\\[", "hello [ world"))
, test "recognises ]" (fn _ =>
recogniseEscapeSequence ("\\[", "hello ] world"))
recogniseEscapeSequence ("\\]", "hello ] world"))
, test "recognises +" (fn _ =>
recogniseEscapeSequence ("\\+", "hello + world"))
, test "recognises |" (fn _ =>
@@ -201,8 +201,6 @@ struct
doesNotRecogniseUnescaped ("|", "hello | world"))
, test "does not recognise ?" (fn _ =>
doesNotRecogniseUnescaped ("?", "hello ? world"))
, test "does not recognise ." (fn _ =>
doesNotRecogniseUnescaped (".", "hello . world"))
, test "does not recognise -" (fn _ =>
doesNotRecogniseUnescaped ("-", "hello - world"))
]