fix failing tests for escaping regex metacharacters
This commit is contained in:
@@ -241,7 +241,6 @@ struct
|
||||
let val chars = Vector.fromList acc
|
||||
in SOME (pos + 1, chars)
|
||||
end
|
||||
| #"-" => NONE
|
||||
| chr =>
|
||||
if
|
||||
pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-"
|
||||
@@ -352,10 +351,6 @@ struct
|
||||
NONE
|
||||
end
|
||||
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
||||
| #")" => NONE
|
||||
| #"?" => NONE
|
||||
| #"*" => NONE
|
||||
| #"+" => NONE
|
||||
| #"[" =>
|
||||
if pos + 1 = String.size str then
|
||||
NONE
|
||||
@@ -363,6 +358,13 @@ struct
|
||||
parseNegateCharacterClass (pos + 2, str, stateNum)
|
||||
else
|
||||
parseCharacterClass (pos + 1, str, stateNum)
|
||||
| #")" => NONE
|
||||
| #"]" => NONE
|
||||
| #"+" => NONE
|
||||
| #"*" => NONE
|
||||
| #"|" => NONE
|
||||
| #"?" => NONE
|
||||
| #"-" => NONE
|
||||
| chr =>
|
||||
if Fn.charIsEqual (chr, Fn.endMarker) then
|
||||
NONE
|
||||
|
||||
188
temp.txt
188
temp.txt
@@ -1,187 +1 @@
|
||||
structure PersistentVector =
struct
  (* Clojure-style persistent vector, used for building the search list.
   *
   * Every node carries an int vector (the "size table") next to its payload.
   * Slot i of the table holds the `finish` index of the last match stored
   * under slot i of the payload, so the table can be binary-searched to
   * pick the child or match to look at.
   *
   * NOTE(review): the binary searches presumably rely on matches being
   * appended in ascending order; confirm against the callers. The exact
   * contract of BinSearch.equalOrMore and BinSearch.equalOrLess is defined
   * elsewhere; this structure only relies on ~1 meaning "no slot found". *)
  datatype t =
    BRANCH of t vector * int vector
  | LEAF of {start: int, finish: int} vector * int vector

  (* Maximum number of slots held by a single node. *)
  val maxSize = 32

  (* True only for a leaf without entries; a BRANCH is never reported as
   * empty. *)
  fun isEmpty t =
    case t of
      LEAF (_, sizes) => Vector.length sizes = 0
    | BRANCH _ => false

  val empty = LEAF (#[], #[])

  (* Result of appending below a node:
   *   UPDATE node - the node was rebuilt and replaces the one passed in
   *   APPEND node - the node passed in was full; the returned node is a new
   *                 sibling which the parent has to attach. *)
  datatype append_result = APPEND of t | UPDATE of t

  (* Reports whether checkIdx lies inside one of the stored matches,
   * both bounds inclusive. *)
  fun isInRange (checkIdx, t) =
    case t of
      BRANCH (nodes, sizes) =>
        let
          val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
        in
          if searchIdx = ~1 then false
          else isInRange (checkIdx, Vector.sub (nodes, searchIdx))
        end
    | LEAF (values, sizes) =>
        let
          val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
        in
          if searchIdx = ~1 then
            false
          else
            let val {start, finish} = Vector.sub (values, searchIdx)
            in checkIdx >= start andalso checkIdx <= finish
            end
        end

  (* Last entry of the node's size table, i.e. the `finish` of the last match
   * stored below it. Raises Subscript when the table is empty. *)
  fun getFinishIdx t =
    case t of
      BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
    | LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)

  (* Appends the match {start, finish} below `tree` and reports, through
   * append_result, whether the node was rebuilt or a new sibling is needed. *)
  fun helpAppend (start, finish, tree) =
    case tree of
      BRANCH (nodes, sizes) =>
        let
          val lastPos = Vector.length nodes - 1
          val lastNode = Vector.sub (nodes, lastPos)
        in
          case helpAppend (start, finish, lastNode) of
            UPDATE newLast =>
              (* The last child absorbed the match: swap it in and refresh
               * the matching size-table slot. *)
              let
                val newNodes = Vector.update (nodes, lastPos, newLast)
                val newSizes = Vector.update (sizes, lastPos, finish)
              in
                UPDATE (BRANCH (newNodes, newSizes))
              end
          | APPEND newVec =>
              if Vector.length nodes + 1 > maxSize then
                (* This branch is full as well: wrap the new child in a fresh
                 * branch and hand it to the parent. *)
                APPEND (BRANCH (#[newVec], #[finish]))
              else
                let
                  val newNodes = Vector.concat [nodes, #[newVec]]
                  val newSizes = Vector.concat [sizes, #[finish]]
                in
                  UPDATE (BRANCH (newNodes, newSizes))
                end
        end
    | LEAF (values, sizes) =>
        if Vector.length values + 1 > maxSize then
          (* Leaf is full: start a new leaf holding only this match. *)
          APPEND (LEAF (#[{start = start, finish = finish}], #[finish]))
        else
          let
            val newValues = Vector.concat
              [values, #[{start = start, finish = finish}]]
            val newSizes = Vector.concat [sizes, #[finish]]
          in
            UPDATE (LEAF (newValues, newSizes))
          end

  (* Appends the match {start, finish} to the tree. When the root itself is
   * full, a new root with two children is created. *)
  fun append (start, finish, tree) =
    case helpAppend (start, finish, tree) of
      UPDATE t => t
    | APPEND newNode =>
        let
          (* Named so that it does not shadow the structure-level maxSize. *)
          val prevFinish = getFinishIdx tree
        in
          BRANCH (#[tree, newNode], #[prevFinish, finish])
        end

  (* First match stored in the tree. Raises Subscript on an empty leaf. *)
  fun getStart tree =
    case tree of
      LEAF (values, _) => Vector.sub (values, 0)
    | BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0))

  (* Descends through the size tables with BinSearch.equalOrLess and returns
   * the match that is found, or {start = ~1, finish = ~1} when the search
   * fails at any level. *)
  fun helpNextMatch (cursorIdx, tree) =
    case tree of
      LEAF (values, sizes) =>
        let
          val idx = BinSearch.equalOrLess (cursorIdx, sizes)
        in
          if idx = ~1 then {start = ~1, finish = ~1}
          else Vector.sub (values, idx)
        end
    | BRANCH (nodes, sizes) =>
        let
          val idx = BinSearch.equalOrLess (cursorIdx, sizes)
        in
          if idx = ~1 then {start = ~1, finish = ~1}
          else helpNextMatch (cursorIdx, Vector.sub (nodes, idx))
        end

  (* Like helpNextMatch, except that a non-empty root LEAF falls back to its
   * first entry when the search fails, and an empty root LEAF yields
   * {start = ~1, finish = ~1}. *)
  fun startNextMatch (cursorIdx, tree) =
    case tree of
      LEAF (values, sizes) =>
        if Vector.length sizes = 0 then
          {start = ~1, finish = ~1}
        else
          let
            val idx = BinSearch.equalOrLess (cursorIdx, sizes)
            val idx = if idx = ~1 then 0 else idx
          in
            Vector.sub (values, idx)
          end
    | BRANCH (nodes, sizes) =>
        let
          val idx = BinSearch.equalOrLess (cursorIdx, sizes)
        in
          if idx = ~1 then {start = ~1, finish = ~1}
          else helpNextMatch (cursorIdx, Vector.sub (nodes, idx))
        end

  (* Steps from the match (prevStart, prevFinish) to a following match `count`
   * times, wrapping around to the first match of the tree whenever the
   * search fails. Returns the start index of the match reached.
   *
   * The guard is `count <= 0` rather than `count = 0` because nextMatch may
   * pass count - 1, which is negative when the caller asked for 0 steps; an
   * equality test would then never terminate the recursion. *)
  fun loopNextMatch (prevStart, prevFinish, tree, count) =
    if count <= 0 then
      prevStart
    else
      let
        val {start, finish} = startNextMatch (prevFinish + 1, tree)
      in
        if start = ~1 then
          let val {start, finish} = getStart tree
          in loopNextMatch (start, finish, tree, count - 1)
          end
        else
          loopNextMatch (start, finish, tree, count - 1)
      end

  (* Start index of the match reached by moving `count` matches on from
   * cursorIdx, or ~1 when the tree is empty.
   *
   * When the cursor is not inside the match found for it, locating that
   * match already counts as one step, so one fewer step remains. *)
  fun nextMatch (cursorIdx, tree, count) =
    if isEmpty tree then
      ~1
    else
      let
        val {start, finish} = startNextMatch (cursorIdx, tree)
        val remaining =
          if cursorIdx >= start andalso cursorIdx <= finish then count
          else count - 1
      in
        (* Return the position computed by the loop. The earlier version
         * discarded it and returned `getStart tree`, a record, which does
         * not agree with the int returned for an empty tree. *)
        loopNextMatch (start, finish, tree, remaining)
      end
end
|
||||
hello | world
|
||||
|
||||
@@ -174,7 +174,7 @@ struct
|
||||
, test "recognises [" (fn _ =>
|
||||
recogniseEscapeSequence ("\\[", "hello [ world"))
|
||||
, test "recognises ]" (fn _ =>
|
||||
recogniseEscapeSequence ("\\[", "hello ] world"))
|
||||
recogniseEscapeSequence ("\\]", "hello ] world"))
|
||||
, test "recognises +" (fn _ =>
|
||||
recogniseEscapeSequence ("\\+", "hello + world"))
|
||||
, test "recognises |" (fn _ =>
|
||||
@@ -201,8 +201,6 @@ struct
|
||||
doesNotRecogniseUnescaped ("|", "hello | world"))
|
||||
, test "does not recognise ?" (fn _ =>
|
||||
doesNotRecogniseUnescaped ("?", "hello ? world"))
|
||||
, test "does not recognise ." (fn _ =>
|
||||
doesNotRecogniseUnescaped (".", "hello . world"))
|
||||
, test "does not recognise -" (fn _ =>
|
||||
doesNotRecogniseUnescaped ("-", "hello - world"))
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user