fix failing tests for escaping regex metacharacters
This commit is contained in:
@@ -241,7 +241,6 @@ struct
|
|||||||
let val chars = Vector.fromList acc
|
let val chars = Vector.fromList acc
|
||||||
in SOME (pos + 1, chars)
|
in SOME (pos + 1, chars)
|
||||||
end
|
end
|
||||||
| #"-" => NONE
|
|
||||||
| chr =>
|
| chr =>
|
||||||
if
|
if
|
||||||
pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-"
|
pos + 1 < String.size str andalso String.sub (str, pos + 1) = #"-"
|
||||||
@@ -352,10 +351,6 @@ struct
|
|||||||
NONE
|
NONE
|
||||||
end
|
end
|
||||||
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
||||||
| #")" => NONE
|
|
||||||
| #"?" => NONE
|
|
||||||
| #"*" => NONE
|
|
||||||
| #"+" => NONE
|
|
||||||
| #"[" =>
|
| #"[" =>
|
||||||
if pos + 1 = String.size str then
|
if pos + 1 = String.size str then
|
||||||
NONE
|
NONE
|
||||||
@@ -363,6 +358,13 @@ struct
|
|||||||
parseNegateCharacterClass (pos + 2, str, stateNum)
|
parseNegateCharacterClass (pos + 2, str, stateNum)
|
||||||
else
|
else
|
||||||
parseCharacterClass (pos + 1, str, stateNum)
|
parseCharacterClass (pos + 1, str, stateNum)
|
||||||
|
| #")" => NONE
|
||||||
|
| #"]" => NONE
|
||||||
|
| #"+" => NONE
|
||||||
|
| #"*" => NONE
|
||||||
|
| #"|" => NONE
|
||||||
|
| #"?" => NONE
|
||||||
|
| #"-" => NONE
|
||||||
| chr =>
|
| chr =>
|
||||||
if Fn.charIsEqual (chr, Fn.endMarker) then
|
if Fn.charIsEqual (chr, Fn.endMarker) then
|
||||||
NONE
|
NONE
|
||||||
|
|||||||
188
temp.txt
188
temp.txt
@@ -1,187 +1 @@
|
|||||||
structure PersistentVector =
|
hello | world
|
||||||
struct
|
|
||||||
(* Clojure-style persistent vector, for building search list.
|
|
||||||
* There is an "int table" too, which stores the last index
|
|
||||||
* at the node with the same index.
|
|
||||||
* We can use the size table for binary search.
|
|
||||||
* *)
|
|
||||||
datatype t =
|
|
||||||
BRANCH of t vector * int vector
|
|
||||||
| LEAF of {start: int, finish: int} vector * int vector
|
|
||||||
|
|
||||||
val maxSize = 32
|
|
||||||
|
|
||||||
fun isEmpty t =
|
|
||||||
case t of
|
|
||||||
LEAF (_, sizes) => Vector.length sizes = 0
|
|
||||||
| _ => false
|
|
||||||
|
|
||||||
val empty = LEAF (#[], #[])
|
|
||||||
|
|
||||||
datatype append_result = APPEND of t | UPDATE of t
|
|
||||||
|
|
||||||
fun isInRange (checkIdx, t) =
|
|
||||||
case t of
|
|
||||||
BRANCH (nodes, sizes) =>
|
|
||||||
let
|
|
||||||
val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
|
|
||||||
in
|
|
||||||
if searchIdx = ~1 then
|
|
||||||
false
|
|
||||||
else
|
|
||||||
isInRange (checkIdx, Vector.sub (nodes, searchIdx))
|
|
||||||
end
|
|
||||||
| LEAF (values, sizes) =>
|
|
||||||
let
|
|
||||||
val searchIdx = BinSearch.equalOrMore (checkIdx, sizes)
|
|
||||||
in
|
|
||||||
if searchIdx = ~1 then
|
|
||||||
false
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val {start, finish} = Vector.sub (values, searchIdx)
|
|
||||||
in
|
|
||||||
checkIdx >= start andalso checkIdx <= finish
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
fun getFinishIdx t =
|
|
||||||
case t of
|
|
||||||
BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
|
|
||||||
| LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
|
|
||||||
|
|
||||||
fun helpAppend (start, finish, tree) =
|
|
||||||
case tree of
|
|
||||||
BRANCH (nodes, sizes) =>
|
|
||||||
let
|
|
||||||
val lastNode = Vector.sub (nodes, Vector.length nodes - 1)
|
|
||||||
in
|
|
||||||
case helpAppend (start, finish, lastNode) of
|
|
||||||
UPDATE newLast =>
|
|
||||||
let
|
|
||||||
val lastPos = Vector.length nodes - 1
|
|
||||||
val newNode = Vector.update (nodes, lastPos, newLast)
|
|
||||||
val newSizes = Vector.update (sizes, lastPos, finish)
|
|
||||||
val newNode = BRANCH (newNode, newSizes)
|
|
||||||
in
|
|
||||||
UPDATE newNode
|
|
||||||
end
|
|
||||||
| APPEND newVec =>
|
|
||||||
if Vector.length nodes + 1 > maxSize then
|
|
||||||
let val newNode = BRANCH (#[newVec], #[finish])
|
|
||||||
in APPEND newNode
|
|
||||||
end
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val newNodes = Vector.concat [nodes, #[newVec]]
|
|
||||||
val newSizes = Vector.concat [sizes, #[finish]]
|
|
||||||
val newNodes = BRANCH (newNodes, newSizes)
|
|
||||||
in
|
|
||||||
UPDATE newNodes
|
|
||||||
end
|
|
||||||
end
|
|
||||||
| LEAF (values, sizes) =>
|
|
||||||
if Vector.length values + 1 > maxSize then
|
|
||||||
let val newNode = LEAF (#[{start = start, finish = finish}], #[finish])
|
|
||||||
in APPEND newNode
|
|
||||||
end
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val newNode = Vector.concat
|
|
||||||
[values, #[{start = start, finish = finish}]]
|
|
||||||
val newSizes = Vector.concat [sizes, #[finish]]
|
|
||||||
val newNode = LEAF (newNode, newSizes)
|
|
||||||
in
|
|
||||||
UPDATE newNode
|
|
||||||
end
|
|
||||||
|
|
||||||
fun append (start, finish, tree) =
|
|
||||||
case helpAppend (start, finish, tree) of
|
|
||||||
UPDATE t => t
|
|
||||||
| APPEND newNode =>
|
|
||||||
let
|
|
||||||
val maxSize = getFinishIdx tree
|
|
||||||
in
|
|
||||||
BRANCH (#[tree, newNode], #[maxSize, finish])
|
|
||||||
end
|
|
||||||
|
|
||||||
fun getStart tree =
|
|
||||||
case tree of
|
|
||||||
LEAF (values, _) => Vector.sub (values, 0)
|
|
||||||
| BRANCH (nodes, _) => getStart (Vector.sub (nodes, 0))
|
|
||||||
|
|
||||||
fun helpNextMatch (cusorIdx, tree) =
|
|
||||||
case tree of
|
|
||||||
LEAF (values, sizes) =>
|
|
||||||
let
|
|
||||||
val idx = BinSearch.equalOrLess (cusorIdx, sizes)
|
|
||||||
in
|
|
||||||
if idx = ~1 then
|
|
||||||
{start = ~1, finish = ~1}
|
|
||||||
else
|
|
||||||
Vector.sub (values, idx)
|
|
||||||
end
|
|
||||||
| BRANCH (nodes, sizes) =>
|
|
||||||
let
|
|
||||||
val idx = BinSearch.equalOrLess (cusorIdx, sizes)
|
|
||||||
in
|
|
||||||
if idx = ~1 then
|
|
||||||
{start = ~1, finish = ~1}
|
|
||||||
else
|
|
||||||
helpNextMatch (cusorIdx, Vector.sub (nodes, idx))
|
|
||||||
end
|
|
||||||
|
|
||||||
fun startNextMatch (cusorIdx, tree) =
|
|
||||||
case tree of
|
|
||||||
LEAF (values, sizes) =>
|
|
||||||
if Vector.length sizes = 0 then
|
|
||||||
{start = ~1, finish = ~1}
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val idx = BinSearch.equalOrLess (cusorIdx, sizes)
|
|
||||||
val idx = if idx = ~1 then 0 else idx
|
|
||||||
in
|
|
||||||
Vector.sub (values, idx)
|
|
||||||
end
|
|
||||||
| BRANCH (nodes, sizes) =>
|
|
||||||
let
|
|
||||||
val idx = BinSearch.equalOrLess (cusorIdx, sizes)
|
|
||||||
in
|
|
||||||
if idx = ~1 then
|
|
||||||
{start = ~1, finish = ~1}
|
|
||||||
else
|
|
||||||
helpNextMatch (cusorIdx, Vector.sub (nodes, idx))
|
|
||||||
end
|
|
||||||
|
|
||||||
fun loopNextMatch (prevStart, prevFinish, tree, count) =
|
|
||||||
if count = 0 then
|
|
||||||
prevStart
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val {start, finish} = startNextMatch (prevFinish + 1, tree)
|
|
||||||
in
|
|
||||||
if start = ~1 then
|
|
||||||
let
|
|
||||||
val {start, finish} = getStart tree
|
|
||||||
in
|
|
||||||
loopNextMatch (start, finish, tree, count - 1)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
loopNextMatch (start, finish, tree, count - 1)
|
|
||||||
end
|
|
||||||
|
|
||||||
fun nextMatch (cusorIdx, tree, count) =
|
|
||||||
if isEmpty tree then ~1
|
|
||||||
else
|
|
||||||
let
|
|
||||||
val {start, finish} = startNextMatch (cusorIdx, tree)
|
|
||||||
val count =
|
|
||||||
if cusorIdx >= start andalso cusorIdx <= finish then
|
|
||||||
count
|
|
||||||
else
|
|
||||||
count - 1
|
|
||||||
in
|
|
||||||
loopNextMatch (start, finish, tree, count);
|
|
||||||
getStart tree
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ struct
|
|||||||
, test "recognises [" (fn _ =>
|
, test "recognises [" (fn _ =>
|
||||||
recogniseEscapeSequence ("\\[", "hello [ world"))
|
recogniseEscapeSequence ("\\[", "hello [ world"))
|
||||||
, test "recognises ]" (fn _ =>
|
, test "recognises ]" (fn _ =>
|
||||||
recogniseEscapeSequence ("\\[", "hello ] world"))
|
recogniseEscapeSequence ("\\]", "hello ] world"))
|
||||||
, test "recognises +" (fn _ =>
|
, test "recognises +" (fn _ =>
|
||||||
recogniseEscapeSequence ("\\+", "hello + world"))
|
recogniseEscapeSequence ("\\+", "hello + world"))
|
||||||
, test "recognises |" (fn _ =>
|
, test "recognises |" (fn _ =>
|
||||||
@@ -201,8 +201,6 @@ struct
|
|||||||
doesNotRecogniseUnescaped ("|", "hello | world"))
|
doesNotRecogniseUnescaped ("|", "hello | world"))
|
||||||
, test "does not recognise ?" (fn _ =>
|
, test "does not recognise ?" (fn _ =>
|
||||||
doesNotRecogniseUnescaped ("?", "hello ? world"))
|
doesNotRecogniseUnescaped ("?", "hello ? world"))
|
||||||
, test "does not recognise ." (fn _ =>
|
|
||||||
doesNotRecogniseUnescaped (".", "hello . world"))
|
|
||||||
, test "does not recognise -" (fn _ =>
|
, test "does not recognise -" (fn _ =>
|
||||||
doesNotRecogniseUnescaped ("-", "hello - world"))
|
doesNotRecogniseUnescaped ("-", "hello - world"))
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user