2025-10-06 09:55:05 +01:00
|
|
|
structure SearchList =
|
|
|
|
|
struct
|
2025-10-17 23:08:16 +01:00
|
|
|
(* Alias for the DFA implementation used by the search routines in this
 * structure: every state transition and every final/dead-state check in
 * this file goes through `Dfa`. *)
structure Dfa = CaseInsensitiveDfa
|
2025-10-06 09:55:05 +01:00
|
|
|
|
2025-10-17 23:08:16 +01:00
|
|
|
(* Scans `buffer` from `idx` to the end of the text, feeding one character
 * at a time to `dfa`, and appends every match found to `acc` as a
 * (startPos, prevFinalPos) pair.
 *
 *  idx          - position of the next character to read
 *  buffer       - the LineGap buffer being searched
 *  dfa          - the automaton passed to Dfa.nextState / Dfa.isFinal
 *  acc          - matches collected so far
 *  curState     - DFA state before reading the character at `idx`
 *                 (a fresh attempt always starts in state 0)
 *  startPos     - position at which the current match attempt began
 *  prevFinalPos - last position at which the DFA was in a final state
 *                 during this attempt, or ~1 if there was none
 *
 * Returns (buffer, acc).
 *
 * When the end of the text is reached while an attempt is still in
 * progress, the scan is restarted behind the attempt (or behind the match
 * it produced) instead of returning immediately; otherwise a match that
 * starts inside the abandoned attempt would be missed (e.g. `abc|b` on a
 * text ending in "ab"). *)
fun buildLoop (idx, buffer, dfa, acc, curState, startPos, prevFinalPos) =
  let
    val buffer = LineGap.goToIdx (idx, buffer)
  in
    if idx = #textLength buffer then
      if prevFinalPos < 0 then
        if startPos + 1 < idx then
          (* the attempt consumed more than one character without ever
           * reaching a final state: a match may still begin later *)
          buildLoop (startPos + 1, buffer, dfa, acc, 0, startPos + 1, ~1)
        else
          (buffer, acc)
      else
        let
          val acc = PersistentVector.append (startPos, prevFinalPos, acc)
          (* we start 1 idx after the final position we found *)
          val newStart = prevFinalPos + 1
        in
          if newStart < idx then
            (* characters after the recorded match were read but are not
             * part of it, so they have to be searched again *)
            buildLoop (newStart, buffer, dfa, acc, 0, newStart, ~1)
          else
            (buffer, acc)
        end
    else
      let
        val chr = LineGap.sub (idx, buffer)
        val newState = Dfa.nextState (dfa, curState, chr)
        val prevFinalPos =
          if Dfa.isFinal (dfa, newState) then idx else prevFinalPos
      in
        if Dfa.isDead newState then
          if prevFinalPos = ~1 then
            (* no match found: restart search from `startPos + 1` *)
            buildLoop (startPos + 1, buffer, dfa, acc, 0, startPos + 1, ~1)
          else
            (* match found: append and continue *)
            let
              val acc = PersistentVector.append (startPos, prevFinalPos, acc)
              (* we start 1 idx after the final position we found *)
              val newStart = prevFinalPos + 1
            in
              buildLoop (newStart, buffer, dfa, acc, 0, newStart, ~1)
            end
        else
          (* still alive: keep reading with the same attempt *)
          buildLoop
            (idx + 1, buffer, dfa, acc, newState, startPos, prevFinalPos)
      end
  end
|
2025-10-06 09:55:05 +01:00
|
|
|
|
2025-10-17 23:08:16 +01:00
|
|
|
(* Builds the list of matches for the whole buffer.
 *
 * An empty `dfa` vector yields the buffer unchanged together with an empty
 * match list. Otherwise the buffer is moved to its start and `buildLoop`
 * is run from position 0, in state 0, with no match recorded yet.
 *
 * Returns (buffer, matches). *)
fun build (buffer, dfa) =
  if Vector.length dfa = 0 then
    (buffer, PersistentVector.empty)
  else
    buildLoop
      (0, LineGap.goToStart buffer, dfa, PersistentVector.empty, 0, 0, ~1)
|
2025-10-07 14:30:23 +01:00
|
|
|
|
2025-10-06 09:55:05 +01:00
|
|
|
(* Scans `buffer` from `bufferPos`, stopping at the end of the text or as
 * soon as `bufferPos` exceeds `finishIdx`, and appends every match found
 * to `searchList` as a (startPos, prevFinalPos) pair.
 *
 *  curState     - DFA state before reading the character at `bufferPos`
 *  startPos     - position at which the current match attempt began
 *  prevFinalPos - last position at which the DFA was in a final state
 *                 during this attempt, or ~1 if there was none
 *
 * Returns (buffer, searchList). When the scan stops, a pending final
 * position is flushed into the list; an attempt that never reached a
 * final state is simply dropped. *)
fun rangeLoop
  ( dfa
  , bufferPos
  , buffer
  , finishIdx
  , searchList
  , curState
  , startPos
  , prevFinalPos
  ) =
  if bufferPos <> #textLength buffer andalso bufferPos <= finishIdx then
    let
      val buffer = LineGap.goToIdx (bufferPos, buffer)
      val nextState =
        Dfa.nextState (dfa, curState, LineGap.sub (bufferPos, buffer))
      val lastFinal =
        if Dfa.isFinal (dfa, nextState) then bufferPos else prevFinalPos

      (* begin a fresh attempt at `pos`, in state 0, with nothing matched *)
      fun restartAt (pos, found) =
        rangeLoop (dfa, pos, buffer, finishIdx, found, 0, pos, ~1)
    in
      if not (Dfa.isDead nextState) then
        (* continue searching for match *)
        rangeLoop
          ( dfa
          , bufferPos + 1
          , buffer
          , finishIdx
          , searchList
          , nextState
          , startPos
          , lastFinal
          )
      else if lastFinal = ~1 then
        (* no match found: restart search from `startPos + 1` *)
        restartAt (startPos + 1, searchList)
      else
        (* match found: record it and resume 1 idx after its final position *)
        restartAt
          ( lastFinal + 1
          , PersistentVector.append (startPos, lastFinal, searchList)
          )
    end
  else if prevFinalPos = ~1 then
    (buffer, searchList)
  else
    (buffer, PersistentVector.append (startPos, prevFinalPos, searchList))
|
|
|
|
|
|
2025-10-08 05:40:29 +01:00
|
|
|
(* Builds a match list for part of the buffer: the scan starts at the
 * buffer's current `#idx` and is bounded by `finishIdx` (see `rangeLoop`).
 *
 * An empty `dfa` vector yields the buffer unchanged together with an empty
 * match list.
 *
 * Returns (buffer, matches). *)
fun buildRange (buffer, finishIdx, dfa) =
  case Vector.length dfa of
    0 => (buffer, PersistentVector.empty)
  | _ =>
      rangeLoop
        ( dfa
        , #idx buffer
        , buffer
        , finishIdx
        , PersistentVector.empty
        , 0
        , #idx buffer
        , ~1
        )
|
2026-01-18 09:59:00 +00:00
|
|
|
|
|
|
|
|
(* Scans `buffer` from `idx` and inserts every match it finds into
 * `searchList`, stopping as soon as a found match ends at a position for
 * which PersistentVector.isInRange reports true (the list already covers
 * it) or when the text is exhausted.
 *
 *  curState     - DFA state before reading the character at `idx`
 *                 (a fresh attempt always starts in state 0)
 *  startPos     - position at which the current match attempt began
 *  prevFinalPos - last position at which the DFA was in a final state
 *                 during this attempt, or ~1 if there was none
 *
 * Returns (buffer, searchList).
 *
 * When the end of the text is reached while an attempt is still in
 * progress, the scan is restarted behind the attempt (or behind the match
 * it produced) instead of returning immediately; otherwise a match that
 * starts inside the abandoned attempt would be missed (e.g. `abc|b` on a
 * text ending in "ab"). *)
fun insertUntilMatch
  (idx, buffer, searchList, dfa, curState, startPos, prevFinalPos) =
  if Dfa.isDead curState then
    if prevFinalPos = ~1 then
      (* no match found: restart search from `startPos + 1` *)
      insertUntilMatch
        (startPos + 1, buffer, searchList, dfa, 0, startPos + 1, ~1)
    else if PersistentVector.isInRange (prevFinalPos, searchList) then
      (buffer, searchList)
    else
      (* new match. Insert and continue *)
      let
        val searchList =
          PersistentVector.insertMatchKeepingAbsoluteInddices
            (startPos, prevFinalPos, searchList)
        val newStart = prevFinalPos + 1
      in
        insertUntilMatch (newStart, buffer, searchList, dfa, 0, newStart, ~1)
      end
  else if idx = #textLength buffer then
    if prevFinalPos < 0 then
      if startPos + 1 < idx then
        (* the attempt consumed more than one character without ever
         * reaching a final state: a match may still begin later *)
        insertUntilMatch
          (startPos + 1, buffer, searchList, dfa, 0, startPos + 1, ~1)
      else
        (buffer, searchList)
    else if PersistentVector.isInRange (prevFinalPos, searchList) then
      (buffer, searchList)
    else
      let
        val searchList =
          PersistentVector.insertMatchKeepingAbsoluteInddices
            (startPos, prevFinalPos, searchList)
        val newStart = prevFinalPos + 1
      in
        if newStart < idx then
          (* characters after the inserted match were read but are not
           * part of it, so they have to be searched again *)
          insertUntilMatch
            (newStart, buffer, searchList, dfa, 0, newStart, ~1)
        else
          (buffer, searchList)
      end
  else
    let
      val buffer = LineGap.goToIdx (idx, buffer)
      val chr = LineGap.sub (idx, buffer)
      val newState = Dfa.nextState (dfa, curState, chr)
      val prevFinalPos =
        if Dfa.isFinal (dfa, newState) then idx else prevFinalPos
    in
      (* continue *)
      insertUntilMatch
        (idx + 1, buffer, searchList, dfa, newState, startPos, prevFinalPos)
    end
|
|
|
|
|
|
2026-02-06 20:30:07 +00:00
|
|
|
(* Re-runs the DFA over a match that begins at `start`, to find where that
 * match ends now, then hands over to `insertUntilMatch` for the text that
 * follows it.
 *
 *  idx      - position of the next character to read
 *  finalPos - last position at which the DFA was in a final state so far,
 *             or ~1 if there was none
 *  curState - DFA state before reading the character at `idx`
 *  start    - start position of the match being re-examined
 *
 * Once the DFA is dead, or the text is exhausted, the entry is updated via
 * PersistentVector.extendExistingMatch (start, finalPos, searchList).
 * In the dead case the search then resumes at `finalPos + 1`.
 *
 * NOTE(review): nothing here guards against `finalPos` still being ~1 when
 * the DFA dies or the text ends; in that case extendExistingMatch receives
 * ~1 and the follow-up search resumes at index 0. Confirm that callers can
 * never reach this state, or that it is intended.
 *
 * Returns (buffer, searchList). *)
fun tryExtendingPrevMatch
  (idx, buffer, searchList, dfa, finalPos, curState, start) =
  let
    (* the list with the re-examined match updated to end at `finalPos` *)
    fun withExtendedMatch () =
      PersistentVector.extendExistingMatch (start, finalPos, searchList)
  in
    if Dfa.isDead curState then
      let
        val updated = withExtendedMatch ()
        val resumeAt = finalPos + 1
      in
        insertUntilMatch (resumeAt, buffer, updated, dfa, 0, resumeAt, ~1)
      end
    else if idx = #textLength buffer then
      (buffer, withExtendedMatch ())
    else
      let
        val buffer = LineGap.goToIdx (idx, buffer)
        val nextState =
          Dfa.nextState (dfa, curState, LineGap.sub (idx, buffer))
        val furthestFinal =
          if Dfa.isFinal (dfa, nextState) then idx else finalPos
      in
        (* keep reading: the match may still grow *)
        tryExtendingPrevMatch
          (idx + 1, buffer, searchList, dfa, furthestFinal, nextState, start)
      end
  end
|
|
|
|
|
|
2026-01-18 09:59:00 +00:00
|
|
|
(* Deletes `length` characters at `start` from both the buffer and the
 * search list, then brings the search list up to date around the edit.
 *
 * With an empty `dfa` vector nothing is searched. Otherwise
 * PersistentVector.prevMatch (start, searchList, 1) is consulted:
 *  - if it yields a position that is not ~1 and lies before `start`, that
 *    match is re-examined with `tryExtendingPrevMatch`;
 *  - otherwise there is no previous match, and the search starts from the
 *    beginning of the buffer with `insertUntilMatch`.
 *
 * Returns (buffer, searchList). *)
fun deleteBufferAndSearchList (start, length, buffer, searchList, dfa) =
  let
    val buffer = LineGap.delete (start, length, buffer)
    val searchList = PersistentVector.delete (start, length, searchList)
    val prevStart = PersistentVector.prevMatch (start, searchList, 1)
    val hasEarlierMatch = prevStart < start andalso prevStart <> ~1
  in
    if Vector.length dfa = 0 then
      (buffer, searchList)
    else if hasEarlierMatch then
      tryExtendingPrevMatch
        (prevStart, buffer, searchList, dfa, ~1, 0, prevStart)
    else
      (* no previous match, so try searching for a match from start of buffer *)
      insertUntilMatch (0, buffer, searchList, dfa, 0, 0, ~1)
  end
|
2026-02-08 02:32:32 +00:00
|
|
|
|
|
|
|
|
(* inserts into buffer and searchList both
 *
 * `insString` is inserted into the buffer at `insIdx`. The search list is
 * then cut down to what PersistentVector.splitLeft (insIdx, searchList)
 * keeps and brought up to date by searching again:
 *  - with an empty `dfa` vector nothing is searched;
 *  - if PersistentVector.prevMatch yields a position that is not ~1 and
 *    lies before `insIdx`, that match is re-examined with
 *    `tryExtendingPrevMatch`;
 *  - otherwise the search starts from the beginning of the buffer with
 *    `insertUntilMatch`.
 *
 * Returns (buffer, searchList). *)
fun insert (insIdx, insString, buffer, searchList, dfa) =
  let
    val buffer = LineGap.insert (insIdx, insString, buffer)

    (* Only the left part of the split is kept. The remaining matches are
     * NOT incremented by the length of the inserted string: they are
     * discarded here and left to the search below to insert again.
     * (The right-hand split used to be computed and then immediately
     * replaced by an empty vector; that dead computation is gone, the
     * resulting list is the same.) *)
    val searchList =
      let
        val searchListLeft = PersistentVector.splitLeft (insIdx, searchList)
      in
        if PersistentVector.isEmpty searchListLeft then
          PersistentVector.empty
        else
          searchListLeft
      end

    val oldStart = PersistentVector.prevMatch (insIdx, searchList, 1)
  in
    if Vector.length dfa = 0 then
      (buffer, searchList)
    else if oldStart >= insIdx orelse oldStart = ~1 then
      (* no previous match, so try searching for a match from start of buffer *)
      insertUntilMatch (0, buffer, searchList, dfa, 0, 0, ~1)
    else
      tryExtendingPrevMatch
        (oldStart, buffer, searchList, dfa, ~1, 0, oldStart)
  end
|
2025-10-06 09:55:05 +01:00
|
|
|
end
|