Files
sml-projects/fcore/search-list/search-list.sml

252 lines
8.2 KiB
Standard ML
Raw Normal View History

2025-10-06 09:55:05 +01:00
structure SearchList =
struct
structure Dfa = CaseInsensitiveDfa
2025-10-06 09:55:05 +01:00
(* buildLoop (idx, buffer, dfa, acc, curState, startPos, prevFinalPos)
 *
 * Walks `buffer` one character at a time starting at `idx`, feeding each
 * character to `dfa`, and returns `(buffer, acc)` where `acc` has received
 * one `PersistentVector.append (startPos, prevFinalPos, acc)` per match.
 *
 *   curState     - DFA state of the current attempt (every attempt
 *                  restarts in state 0)
 *   startPos     - index at which the current attempt began
 *   prevFinalPos - last index at which this attempt was in a final state,
 *                  or ~1 if it has not been in one yet
 *
 * An attempt is over when the DFA reaches a dead state OR when the text
 * runs out. Both cases go through `buildNextAttempt`, so a partial match
 * that runs into the end of the text no longer hides matches that begin
 * at a later index (e.g. alternatives of different lengths). *)
fun buildLoop (idx, buffer, dfa, acc, curState, startPos, prevFinalPos) =
  let
    val buffer = LineGap.goToIdx (idx, buffer)
  in
    if idx = #textLength buffer then
      if prevFinalPos < 0 andalso startPos >= idx then
        (* this attempt consumed nothing, so every start index has been tried *)
        (buffer, acc)
      else
        buildNextAttempt (buffer, dfa, acc, startPos, prevFinalPos)
    else
      let
        val chr = LineGap.sub (idx, buffer)
        val newState = Dfa.nextState (dfa, curState, chr)
        val prevFinalPos =
          if Dfa.isFinal (dfa, newState) then idx else prevFinalPos
      in
        if Dfa.isDead newState then
          buildNextAttempt (buffer, dfa, acc, startPos, prevFinalPos)
        else
          buildLoop
            (idx + 1, buffer, dfa, acc, newState, startPos, prevFinalPos)
      end
  end

(* Internal helper of buildLoop: closes the attempt that began at `startPos`
 * and starts the next one in state 0. `startPos` strictly increases on
 * every restart, which is what guarantees termination. *)
and buildNextAttempt (buffer, dfa, acc, startPos, prevFinalPos) =
  if prevFinalPos < 0 then
    (* no match found: restart search from `startPos + 1` *)
    buildLoop (startPos + 1, buffer, dfa, acc, 0, startPos + 1, ~1)
  else
    (* match found: append and continue *)
    let
      val acc = PersistentVector.append (startPos, prevFinalPos, acc)
      (* we start 1 idx after the final position we found *)
      val newStart = prevFinalPos + 1
    in
      buildLoop (newStart, buffer, dfa, acc, 0, newStart, ~1)
    end
2025-10-06 09:55:05 +01:00
(* build (buffer, dfa)
 *
 * Produces the search list for the whole of `buffer`.
 * A `dfa` of length 0 yields `PersistentVector.empty` and leaves `buffer`
 * untouched; otherwise the buffer is moved to its start with
 * `LineGap.goToStart` and scanned by `buildLoop` from index 0 in state 0. *)
fun build (buffer, dfa) =
  case Vector.length dfa of
    0 => (buffer, PersistentVector.empty)
  | _ =>
      buildLoop
        ( 0
        , LineGap.goToStart buffer
        , dfa
        , PersistentVector.empty
        , 0
        , 0
        , ~1
        )
2025-10-06 09:55:05 +01:00
(* rangeLoop
 *   (dfa, bufferPos, buffer, finishIdx, searchList, curState, startPos,
 *    prevFinalPos)
 *
 * Same scan as `buildLoop`, but it also stops consuming characters once
 * `bufferPos` is greater than `finishIdx`. Returns `(buffer, searchList)`
 * with one `(startPos, prevFinalPos)` pair appended per match.
 *
 *   curState     - DFA state of the current attempt (restarts use state 0)
 *   startPos     - index at which the current attempt began
 *   prevFinalPos - last index at which this attempt was in a final state,
 *                  or ~1 if it has not been in one yet
 *
 * Running out of range and reaching a dead state both end the current
 * attempt through `rangeNextAttempt`, so an attempt that is cut off by the
 * boundary no longer suppresses matches that start at a later index. *)
fun rangeLoop
  ( dfa
  , bufferPos
  , buffer
  , finishIdx
  , searchList
  , curState
  , startPos
  , prevFinalPos
  ) =
  if bufferPos = #textLength buffer orelse bufferPos > finishIdx then
    if prevFinalPos < 0 andalso startPos >= bufferPos then
      (* this attempt consumed nothing: all start indices in range are done *)
      (buffer, searchList)
    else
      rangeNextAttempt
        (dfa, buffer, finishIdx, searchList, startPos, prevFinalPos)
  else
    let
      val buffer = LineGap.goToIdx (bufferPos, buffer)
      val chr = LineGap.sub (bufferPos, buffer)
      val newState = Dfa.nextState (dfa, curState, chr)
      val prevFinalPos =
        if Dfa.isFinal (dfa, newState) then bufferPos else prevFinalPos
    in
      if Dfa.isDead newState then
        rangeNextAttempt
          (dfa, buffer, finishIdx, searchList, startPos, prevFinalPos)
      else
        (* continue searching for match *)
        rangeLoop
          ( dfa
          , bufferPos + 1
          , buffer
          , finishIdx
          , searchList
          , newState
          , startPos
          , prevFinalPos
          )
    end

(* Internal helper of rangeLoop: closes the attempt that began at `startPos`
 * and starts the next one in state 0. `startPos` strictly increases on
 * every restart, which is what guarantees termination. *)
and rangeNextAttempt
  (dfa, buffer, finishIdx, searchList, startPos, prevFinalPos) =
  if prevFinalPos < 0 then
    (* no match found: restart search from `startPos + 1` *)
    rangeLoop
      ( dfa
      , startPos + 1
      , buffer
      , finishIdx
      , searchList
      , 0
      , startPos + 1
      , ~1
      )
  else
    (* match found: append and continue *)
    let
      val searchList =
        PersistentVector.append (startPos, prevFinalPos, searchList)
      (* we start 1 idx after the final position we found *)
      val newStart = prevFinalPos + 1
    in
      rangeLoop
        (dfa, newStart, buffer, finishIdx, searchList, 0, newStart, ~1)
    end
(* buildRange (buffer, finishIdx, dfa)
 *
 * Builds a fresh search list by running `rangeLoop` from the buffer's
 * current `#idx` up to `finishIdx`, starting in DFA state 0.
 * A `dfa` of length 0 gives back `buffer` unchanged together with
 * `PersistentVector.empty`. *)
fun buildRange (buffer, finishIdx, dfa) =
  if Vector.length dfa = 0 then
    (buffer, PersistentVector.empty)
  else
    let
      val from = #idx buffer
    in
      rangeLoop
        (dfa, from, buffer, finishIdx, PersistentVector.empty, 0, from, ~1)
    end
(* insertUntilMatch
 *   (idx, buffer, searchList, dfa, curState, startPos, prevFinalPos)
 *
 * Scans `buffer` from `idx` and adds every match it finds to `searchList`
 * with `PersistentVector.insertMatchKeepingAbsoluteInddices`. Returns
 * `(buffer, searchList)`.
 *
 *   curState     - DFA state of the current attempt (restarts use state 0)
 *   startPos     - index at which the current attempt began
 *   prevFinalPos - last index at which this attempt was in a final state,
 *                  or ~1 if it has not been in one yet
 *
 * The scan stops early, returning the list as it stands, as soon as
 * `PersistentVector.isInRange` reports that the index being examined (or,
 * at the end of the buffer, the recorded final position) already lies in
 * `searchList`.
 *
 * Reaching the end of the buffer ends the current attempt exactly like a
 * dead state does: without a match the search restarts at `startPos + 1`,
 * with a match it is inserted and the search resumes after it. Previously
 * the function returned immediately in both cases, which skipped matches
 * starting later than a partial match that ran into the end of the buffer.
 * `startPos` strictly increases on every restart, so the scan terminates. *)
fun insertUntilMatch
  (idx, buffer, searchList, dfa, curState, startPos, prevFinalPos) =
  if idx = #textLength buffer then
    if prevFinalPos < 0 then
      if startPos < idx then
        (* partial match ran into the end: retry from `startPos + 1` *)
        insertUntilMatch
          (startPos + 1, buffer, searchList, dfa, 0, startPos + 1, ~1)
      else
        (* this attempt consumed nothing: every start index has been tried *)
        (buffer, searchList)
    else if PersistentVector.isInRange (prevFinalPos, searchList) then
      (buffer, searchList)
    else
      let
        val searchList =
          PersistentVector.insertMatchKeepingAbsoluteInddices
            (startPos, prevFinalPos, searchList)
        val newStart = prevFinalPos + 1
      in
        (* text between the match and the end of the buffer is still unscanned *)
        insertUntilMatch (newStart, buffer, searchList, dfa, 0, newStart, ~1)
      end
  else if PersistentVector.isInRange (idx, searchList) then
    (buffer, searchList)
  else if Dfa.isDead curState then
    if prevFinalPos = ~1 then
      (* no match found: restart search from `startPos + 1` *)
      insertUntilMatch
        (startPos + 1, buffer, searchList, dfa, 0, startPos + 1, ~1)
    else
      (* new match. Insert and continue *)
      let
        val searchList =
          PersistentVector.insertMatchKeepingAbsoluteInddices
            (startPos, prevFinalPos, searchList)
        val newStart = prevFinalPos + 1
      in
        insertUntilMatch (newStart, buffer, searchList, dfa, 0, newStart, ~1)
      end
  else
    let
      val buffer = LineGap.goToIdx (idx, buffer)
      val chr = LineGap.sub (idx, buffer)
      val newState = Dfa.nextState (dfa, curState, chr)
      val prevFinalPos =
        if Dfa.isFinal (dfa, newState) then idx else prevFinalPos
    in
      (* continue *)
      insertUntilMatch
        (idx + 1, buffer, searchList, dfa, newState, startPos, prevFinalPos)
    end
(* tryExtendingPrevMatch
 *   (idx, buffer, searchList, dfa, finalPos, curState, start)
 *
 * Re-runs the DFA over the existing match that begins at `start` to find
 * where that match ends now that the buffer has been edited.
 *
 *   idx      - next index to consume
 *   finalPos - last index at which the DFA was in a final state during this
 *              re-run, or ~1 if it has not been in one yet
 *   curState - DFA state reached so far (callers pass 0 initially)
 *
 * When the re-run is over, the match is updated with
 * `PersistentVector.extendExistingMatch (start, finalPos, searchList)` and
 * the scan carries on from `finalPos + 1` via `insertUntilMatch`.
 * The re-run is over when
 *   - the DFA is in a dead state (note: if `finalPos` is still ~1 here the
 *     follow-up scan therefore begins at index 0), or
 *   - the end of the buffer is reached after a final state was seen.
 *     Previously this case returned without extending, so a match that
 *     grew up to the end of the buffer kept its stale end position.
 * If the end of the buffer is reached with `finalPos` still ~1, the buffer
 * and search list are returned unchanged. *)
fun tryExtendingPrevMatch
  (idx, buffer, searchList, dfa, finalPos, curState, start) =
  let
    val atEnd = idx = #textLength buffer
  in
    if atEnd andalso finalPos < 0 then
      (* reached end of buffer without finding anything
       * so return current buffer and searchList *)
      (buffer, searchList)
    else if atEnd orelse Dfa.isDead curState then
      let
        val searchList =
          PersistentVector.extendExistingMatch (start, finalPos, searchList)
      in
        insertUntilMatch
          (finalPos + 1, buffer, searchList, dfa, 0, finalPos + 1, ~1)
      end
    else
      let
        val buffer = LineGap.goToIdx (idx, buffer)
        val chr = LineGap.sub (idx, buffer)
        val newState = Dfa.nextState (dfa, curState, chr)
        val finalPos = if Dfa.isFinal (dfa, newState) then idx else finalPos
      in
        (* continue *)
        tryExtendingPrevMatch
          (idx + 1, buffer, searchList, dfa, finalPos, newState, start)
      end
  end
(* deleteBufferAndSearchList (start, length, buffer, searchList, dfa)
 *
 * Applies the same deletion (`length` units starting at `start`) to both
 * the buffer and the search list, then repairs the search list:
 *   - a `dfa` of length 0 means there is nothing to search for, so the
 *     edited pair is returned as is;
 *   - if `PersistentVector.prevMatch (start, searchList, 1)` yields ~1 or
 *     an index that is not below `start`, matches are re-inserted by
 *     scanning from index 0;
 *   - otherwise the match beginning at that index is re-examined with
 *     `tryExtendingPrevMatch`. *)
fun deleteBufferAndSearchList (start, length, buffer, searchList, dfa) =
  let
    val buffer = LineGap.delete (start, length, buffer)
    val searchList = PersistentVector.delete (start, length, searchList)
    val prevStart = PersistentVector.prevMatch (start, searchList, 1)
    val noUsablePrevMatch = (prevStart >= start) orelse (prevStart = ~1)
  in
    if Vector.length dfa = 0 then
      (buffer, searchList)
    else if noUsablePrevMatch then
      (* nothing before `start` to extend: scan from the first index *)
      insertUntilMatch (0, buffer, searchList, dfa, 0, 0, ~1)
    else
      tryExtendingPrevMatch
        (prevStart, buffer, searchList, dfa, ~1, 0, prevStart)
  end
(* insert (insIdx, insString, buffer, searchList, dfa)
 *
 * Inserts `insString` into `buffer` at `insIdx` and brings `searchList`
 * back in sync with the new text. Returns `(buffer, searchList)`.
 *
 * Only `PersistentVector.splitLeft (insIdx, searchList)` is kept
 * (presumably the matches before `insIdx` - confirm against
 * PersistentVector); whatever else the old list held is dropped and has to
 * be found again by the scan below.
 *
 * NOTE(review): the previous version also computed
 * `PersistentVector.splitRight (insIdx + String.size insString, searchList)`
 * but immediately shadowed the result with `PersistentVector.empty`, so that
 * value and the `merge` branch were never used. The dead computation has
 * been removed without changing the result. If the right-hand matches are
 * ever reinstated, check first whether `splitRight` shifts their indices by
 * the inserted length.
 *
 * Repair step:
 *   - a `dfa` of length 0 means there is nothing to search for;
 *   - if `PersistentVector.prevMatch (insIdx, searchList, 1)` yields ~1 or
 *     an index that is not below `insIdx`, the scan starts at index 0;
 *   - otherwise the match beginning at that index is re-examined with
 *     `tryExtendingPrevMatch`. *)
fun insert (insIdx, insString, buffer, searchList, dfa) =
  let
    val buffer = LineGap.insert (insIdx, insString, buffer)
    val searchList =
      let
        val searchListLeft = PersistentVector.splitLeft (insIdx, searchList)
      in
        (* kept as an explicit branch so the result is exactly what the
         * earlier code produced when the left part is empty *)
        if PersistentVector.isEmpty searchListLeft then PersistentVector.empty
        else searchListLeft
      end
    val oldStart = PersistentVector.prevMatch (insIdx, searchList, 1)
  in
    if Vector.length dfa = 0 then
      (buffer, searchList)
    else if oldStart >= insIdx orelse oldStart = ~1 then
      (* no previous match, so try searching for a match from start of buffer *)
      insertUntilMatch (0, buffer, searchList, dfa, 0, 0, ~1)
    else
      tryExtendingPrevMatch
        (oldStart, buffer, searchList, dfa, ~1, 0, oldStart)
  end
2025-10-06 09:55:05 +01:00
end