first pass of 'get matches in range from nfa' functionality
This commit is contained in:
Binary file not shown.
@@ -57,7 +57,6 @@ struct
|
||||
| [] => UNTESTED
|
||||
end
|
||||
|
||||
local
|
||||
fun rebuildConcat (lst, chr, idx) =
|
||||
case lst of
|
||||
[(hd, _)] =>
|
||||
@@ -132,9 +131,14 @@ struct
|
||||
| ALTERNATION (lst, UNTESTED) => rebuildAlternation (lst, chr, idx, [])
|
||||
| ALTERNATION (_, state) => (nfa, state)
|
||||
|
||||
| _ =>
|
||||
raise Fail "nfa.sml 69: not char literal or concat or alternation"
|
||||
| _ => raise Fail "nfa.sml 69: not char literal or concat or alternation"
|
||||
|
||||
(* get all matches in string.
|
||||
* Todo:
|
||||
* - Append {start: int, finish: int} into PersistentVector instead
|
||||
* - Search through gap buffer instead of string
|
||||
* *)
|
||||
local
|
||||
fun loop (pos, str, nfa, origNfa, startPos, acc) =
|
||||
if pos = String.size str then
|
||||
PersistentVector.toVector acc
|
||||
@@ -158,6 +162,197 @@ struct
|
||||
(* Entry point for scanning a whole string: starts the string-based
 * `loop` at position 0, using `nfa` both as the working automaton and
 * as the pristine copy, with match-start position 0 and an empty
 * accumulator. The result is whatever `loop` returns (it converts the
 * accumulator to a vector once the end of `str` is reached). *)
fun getMatches (str, nfa) =
  let
    val noMatchesYet = PersistentVector.empty
  in
    loop (0, str, nfa, nfa, 0, noMatchesYet)
  end
|
||||
end
|
||||
|
||||
local
|
||||
(* backtrackRange / loop: mutually recursive scanner over a list of
 * strings, used to collect the start positions of NFA matches.
 *
 * Shared parameters:
 *   hd          - the string currently being scanned
 *   tl          - strings still to be scanned after hd
 *   prevStrings - strings already scanned, nearest first
 *   nfa         - automaton state for the current match attempt
 *   origNfa     - untouched automaton, used to restart an attempt
 *   acc         - PersistentVector of match start positions found so far
 *   startIdx    - absolute index where the current attempt began
 *   finishIdx   - scanning stops once absIdx exceeds this
 *
 * backtrackRange is entered after a failed attempt whose start lies
 * before the first character of hd. Its absIdx parameter is the absolute
 * index of hd's first character. It walks back through prevStrings until
 * it reaches the string containing startIdx, then resumes `loop` one
 * character after startIdx with a fresh automaton.
 *
 * Raises Fail if prevStrings runs out before startIdx is reached. *)
fun backtrackRange
  (hd, tl, prevStrings, origNfa, acc, absIdx, startIdx, finishIdx) =
  case prevStrings of
    prevHd :: prevTl =>
      let
        (* absolute index of the first character of prevHd *)
        val prevIdx = absIdx - String.size prevHd
        (* hd has to be scanned again later, so it returns to the front
         * of the unread strings *)
        val tl = hd :: tl
      in
        if startIdx < prevIdx then
          (* the attempt began before prevHd as well: keep backtracking *)
          backtrackRange
            ( prevHd
            , tl
            , prevTl
            , origNfa
            , acc
            , prevIdx
            , startIdx
            , finishIdx
            )
        else
          let
            (* startIdx lies inside prevHd: resume one character after it.
             * strIdx is relative to prevHd, absIdx is the same position
             * expressed as an absolute index (startIdx + 1). *)
            val strIdx = startIdx - prevIdx + 1
            val absIdx = prevIdx + strIdx
          in
            loop
              ( strIdx
              , prevHd
              , tl
              , prevTl
              , origNfa
              , origNfa
              , acc
              , absIdx
              , absIdx
              , finishIdx
              )
          end
      end
  | [] => raise Fail "nfa.sml 188: should not backtrack to empty list"

(* loop feeds one character at a time to `rebuild` and reacts to the
 * state it reports.
 *
 *   strIdx - index of the next character, relative to hd
 *   absIdx - the same position as an absolute index
 *
 * Returns the accumulated match start positions as a vector once the
 * strings are exhausted or absIdx passes finishIdx. *)
and loop
  ( strIdx
  , hd
  , tl
  , prevStrings
  , nfa
  , origNfa
  , acc
  , absIdx
  , startIdx
  , finishIdx
  ) =
  if strIdx = String.size hd then
    (* hd is used up: carry on at the start of the next string, if any *)
    (case tl of
       newHd :: newTl =>
         loop
           ( 0
           , newHd
           , newTl
           , hd :: prevStrings
           , nfa
           , origNfa
           , acc
           , absIdx
           , startIdx
           , finishIdx
           )
     | [] => PersistentVector.toVector acc)
  else if absIdx > finishIdx then
    PersistentVector.toVector acc
  else
    let
      val chr = String.sub (hd, strIdx)
      val (nfa, state) = rebuild (nfa, chr, absIdx)
    in
      case state of
        UNTESTED =>
          (* attempt still undecided: advance one character, keeping the
           * stepped automaton and the current attempt's start.
           * The cursor must advance from strIdx (relative to hd), not
           * from startIdx (absolute). *)
          loop
            ( strIdx + 1
            , hd
            , tl
            , prevStrings
            , nfa
            , origNfa
            , acc
            , absIdx + 1
            , startIdx
            , finishIdx
            )
      | VALID _ =>
          let
            (* record where the successful attempt began *)
            val acc = PersistentVector.append (startIdx, acc)
          in
            (* start a fresh attempt right after the matched text *)
            loop
              ( strIdx + 1
              , hd
              , tl
              , prevStrings
              , origNfa
              , origNfa
              , acc
              , absIdx + 1
              , absIdx + 1
              , finishIdx
              )
          end
      | INVALID =>
          let
            (* absolute index of the first character of hd *)
            val prevIdx = absIdx - strIdx
          in
            if startIdx < prevIdx then
              (* the failed attempt began in an earlier string *)
              backtrackRange
                ( hd
                , tl
                , prevStrings
                , origNfa
                , acc
                , prevIdx
                , startIdx
                , finishIdx
                )
            else
              let
                (* the failed attempt began inside hd: retry from the
                 * character after its start (absolute startIdx + 1) *)
                val strIdx = startIdx - prevIdx + 1
                val absIdx = prevIdx + strIdx
              in
                loop
                  ( strIdx
                  , hd
                  , tl
                  , prevStrings
                  , origNfa
                  , origNfa
                  , acc
                  , absIdx
                  , absIdx
                  , finishIdx
                  )
              end
          end
    end
|
||||
in
|
||||
(* Prerequisite: move buffer to 'start' parameter before calling *)
|
||||
(* Runs the range scanner over the buffer's rightStrings, beginning at
 * absolute index startIdx and bounded by finishIdx.
 *
 * The starting offset is startIdx minus the buffer's idx field. If that
 * offset falls inside the first right-hand string, scanning begins
 * there; otherwise the offset is reduced by that string's length and
 * scanning begins in the second string. Only these two strings are
 * considered as starting points. An empty vector is returned when the
 * string that would hold the start does not exist. *)
fun getMatchesInRange (startIdx, finishIdx, buffer: LineGap.t, nfa) =
  let
    val {rightStrings, idx = bufferIdx, ...} = buffer
    val offset = startIdx - bufferIdx

    (* Start `loop` at relative index relIdx of str, with no previously
     * scanned strings, a fresh automaton and an empty accumulator. *)
    fun scanFrom (relIdx, str, rest) =
      loop
        ( relIdx
        , str
        , rest
        , []
        , nfa
        , nfa
        , PersistentVector.empty
        , startIdx
        , startIdx
        , finishIdx
        )
  in
    case rightStrings of
      [] => Vector.fromList []
    | first :: rest =>
        if offset < String.size first then
          (* the start position is inside the first string *)
          scanFrom (offset, first, rest)
        else
          (* the start position is past the first string *)
          (case rest of
             [] => Vector.fromList []
           | second :: others =>
               scanFrom (offset - String.size first, second, others))
  end
|
||||
end
|
||||
end
|
||||
|
||||
structure ParseNfa =
|
||||
|
||||
Reference in New Issue
Block a user