Address the efficiency TODO: instead of updating the dtran vector on every iteration of the 'convertChar' loop, accumulate the transition set across iterations and append it to the end of dtran once, when the 'convertChar' loop finishes.

This commit is contained in:
2025-10-06 08:11:30 +01:00
parent 6ae38189cf
commit a3287e71b9

View File

@@ -398,9 +398,16 @@ struct
type dtran = int list Set.set type dtran = int list Set.set
fun convertChar fun convertChar
(char, regex, dstates, dtran: dtran vector, curStates, curStatesIdx) = (char, regex, dstates, dtran: dtran vector, curStates, curStatesIdx,
setForCurStates) =
if char < 0 then if char < 0 then
(dstates, dtran) let
(* append setForCurStates which was accumulated in this function
* to the end of dtran. *)
val dtran = Vector.concat [dtran, Vector.fromList [setForCurStates]]
in
(dstates, dtran)
end
else else
let let
(* get union of all follow positions *) (* get union of all follow positions *)
@@ -410,38 +417,18 @@ struct
[] => [] =>
(* no follow positions from here, so don't add to dstates *) (* no follow positions from here, so don't add to dstates *)
convertChar convertChar
(char - 1, regex, dstates, dtran, curStates, curStatesIdx) (char - 1, regex, dstates, dtran, curStates, curStatesIdx, setForCurStates)
| _ => | _ =>
let let
(* add follow positions to dstates if they are not already inside (* add follow positions to dstates if they are not already inside
* and if follow is not empty *) * and if follow is not empty *)
val dstates = appendIfNew (0, dstates, u) val dstates = appendIfNew (0, dstates, u)
(* update dtran to include transitions for char. (* update dtran to include transitions for char. *)
* Todo: The code below updates the same vector each time. val setForCurStates = Set.insertOrReplace (char, u, setForCurStates)
* It would be more efficient if we accumulate the set,
* and then later append/update it once the loop is done. *)
val dtran =
if curStatesIdx >= Vector.length dtran then
(* corresponding idx doesn't exist in dtran
* so we append to dtran instead *)
let
val transitions = Set.insertOrReplace (char, u, Set.LEAF)
in
Vector.concat [dtran, Vector.fromList [transitions]]
end
else
(* corresponding state idx does exist in dtran, so we update it *)
let
val transitions = Vector.sub (dtran, curStatesIdx)
val transitions =
Set.insertOrReplace (char, u, transitions)
in
Vector.update (dtran, curStatesIdx, transitions)
end
in in
convertChar convertChar
(char - 1, regex, dstates, dtran, curStates, curStatesIdx) (char - 1, regex, dstates, dtran, curStates, curStatesIdx, setForCurStates)
end end
end end
@@ -458,7 +445,8 @@ struct
end end
val (dstates, dtran) = convertChar val (dstates, dtran) = convertChar
(255, regex, dstates, dtran, unamarkedTransition, unmarkedIdx) (255, regex, dstates, dtran, unamarkedTransition, unmarkedIdx,
Set.LEAF)
in in
convertLoop (regex, dstates, dtran) convertLoop (regex, dstates, dtran)
end end