Address the efficiency TODO: instead of updating the dtran vector on every 'convertChar' iteration, accumulate the transition set and append it to the end of dtran once the 'convertChar' loop finishes
This commit is contained in:
@@ -398,9 +398,16 @@ struct
|
|||||||
type dtran = int list Set.set
|
type dtran = int list Set.set
|
||||||
|
|
||||||
fun convertChar
|
fun convertChar
|
||||||
(char, regex, dstates, dtran: dtran vector, curStates, curStatesIdx) =
|
(char, regex, dstates, dtran: dtran vector, curStates, curStatesIdx,
|
||||||
|
setForCurStates) =
|
||||||
if char < 0 then
|
if char < 0 then
|
||||||
(dstates, dtran)
|
let
|
||||||
|
(* append setForCurStates which was accumulated in this function
|
||||||
|
* to the end of dtran. *)
|
||||||
|
val dtran = Vector.concat [dtran, Vector.fromList [setForCurStates]]
|
||||||
|
in
|
||||||
|
(dstates, dtran)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
(* get union of all follow positions *)
|
(* get union of all follow positions *)
|
||||||
@@ -410,38 +417,18 @@ struct
|
|||||||
[] =>
|
[] =>
|
||||||
(* no follow positions from here, so don't add to dstates *)
|
(* no follow positions from here, so don't add to dstates *)
|
||||||
convertChar
|
convertChar
|
||||||
(char - 1, regex, dstates, dtran, curStates, curStatesIdx)
|
(char - 1, regex, dstates, dtran, curStates, curStatesIdx, setForCurStates)
|
||||||
| _ =>
|
| _ =>
|
||||||
let
|
let
|
||||||
(* add follow positions to dstates if they are not already inside
|
(* add follow positions to dstates if they are not already inside
|
||||||
* and if follow is not empty *)
|
* and if follow is not empty *)
|
||||||
val dstates = appendIfNew (0, dstates, u)
|
val dstates = appendIfNew (0, dstates, u)
|
||||||
|
|
||||||
(* update dtran to include transitions for char.
|
(* update dtran to include transitions for char. *)
|
||||||
* Todo: The code below updates the same vector each time.
|
val setForCurStates = Set.insertOrReplace (char, u, setForCurStates)
|
||||||
* It would be more efficient if we accumulate the set,
|
|
||||||
* and then later append/update it once the loop is done. *)
|
|
||||||
val dtran =
|
|
||||||
if curStatesIdx >= Vector.length dtran then
|
|
||||||
(* corresponding idx doesn't exist in dtran
|
|
||||||
* so we append to dtran instead *)
|
|
||||||
let
|
|
||||||
val transitions = Set.insertOrReplace (char, u, Set.LEAF)
|
|
||||||
in
|
|
||||||
Vector.concat [dtran, Vector.fromList [transitions]]
|
|
||||||
end
|
|
||||||
else
|
|
||||||
(* corresponding state idx does exist in dtran, so we update it *)
|
|
||||||
let
|
|
||||||
val transitions = Vector.sub (dtran, curStatesIdx)
|
|
||||||
val transitions =
|
|
||||||
Set.insertOrReplace (char, u, transitions)
|
|
||||||
in
|
|
||||||
Vector.update (dtran, curStatesIdx, transitions)
|
|
||||||
end
|
|
||||||
in
|
in
|
||||||
convertChar
|
convertChar
|
||||||
(char - 1, regex, dstates, dtran, curStates, curStatesIdx)
|
(char - 1, regex, dstates, dtran, curStates, curStatesIdx, setForCurStates)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -458,7 +445,8 @@ struct
|
|||||||
end
|
end
|
||||||
|
|
||||||
val (dstates, dtran) = convertChar
|
val (dstates, dtran) = convertChar
|
||||||
(255, regex, dstates, dtran, unamarkedTransition, unmarkedIdx)
|
(255, regex, dstates, dtran, unamarkedTransition, unmarkedIdx,
|
||||||
|
Set.LEAF)
|
||||||
in
|
in
|
||||||
convertLoop (regex, dstates, dtran)
|
convertLoop (regex, dstates, dtran)
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user