Fix bug in the implementation of the DFA algorithm: we need to append an end marker to the regex, which is then used to tell whether we have reached a final state in the DFA
This commit is contained in:
@@ -313,25 +313,25 @@ struct
|
|||||||
| GROUP regex => getFollowsForPositionAndChar (regex, pos, curChr)
|
| GROUP regex => getFollowsForPositionAndChar (regex, pos, curChr)
|
||||||
|
|
||||||
| CONCAT {l, r, leftMaxState, ...} =>
|
| CONCAT {l, r, leftMaxState, ...} =>
|
||||||
let
|
if pos <= leftMaxState then
|
||||||
val nodeToFollow = if pos <= leftMaxState then l else r
|
let
|
||||||
val result =
|
val result = getFollowsForPositionAndChar (l, pos, curChr)
|
||||||
getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
val {sawConcat, follows, charIsMatch} = result
|
||||||
val {sawConcat, follows, charIsMatch} = result
|
in
|
||||||
in
|
if charIsMatch then
|
||||||
if charIsMatch then
|
if sawConcat then
|
||||||
if sawConcat then
|
(* we already saw a concat and got followpos *)
|
||||||
(* saw concat, so we got follow pos already *)
|
result
|
||||||
result
|
else
|
||||||
|
let val fp = followpos (curChr, regex, follows)
|
||||||
|
in {sawConcat = true, follows = fp, charIsMatch = true}
|
||||||
|
end
|
||||||
else
|
else
|
||||||
(* get followpos *)
|
(* char is not match, so don't get follow pos *)
|
||||||
let val fp = followpos (curChr, regex, follows)
|
result
|
||||||
in {sawConcat = true, follows = fp, charIsMatch = true}
|
end
|
||||||
end
|
else
|
||||||
else
|
getFollowsForPositionAndChar (r, pos, curChr)
|
||||||
(* char does not match, so don't get followpos *)
|
|
||||||
result
|
|
||||||
end
|
|
||||||
| ZERO_OR_ONE child =>
|
| ZERO_OR_ONE child =>
|
||||||
getFollowsForPositionAndCharLoop (pos, regex, child, curChr)
|
getFollowsForPositionAndCharLoop (pos, regex, child, curChr)
|
||||||
| ZERO_OR_MORE child =>
|
| ZERO_OR_MORE child =>
|
||||||
@@ -360,10 +360,15 @@ struct
|
|||||||
hd :: tl =>
|
hd :: tl =>
|
||||||
let
|
let
|
||||||
val fpList = getFollowsForPositionAndChar (regex, hd, char)
|
val fpList = getFollowsForPositionAndChar (regex, hd, char)
|
||||||
|
val {sawConcat, follows, charIsMatch} = fpList
|
||||||
|
val follows =
|
||||||
|
if charIsMatch andalso not sawConcat then 0 :: follows
|
||||||
|
else follows
|
||||||
|
|
||||||
val followSet =
|
val followSet =
|
||||||
List.foldl
|
List.foldl
|
||||||
(fn (fp, followSet) => Set.insertOrReplace (fp, (), followSet))
|
(fn (fp, followSet) => Set.insertOrReplace (fp, (), followSet))
|
||||||
followSet (#follows fpList)
|
followSet follows
|
||||||
in
|
in
|
||||||
getFollowPositionsFromList (tl, regex, char, followSet)
|
getFollowPositionsFromList (tl, regex, char, followSet)
|
||||||
end
|
end
|
||||||
@@ -501,8 +506,19 @@ struct
|
|||||||
end
|
end
|
||||||
|
|
||||||
fun fromString str =
|
fun fromString str =
|
||||||
case ParseDfa.parse (str ^ "\^@", 0) of
|
case ParseDfa.parse (str, 0) of
|
||||||
SOME (ast, _) => ToDfa.convert ast
|
SOME (ast, numStates) =>
|
||||||
|
let
|
||||||
|
val endMarker = CHAR_LITERAL {char = #"\^@", position = numStates + 1}
|
||||||
|
val ast = CONCAT
|
||||||
|
{ l = ast
|
||||||
|
, leftMaxState = numStates
|
||||||
|
, r = endMarker
|
||||||
|
, rightMaxState = numStates + 1
|
||||||
|
}
|
||||||
|
in
|
||||||
|
ToDfa.convert ast
|
||||||
|
end
|
||||||
| NONE => Vector.fromList []
|
| NONE => Vector.fromList []
|
||||||
|
|
||||||
type dfa = int vector vector
|
type dfa = int vector vector
|
||||||
|
|||||||
Reference in New Issue
Block a user