fix bug in regex-test: dfa-gen.sml should add the position of the endMarker to the followSet as well
This commit is contained in:
@@ -2,7 +2,6 @@ signature DFA_GEN_PARAMS =
|
|||||||
sig
|
sig
|
||||||
val endMarker: char
|
val endMarker: char
|
||||||
val charIsEqual: char * char -> bool
|
val charIsEqual: char * char -> bool
|
||||||
val charIsNotEqual: char * char -> bool
|
|
||||||
end
|
end
|
||||||
|
|
||||||
signature DFA_GEN =
|
signature DFA_GEN =
|
||||||
@@ -597,7 +596,12 @@ struct
|
|||||||
fun addToFollowSet (tree, followSet) =
|
fun addToFollowSet (tree, followSet) =
|
||||||
case tree of
|
case tree of
|
||||||
WILDCARD _ => followSet
|
WILDCARD _ => followSet
|
||||||
| CHAR_LITERAL _ => followSet
|
| CHAR_LITERAL {char, position} =>
|
||||||
|
(* we add the endMarker and its position to the followSet *)
|
||||||
|
if char = Fn.endMarker then
|
||||||
|
Set.insertOrReplace (position, [Char.ord Fn.endMarker], followSet)
|
||||||
|
else
|
||||||
|
followSet
|
||||||
| IS_ANY_CHARACTER _ => followSet
|
| IS_ANY_CHARACTER _ => followSet
|
||||||
| NOT_ANY_CHARACTER _ => followSet
|
| NOT_ANY_CHARACTER _ => followSet
|
||||||
| CONCAT {l, r, ...} =>
|
| CONCAT {l, r, ...} =>
|
||||||
@@ -605,11 +609,11 @@ struct
|
|||||||
val followSet = addToFollowSet (l, followSet)
|
val followSet = addToFollowSet (l, followSet)
|
||||||
val followSet = addToFollowSet (r, followSet)
|
val followSet = addToFollowSet (r, followSet)
|
||||||
|
|
||||||
val lp = lastpos (l, [])
|
val lpOfLeft = lastpos (l, [])
|
||||||
val fp = firstpos (r, [])
|
val fpOfRight = firstpos (r, [])
|
||||||
val fp = Set.addFromList (fp, Set.LEAF)
|
val fpOfRight = Set.addFromList (fpOfRight, Set.LEAF)
|
||||||
in
|
in
|
||||||
addKeysToFollowSet (lp, fp, followSet)
|
addKeysToFollowSet (lpOfLeft, fpOfRight, followSet)
|
||||||
end
|
end
|
||||||
| ALTERNATION {l, r, ...} =>
|
| ALTERNATION {l, r, ...} =>
|
||||||
let val followSet = addToFollowSet (l, followSet)
|
let val followSet = addToFollowSet (l, followSet)
|
||||||
@@ -617,9 +621,10 @@ struct
|
|||||||
end
|
end
|
||||||
| ZERO_OR_MORE child =>
|
| ZERO_OR_MORE child =>
|
||||||
let
|
let
|
||||||
val lp = lastpos (child, [])
|
val followSet = addToFollowSet (child, followSet)
|
||||||
val fp = firstpos (child, [])
|
val fp = firstpos (child, [])
|
||||||
val fp = Set.addFromList (fp, Set.LEAF)
|
val fp = Set.addFromList (fp, Set.LEAF)
|
||||||
|
val lp = lastpos (child, [])
|
||||||
in
|
in
|
||||||
addKeysToFollowSet (lp, fp, followSet)
|
addKeysToFollowSet (lp, fp, followSet)
|
||||||
end
|
end
|
||||||
@@ -666,11 +671,11 @@ struct
|
|||||||
fun isCharMatch (regex, pos, curChr) =
|
fun isCharMatch (regex, pos, curChr) =
|
||||||
case regex of
|
case regex of
|
||||||
CHAR_LITERAL {char, ...} => Fn.charIsEqual (char, curChr)
|
CHAR_LITERAL {char, ...} => Fn.charIsEqual (char, curChr)
|
||||||
| WILDCARD _ => Fn.charIsNotEqual (curChr, Fn.endMarker)
|
| WILDCARD _ => true
|
||||||
| IS_ANY_CHARACTER {chars, ...} => chrExistsInVec (0, chars, curChr)
|
| IS_ANY_CHARACTER {chars, ...} => chrExistsInVec (0, chars, curChr)
|
||||||
| NOT_ANY_CHARACTER {chars, ...} =>
|
| NOT_ANY_CHARACTER {chars, ...} =>
|
||||||
let val charIsValid = chrExistsInVec (0, chars, curChr)
|
let val charIsValid = chrExistsInVec (0, chars, curChr)
|
||||||
in not charIsValid andalso Fn.charIsNotEqual (curChr, Fn.endMarker)
|
in not charIsValid
|
||||||
end
|
end
|
||||||
| ALTERNATION {l, r, leftMaxState, ...} =>
|
| ALTERNATION {l, r, leftMaxState, ...} =>
|
||||||
if pos > leftMaxState then isCharMatch (r, pos, curChr)
|
if pos > leftMaxState then isCharMatch (r, pos, curChr)
|
||||||
@@ -743,25 +748,7 @@ struct
|
|||||||
, prevDstateLength
|
, prevDstateLength
|
||||||
) =
|
) =
|
||||||
if char < 0 then
|
if char < 0 then
|
||||||
if Vector.length dtran = unmarkedIdx then
|
(dstates, dtran)
|
||||||
(* no follows from this state: insert endMarker to signal end *)
|
|
||||||
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
|
|
||||||
else if Vector.length dstates = prevDstateLength then
|
|
||||||
(* no follows, except looping back to itself. So insert endMarker *)
|
|
||||||
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
|
|
||||||
else
|
|
||||||
(dstates, dtran)
|
|
||||||
else if Char.chr char = Fn.endMarker then
|
|
||||||
convertChar
|
|
||||||
( char - 1
|
|
||||||
, regex
|
|
||||||
, dstates
|
|
||||||
, dtran
|
|
||||||
, unmarkedState
|
|
||||||
, unmarkedIdx
|
|
||||||
, followSet
|
|
||||||
, prevDstateLength
|
|
||||||
)
|
|
||||||
else
|
else
|
||||||
let
|
let
|
||||||
val u = positionsThatCorrespondToChar
|
val u = positionsThatCorrespondToChar
|
||||||
@@ -922,7 +909,6 @@ structure CaseInsensitiveDfa =
|
|||||||
(struct
|
(struct
|
||||||
val endMarker = #"\^@"
|
val endMarker = #"\^@"
|
||||||
fun charIsEqual (a: char, b: char) = Char.toLower a = Char.toLower b
|
fun charIsEqual (a: char, b: char) = Char.toLower a = Char.toLower b
|
||||||
fun charIsNotEqual (a: char, b: char) = a <> b
|
|
||||||
end)
|
end)
|
||||||
|
|
||||||
structure CaseSensitiveDfa =
|
structure CaseSensitiveDfa =
|
||||||
@@ -930,5 +916,4 @@ structure CaseSensitiveDfa =
|
|||||||
(struct
|
(struct
|
||||||
val endMarker = #"\^@"
|
val endMarker = #"\^@"
|
||||||
fun charIsEqual (a: char, b: char) = a = b
|
fun charIsEqual (a: char, b: char) = a = b
|
||||||
fun charIsNotEqual (a: char, b: char) = a <> b
|
|
||||||
end)
|
end)
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ struct
|
|||||||
(* arrange *)
|
(* arrange *)
|
||||||
val sentence = "favo"
|
val sentence = "favo"
|
||||||
val regexString = "favou?"
|
val regexString = "favou?"
|
||||||
val dfa = CsDfa.fromString "favorite"
|
val dfa = CsDfa.fromString regexString
|
||||||
|
|
||||||
(* act *)
|
(* act *)
|
||||||
val matches = CsDfa.matchString (dfa, sentence)
|
val matches = CsDfa.matchString (dfa, sentence)
|
||||||
|
|||||||
Reference in New Issue
Block a user