begin computing firstpos and lastpos during parsing
This commit is contained in:
@@ -27,14 +27,91 @@ struct
|
|||||||
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
||||||
| NOT_ANY_CHARACTER of {chars: char vector, position: int}
|
| NOT_ANY_CHARACTER of {chars: char vector, position: int}
|
||||||
| CONCAT of
|
| CONCAT of
|
||||||
{l: parse_tree, r: parse_tree, leftMaxState: int, rightMaxState: int}
|
{ l: parse_tree
|
||||||
|
, r: parse_tree
|
||||||
|
, leftMaxState: int
|
||||||
|
, rightMaxState: int
|
||||||
|
, firstpos: int list
|
||||||
|
, lastpos: int list
|
||||||
|
}
|
||||||
| ALTERNATION of
|
| ALTERNATION of
|
||||||
{l: parse_tree, r: parse_tree, leftMaxState: int, rightMaxState: int}
|
{ l: parse_tree
|
||||||
|
, r: parse_tree
|
||||||
|
, leftMaxState: int
|
||||||
|
, rightMaxState: int
|
||||||
|
, firstpos: int list
|
||||||
|
, lastpos: int list
|
||||||
|
}
|
||||||
| ZERO_OR_ONE of parse_tree
|
| ZERO_OR_ONE of parse_tree
|
||||||
| ZERO_OR_MORE of parse_tree
|
| ZERO_OR_MORE of parse_tree
|
||||||
| ONE_OR_MORE of parse_tree
|
| ONE_OR_MORE of parse_tree
|
||||||
| GROUP of parse_tree
|
| GROUP of parse_tree
|
||||||
|
|
||||||
|
(* isNullable tree
 *
 * Structural nullability test over a parse tree:
 *   - every leaf (literal, wildcard, character class, negated class)
 *     is non-nullable;
 *   - a CONCAT is nullable only when both children are;
 *   - an ALTERNATION is nullable when either child is;
 *   - ZERO_OR_ONE and ZERO_OR_MORE are always nullable;
 *   - ONE_OR_MORE and GROUP inherit the answer from the wrapped tree.
 *
 * Returns a bool; the tree is only inspected, never rebuilt. *)
fun isNullable (CHAR_LITERAL _) = false
  | isNullable (WILDCARD _) = false
  | isNullable (IS_ANY_CHARACTER _) = false
  | isNullable (NOT_ANY_CHARACTER _) = false
  | isNullable (CONCAT {l, r, ...}) = isNullable l andalso isNullable r
  | isNullable (ALTERNATION {l, r, ...}) = isNullable l orelse isNullable r
  | isNullable (ZERO_OR_ONE _) = true
  | isNullable (ZERO_OR_MORE _) = true
  | isNullable (ONE_OR_MORE inner) = isNullable inner
  | isNullable (GROUP inner) = isNullable inner
|
||||||
|
|
||||||
|
|
||||||
|
(* firstpos (tree, acc)
 *
 * Pushes onto `acc` the leaf positions at which a match of `tree` can
 * begin, and returns the extended list.
 *
 *   - A leaf conses its own position (the int carried by WILDCARD, or
 *     the `position` field of the other leaves) onto `acc`.
 *   - CONCAT always contributes the left child's positions; the right
 *     child's are added as well only when the left child is nullable.
 *   - ALTERNATION contributes both children, left first.
 *   - ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE and GROUP defer to the
 *     wrapped tree.
 *
 * Positions are consed, so the most recently visited leaf ends up at
 * the head of the result. *)
fun firstpos (CHAR_LITERAL {position, ...}, acc) = position :: acc
  | firstpos (IS_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | firstpos (NOT_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | firstpos (WILDCARD i, acc) = i :: acc
  | firstpos (CONCAT {l, r, ...}, acc) =
      if isNullable l then
        (* left side may match nothing, so the right side can start too *)
        firstpos (r, firstpos (l, acc))
      else
        firstpos (l, acc)
  | firstpos (ALTERNATION {l, r, ...}, acc) = firstpos (r, firstpos (l, acc))
  | firstpos (ZERO_OR_ONE inner, acc) = firstpos (inner, acc)
  | firstpos (ZERO_OR_MORE inner, acc) = firstpos (inner, acc)
  | firstpos (ONE_OR_MORE inner, acc) = firstpos (inner, acc)
  | firstpos (GROUP inner, acc) = firstpos (inner, acc)
|
||||||
|
|
||||||
|
(* lastpos (tree, acc)
 *
 * Pushes onto `acc` the leaf positions at which a match of `tree` can
 * end, and returns the extended list.
 *
 *   - A leaf conses its own position (the int carried by WILDCARD, or
 *     the `position` field of the other leaves) onto `acc`.
 *   - CONCAT always contributes the right child's positions; the left
 *     child's are added (before the right's) only when the right child
 *     is nullable.
 *   - ALTERNATION contributes both children, left first.
 *   - ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE and GROUP defer to the
 *     wrapped tree.
 *
 * Positions are consed, so the most recently visited leaf ends up at
 * the head of the result. *)
fun lastpos (CHAR_LITERAL {position, ...}, acc) = position :: acc
  | lastpos (IS_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | lastpos (NOT_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | lastpos (WILDCARD i, acc) = i :: acc
  | lastpos (CONCAT {l, r, ...}, acc) =
      if isNullable r then
        (* right side may match nothing, so the left side can end the match *)
        lastpos (r, lastpos (l, acc))
      else
        lastpos (r, acc)
  | lastpos (ALTERNATION {l, r, ...}, acc) = lastpos (r, lastpos (l, acc))
  | lastpos (ZERO_OR_ONE inner, acc) = lastpos (inner, acc)
  | lastpos (ZERO_OR_MORE inner, acc) = lastpos (inner, acc)
  | lastpos (ONE_OR_MORE inner, acc) = lastpos (inner, acc)
  | lastpos (GROUP inner, acc) = lastpos (inner, acc)
|
||||||
|
|
||||||
structure Set =
|
structure Set =
|
||||||
struct
|
struct
|
||||||
datatype 'a set = BRANCH of 'a set * int * 'a * 'a set | LEAF
|
datatype 'a set = BRANCH of 'a set * int * 'a * 'a set | LEAF
|
||||||
@@ -389,11 +466,19 @@ struct
|
|||||||
case climb (pos + 2, str, chr, altLevel, stateNum + 1) of
|
case climb (pos + 2, str, chr, altLevel, stateNum + 1) of
|
||||||
SOME (pos, rhs, rightStateNum) =>
|
SOME (pos, rhs, rightStateNum) =>
|
||||||
let
|
let
|
||||||
|
val fp = let val acc = firstpos (lhs, [])
|
||||||
|
in firstpos (rhs, acc)
|
||||||
|
end
|
||||||
|
val lp = let val acc = lastpos (lhs, [])
|
||||||
|
in lastpos (rhs, acc)
|
||||||
|
end
|
||||||
val result = ALTERNATION
|
val result = ALTERNATION
|
||||||
{ l = lhs
|
{ l = lhs
|
||||||
, r = rhs
|
, r = rhs
|
||||||
, leftMaxState = stateNum
|
, leftMaxState = stateNum
|
||||||
, rightMaxState = rightStateNum
|
, rightMaxState = rightStateNum
|
||||||
|
, firstpos = fp
|
||||||
|
, lastpos = lp
|
||||||
}
|
}
|
||||||
in
|
in
|
||||||
SOME (pos, result, rightStateNum)
|
SOME (pos, result, rightStateNum)
|
||||||
@@ -432,11 +517,29 @@ struct
|
|||||||
(case climb (nextPos, str, curAtom, concatLevel, atomStateNum) of
|
(case climb (nextPos, str, curAtom, concatLevel, atomStateNum) of
|
||||||
SOME (pos, rhs, rightStateNum) =>
|
SOME (pos, rhs, rightStateNum) =>
|
||||||
let
|
let
|
||||||
|
val fp =
|
||||||
|
if isNullable lhs then
|
||||||
|
let val acc = firstpos (lhs, [])
|
||||||
|
in firstpos (rhs, acc)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
firstpos (lhs, [])
|
||||||
|
|
||||||
|
val lp =
|
||||||
|
if isNullable rhs then
|
||||||
|
let val acc = lastpos (lhs, [])
|
||||||
|
in lastpos (rhs, acc)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
lastpos (rhs, [])
|
||||||
|
|
||||||
val result = CONCAT
|
val result = CONCAT
|
||||||
{ l = lhs
|
{ l = lhs
|
||||||
, r = rhs
|
, r = rhs
|
||||||
, leftMaxState = stateNum
|
, leftMaxState = stateNum
|
||||||
, rightMaxState = rightStateNum
|
, rightMaxState = rightStateNum
|
||||||
|
, firstpos = fp
|
||||||
|
, lastpos = lp
|
||||||
}
|
}
|
||||||
in
|
in
|
||||||
SOME (pos, result, rightStateNum)
|
SOME (pos, result, rightStateNum)
|
||||||
@@ -463,70 +566,6 @@ struct
|
|||||||
|
|
||||||
structure ToDfa =
|
structure ToDfa =
|
||||||
struct
|
struct
|
||||||
(* isNullable tree
 *
 * Structural nullability test over a parse tree:
 *   - every leaf (literal, wildcard, character class, negated class)
 *     is non-nullable;
 *   - a CONCAT is nullable only when both children are;
 *   - an ALTERNATION is nullable when either child is;
 *   - ZERO_OR_ONE and ZERO_OR_MORE are always nullable;
 *   - ONE_OR_MORE and GROUP inherit the answer from the wrapped tree.
 *
 * Returns a bool; the tree is only inspected, never rebuilt. *)
fun isNullable (CHAR_LITERAL _) = false
  | isNullable (WILDCARD _) = false
  | isNullable (IS_ANY_CHARACTER _) = false
  | isNullable (NOT_ANY_CHARACTER _) = false
  | isNullable (CONCAT {l, r, ...}) = isNullable l andalso isNullable r
  | isNullable (ALTERNATION {l, r, ...}) = isNullable l orelse isNullable r
  | isNullable (ZERO_OR_ONE _) = true
  | isNullable (ZERO_OR_MORE _) = true
  | isNullable (ONE_OR_MORE inner) = isNullable inner
  | isNullable (GROUP inner) = isNullable inner
|
|
||||||
|
|
||||||
(* firstpos (tree, acc)
 *
 * Pushes onto `acc` the leaf positions at which a match of `tree` can
 * begin, and returns the extended list.
 *
 *   - A leaf conses its own position (the int carried by WILDCARD, or
 *     the `position` field of the other leaves) onto `acc`.
 *   - CONCAT always contributes the left child's positions; the right
 *     child's are added as well only when the left child is nullable.
 *   - ALTERNATION contributes both children, left first.
 *   - ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE and GROUP defer to the
 *     wrapped tree.
 *
 * Positions are consed, so the most recently visited leaf ends up at
 * the head of the result. *)
fun firstpos (CHAR_LITERAL {position, ...}, acc) = position :: acc
  | firstpos (IS_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | firstpos (NOT_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | firstpos (WILDCARD i, acc) = i :: acc
  | firstpos (CONCAT {l, r, ...}, acc) =
      if isNullable l then
        (* left side may match nothing, so the right side can start too *)
        firstpos (r, firstpos (l, acc))
      else
        firstpos (l, acc)
  | firstpos (ALTERNATION {l, r, ...}, acc) = firstpos (r, firstpos (l, acc))
  | firstpos (ZERO_OR_ONE inner, acc) = firstpos (inner, acc)
  | firstpos (ZERO_OR_MORE inner, acc) = firstpos (inner, acc)
  | firstpos (ONE_OR_MORE inner, acc) = firstpos (inner, acc)
  | firstpos (GROUP inner, acc) = firstpos (inner, acc)
|
|
||||||
|
|
||||||
(* lastpos (tree, acc)
 *
 * Pushes onto `acc` the leaf positions at which a match of `tree` can
 * end, and returns the extended list.
 *
 *   - A leaf conses its own position (the int carried by WILDCARD, or
 *     the `position` field of the other leaves) onto `acc`.
 *   - CONCAT always contributes the right child's positions; the left
 *     child's are added (before the right's) only when the right child
 *     is nullable.
 *   - ALTERNATION contributes both children, left first.
 *   - ZERO_OR_ONE, ZERO_OR_MORE, ONE_OR_MORE and GROUP defer to the
 *     wrapped tree.
 *
 * Positions are consed, so the most recently visited leaf ends up at
 * the head of the result. *)
fun lastpos (CHAR_LITERAL {position, ...}, acc) = position :: acc
  | lastpos (IS_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | lastpos (NOT_ANY_CHARACTER {position, ...}, acc) = position :: acc
  | lastpos (WILDCARD i, acc) = i :: acc
  | lastpos (CONCAT {l, r, ...}, acc) =
      if isNullable r then
        (* right side may match nothing, so the left side can end the match *)
        lastpos (r, lastpos (l, acc))
      else
        lastpos (r, acc)
  | lastpos (ALTERNATION {l, r, ...}, acc) = lastpos (r, lastpos (l, acc))
  | lastpos (ZERO_OR_ONE inner, acc) = lastpos (inner, acc)
  | lastpos (ZERO_OR_MORE inner, acc) = lastpos (inner, acc)
  | lastpos (ONE_OR_MORE inner, acc) = lastpos (inner, acc)
  | lastpos (GROUP inner, acc) = lastpos (inner, acc)
|
|
||||||
|
|
||||||
fun followpos (char, regex, acc) =
|
fun followpos (char, regex, acc) =
|
||||||
case regex of
|
case regex of
|
||||||
CONCAT {r, ...} => firstpos (r, acc)
|
CONCAT {r, ...} => firstpos (r, acc)
|
||||||
@@ -576,7 +615,7 @@ struct
|
|||||||
in
|
in
|
||||||
{sawConcat = false, follows = [], charIsMatch = charIsValid}
|
{sawConcat = false, follows = [], charIsMatch = charIsValid}
|
||||||
end
|
end
|
||||||
| ALTERNATION {l, r, leftMaxState, rightMaxState} =>
|
| ALTERNATION {l, r, leftMaxState, rightMaxState, ...} =>
|
||||||
let val nodeToFollow = if pos <= leftMaxState then l else r
|
let val nodeToFollow = if pos <= leftMaxState then l else r
|
||||||
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
||||||
end
|
end
|
||||||
@@ -788,6 +827,7 @@ struct
|
|||||||
case ParseDfa.parse (str, 0) of
|
case ParseDfa.parse (str, 0) of
|
||||||
SOME (ast, numStates) =>
|
SOME (ast, numStates) =>
|
||||||
let
|
let
|
||||||
|
val fp = firstpos (ast, [])
|
||||||
val endMarker =
|
val endMarker =
|
||||||
CHAR_LITERAL {char = Fn.endMarker, position = numStates + 1}
|
CHAR_LITERAL {char = Fn.endMarker, position = numStates + 1}
|
||||||
val ast = CONCAT
|
val ast = CONCAT
|
||||||
@@ -795,6 +835,8 @@ struct
|
|||||||
, leftMaxState = numStates
|
, leftMaxState = numStates
|
||||||
, r = endMarker
|
, r = endMarker
|
||||||
, rightMaxState = numStates + 1
|
, rightMaxState = numStates + 1
|
||||||
|
, firstpos = fp
|
||||||
|
, lastpos = []
|
||||||
}
|
}
|
||||||
in
|
in
|
||||||
ToDfa.convert ast
|
ToDfa.convert ast
|
||||||
|
|||||||
Reference in New Issue
Block a user