Add a list of follows to the leaves of the regex parse tree (this commit only changes the data type; the follows lists still need to be populated later)
This commit is contained in:
@@ -22,10 +22,10 @@ end
|
|||||||
functor MakeDfaGen(Fn: DFA_GEN_PARAMS): DFA_GEN =
|
functor MakeDfaGen(Fn: DFA_GEN_PARAMS): DFA_GEN =
|
||||||
struct
|
struct
|
||||||
datatype parse_tree =
|
datatype parse_tree =
|
||||||
CHAR_LITERAL of {char: char, position: int}
|
CHAR_LITERAL of {char: char, position: int, follows: int list}
|
||||||
| WILDCARD of int
|
| WILDCARD of {position: int, follows: int list}
|
||||||
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
| IS_ANY_CHARACTER of {chars: char vector, position: int, follows: int list}
|
||||||
| NOT_ANY_CHARACTER of {chars: char vector, position: int}
|
| NOT_ANY_CHARACTER of {chars: char vector, position: int, follows: int list}
|
||||||
| CONCAT of
|
| CONCAT of
|
||||||
{ l: parse_tree
|
{ l: parse_tree
|
||||||
, r: parse_tree
|
, r: parse_tree
|
||||||
@@ -66,7 +66,7 @@ struct
|
|||||||
fun firstpos (tree, acc) =
|
fun firstpos (tree, acc) =
|
||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD {position, ...} => position :: acc
|
||||||
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| CONCAT {firstpos = fp, ...} => fp @ acc
|
| CONCAT {firstpos = fp, ...} => fp @ acc
|
||||||
@@ -79,7 +79,7 @@ struct
|
|||||||
fun lastpos (tree, acc) =
|
fun lastpos (tree, acc) =
|
||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD {position, ...} => position :: acc
|
||||||
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| CONCAT {lastpos = lp, ...} => lp @ acc
|
| CONCAT {lastpos = lp, ...} => lp @ acc
|
||||||
@@ -113,10 +113,8 @@ struct
|
|||||||
case lst of
|
case lst of
|
||||||
[] => tree
|
[] => tree
|
||||||
| (k, v) :: tl =>
|
| (k, v) :: tl =>
|
||||||
let
|
let val tree = insertOrReplace (k, v, tree)
|
||||||
val tree = insertOrReplace (k, v, tree)
|
in addFromList (tl, tree)
|
||||||
in
|
|
||||||
addFromList (tl, tree)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
fun getOrDefault (findKey, tree, default) =
|
fun getOrDefault (findKey, tree, default) =
|
||||||
@@ -360,7 +358,9 @@ struct
|
|||||||
case getCharsInBrackets (pos, str, []) of
|
case getCharsInBrackets (pos, str, []) of
|
||||||
SOME (pos, chars) =>
|
SOME (pos, chars) =>
|
||||||
let
|
let
|
||||||
val node = IS_ANY_CHARACTER {chars = chars, position = stateNum + 1}
|
val node =
|
||||||
|
IS_ANY_CHARACTER
|
||||||
|
{chars = chars, position = stateNum + 1, follows = []}
|
||||||
in
|
in
|
||||||
SOME (pos, node, stateNum + 1)
|
SOME (pos, node, stateNum + 1)
|
||||||
end
|
end
|
||||||
@@ -371,7 +371,8 @@ struct
|
|||||||
SOME (pos, chars) =>
|
SOME (pos, chars) =>
|
||||||
let
|
let
|
||||||
val node =
|
val node =
|
||||||
NOT_ANY_CHARACTER {chars = chars, position = stateNum + 1}
|
NOT_ANY_CHARACTER
|
||||||
|
{chars = chars, position = stateNum + 1, follows = []}
|
||||||
in
|
in
|
||||||
SOME (pos, node, stateNum + 1)
|
SOME (pos, node, stateNum + 1)
|
||||||
end
|
end
|
||||||
@@ -408,14 +409,19 @@ struct
|
|||||||
NONE
|
NONE
|
||||||
else if isValid then
|
else if isValid then
|
||||||
let
|
let
|
||||||
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
val chr =
|
||||||
|
CHAR_LITERAL
|
||||||
|
{char = chr, position = stateNum + 1, follows = []}
|
||||||
in
|
in
|
||||||
SOME (pos + 2, chr, stateNum + 1)
|
SOME (pos + 2, chr, stateNum + 1)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
NONE
|
NONE
|
||||||
end
|
end
|
||||||
| #"." => SOME (pos + 1, WILDCARD (stateNum + 1), stateNum + 1)
|
| #"." =>
|
||||||
|
let val w = WILDCARD {position = stateNum + 1, follows = []}
|
||||||
|
in SOME (pos + 1, w, stateNum + 1)
|
||||||
|
end
|
||||||
| #"[" =>
|
| #"[" =>
|
||||||
if pos + 1 = String.size str then
|
if pos + 1 = String.size str then
|
||||||
NONE
|
NONE
|
||||||
@@ -434,8 +440,12 @@ struct
|
|||||||
if Fn.charIsEqual (chr, Fn.endMarker) then
|
if Fn.charIsEqual (chr, Fn.endMarker) then
|
||||||
NONE
|
NONE
|
||||||
else
|
else
|
||||||
let val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
let
|
||||||
in SOME (pos + 1, chr, stateNum + 1)
|
val chr =
|
||||||
|
CHAR_LITERAL
|
||||||
|
{char = chr, position = stateNum + 1, follows = []}
|
||||||
|
in
|
||||||
|
SOME (pos + 1, chr, stateNum + 1)
|
||||||
end
|
end
|
||||||
|
|
||||||
and climb (pos, str, lhs, level, stateNum) : (int * parse_tree * int) option =
|
and climb (pos, str, lhs, level, stateNum) : (int * parse_tree * int) option =
|
||||||
@@ -449,7 +459,9 @@ struct
|
|||||||
else if pos + 1 < String.size str then
|
else if pos + 1 < String.size str then
|
||||||
let
|
let
|
||||||
val chr = String.sub (str, pos + 1)
|
val chr = String.sub (str, pos + 1)
|
||||||
val chr = CHAR_LITERAL {char = chr, position = stateNum + 1}
|
val chr =
|
||||||
|
CHAR_LITERAL
|
||||||
|
{char = chr, position = stateNum + 1, follows = []}
|
||||||
in
|
in
|
||||||
case climb (pos + 2, str, chr, altLevel, stateNum + 1) of
|
case climb (pos + 2, str, chr, altLevel, stateNum + 1) of
|
||||||
SOME (pos, rhs, rightStateNum) =>
|
SOME (pos, rhs, rightStateNum) =>
|
||||||
@@ -583,7 +595,7 @@ struct
|
|||||||
* even if the curChr is the endmarker. *)
|
* even if the curChr is the endmarker. *)
|
||||||
fun getFollowsForPositionAndChar (regex: parse_tree, pos, curChr) =
|
fun getFollowsForPositionAndChar (regex: parse_tree, pos, curChr) =
|
||||||
case regex of
|
case regex of
|
||||||
CHAR_LITERAL {char, position = _} =>
|
CHAR_LITERAL {char, ...} =>
|
||||||
let val charIsMatch = Fn.charIsEqual (char, curChr)
|
let val charIsMatch = Fn.charIsEqual (char, curChr)
|
||||||
in {sawConcat = false, follows = [], charIsMatch = charIsMatch}
|
in {sawConcat = false, follows = [], charIsMatch = charIsMatch}
|
||||||
end
|
end
|
||||||
@@ -817,7 +829,8 @@ struct
|
|||||||
let
|
let
|
||||||
val fp = firstpos (ast, [])
|
val fp = firstpos (ast, [])
|
||||||
val endMarker =
|
val endMarker =
|
||||||
CHAR_LITERAL {char = Fn.endMarker, position = numStates + 1}
|
CHAR_LITERAL
|
||||||
|
{char = Fn.endMarker, position = numStates + 1, follows = []}
|
||||||
val ast = CONCAT
|
val ast = CONCAT
|
||||||
{ l = ast
|
{ l = ast
|
||||||
, leftMaxState = numStates
|
, leftMaxState = numStates
|
||||||
|
|||||||
Reference in New Issue
Block a user