Add handling for the [character class] regex type (note: character classes are not parsed yet)
This commit is contained in:
@@ -2,13 +2,14 @@ structure DfaGen =
|
|||||||
struct
|
struct
|
||||||
datatype regex =
|
datatype regex =
|
||||||
CHAR_LITERAL of {char: char, position: int}
|
CHAR_LITERAL of {char: char, position: int}
|
||||||
|
| WILDCARD of int
|
||||||
|
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
||||||
| CONCAT of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
| CONCAT of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
||||||
| ALTERNATION of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
| ALTERNATION of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
||||||
| ZERO_OR_ONE of regex
|
| ZERO_OR_ONE of regex
|
||||||
| ZERO_OR_MORE of regex
|
| ZERO_OR_MORE of regex
|
||||||
| ONE_OR_MORE of regex
|
| ONE_OR_MORE of regex
|
||||||
| GROUP of regex
|
| GROUP of regex
|
||||||
| WILDCARD of int
|
|
||||||
|
|
||||||
structure Set =
|
structure Set =
|
||||||
struct
|
struct
|
||||||
@@ -274,6 +275,7 @@ struct
|
|||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL _ => false
|
CHAR_LITERAL _ => false
|
||||||
| WILDCARD _ => false
|
| WILDCARD _ => false
|
||||||
|
| IS_ANY_CHARACTER _ => false
|
||||||
|
|
||||||
| CONCAT {l, r, ...} => isNullable l andalso isNullable r
|
| CONCAT {l, r, ...} => isNullable l andalso isNullable r
|
||||||
| ALTERNATION {l, r, ...} => isNullable l orelse isNullable r
|
| ALTERNATION {l, r, ...} => isNullable l orelse isNullable r
|
||||||
@@ -287,6 +289,7 @@ struct
|
|||||||
fun firstpos (tree, acc) =
|
fun firstpos (tree, acc) =
|
||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD i => i :: acc
|
||||||
|
|
||||||
| CONCAT {l, r, ...} =>
|
| CONCAT {l, r, ...} =>
|
||||||
@@ -309,6 +312,7 @@ struct
|
|||||||
fun lastpos (tree, acc) =
|
fun lastpos (tree, acc) =
|
||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD i => i :: acc
|
||||||
|
|
||||||
| CONCAT {l, r, ...} =>
|
| CONCAT {l, r, ...} =>
|
||||||
@@ -335,6 +339,13 @@ struct
|
|||||||
| ONE_OR_MORE r => firstpos (r, acc)
|
| ONE_OR_MORE r => firstpos (r, acc)
|
||||||
| _ => acc
|
| _ => acc
|
||||||
|
|
||||||
|
fun chrExistsInVec (idx, vec, curChr) =
|
||||||
|
if idx = Vector.length vec then
|
||||||
|
false
|
||||||
|
else
|
||||||
|
Vector.sub (vec, idx) = curChr
|
||||||
|
orelse chrExistsInVec (idx + 1, vec, curChr)
|
||||||
|
|
||||||
(* Does two things:
|
(* Does two things:
|
||||||
* 1. Descends to the leaf matching 'pos'.
|
* 1. Descends to the leaf matching 'pos'.
|
||||||
* 2. If the character at 'pos' matches the current character,
|
* 2. If the character at 'pos' matches the current character,
|
||||||
@@ -353,6 +364,10 @@ struct
|
|||||||
* So we don't want to match it, but the wildcard can match
|
* So we don't want to match it, but the wildcard can match
|
||||||
* any other character that has a different ASCII code. *)
|
* any other character that has a different ASCII code. *)
|
||||||
{sawConcat = false, follows = [], charIsMatch = curChr <> #"\^@"}
|
{sawConcat = false, follows = [], charIsMatch = curChr <> #"\^@"}
|
||||||
|
| IS_ANY_CHARACTER {chars, ...} =>
|
||||||
|
let val chrExists = chrExistsInVec (0, chars, curChr)
|
||||||
|
in {sawConcat = false, follows = [], charIsMatch = chrExists}
|
||||||
|
end
|
||||||
| ALTERNATION {l, r, leftMaxState, rightMaxState} =>
|
| ALTERNATION {l, r, leftMaxState, rightMaxState} =>
|
||||||
let val nodeToFollow = if pos <= leftMaxState then l else r
|
let val nodeToFollow = if pos <= leftMaxState then l else r
|
||||||
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
||||||
|
|||||||
Reference in New Issue
Block a user