Add support for [^negated_character_classes] to the regex datatype and the nullable/position/follow computations, although the parser does not produce them yet
This commit is contained in:
@@ -4,6 +4,7 @@ struct
|
|||||||
CHAR_LITERAL of {char: char, position: int}
|
CHAR_LITERAL of {char: char, position: int}
|
||||||
| WILDCARD of int
|
| WILDCARD of int
|
||||||
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
| IS_ANY_CHARACTER of {chars: char vector, position: int}
|
||||||
|
| NOT_ANY_CHARACTER of {chars: char vector, position: int}
|
||||||
| CONCAT of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
| CONCAT of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
||||||
| ALTERNATION of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
| ALTERNATION of {l: regex, r: regex, leftMaxState: int, rightMaxState: int}
|
||||||
| ZERO_OR_ONE of regex
|
| ZERO_OR_ONE of regex
|
||||||
@@ -11,6 +12,8 @@ struct
|
|||||||
| ONE_OR_MORE of regex
|
| ONE_OR_MORE of regex
|
||||||
| GROUP of regex
|
| GROUP of regex
|
||||||
|
|
||||||
|
val endMarker = #"\^@"
|
||||||
|
|
||||||
structure Set =
|
structure Set =
|
||||||
struct
|
struct
|
||||||
datatype 'a set = BRANCH of 'a set * int * 'a * 'a set | LEAF
|
datatype 'a set = BRANCH of 'a set * int * 'a * 'a set | LEAF
|
||||||
@@ -276,6 +279,7 @@ struct
|
|||||||
CHAR_LITERAL _ => false
|
CHAR_LITERAL _ => false
|
||||||
| WILDCARD _ => false
|
| WILDCARD _ => false
|
||||||
| IS_ANY_CHARACTER _ => false
|
| IS_ANY_CHARACTER _ => false
|
||||||
|
| NOT_ANY_CHARACTER _ => false
|
||||||
|
|
||||||
| CONCAT {l, r, ...} => isNullable l andalso isNullable r
|
| CONCAT {l, r, ...} => isNullable l andalso isNullable r
|
||||||
| ALTERNATION {l, r, ...} => isNullable l orelse isNullable r
|
| ALTERNATION {l, r, ...} => isNullable l orelse isNullable r
|
||||||
@@ -290,6 +294,7 @@ struct
|
|||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
|
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD i => i :: acc
|
||||||
|
|
||||||
| CONCAT {l, r, ...} =>
|
| CONCAT {l, r, ...} =>
|
||||||
@@ -313,6 +318,7 @@ struct
|
|||||||
case tree of
|
case tree of
|
||||||
CHAR_LITERAL {position, ...} => position :: acc
|
CHAR_LITERAL {position, ...} => position :: acc
|
||||||
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
| IS_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
|
| NOT_ANY_CHARACTER {position, ...} => position :: acc
|
||||||
| WILDCARD i => i :: acc
|
| WILDCARD i => i :: acc
|
||||||
|
|
||||||
| CONCAT {l, r, ...} =>
|
| CONCAT {l, r, ...} =>
|
||||||
@@ -363,11 +369,18 @@ struct
|
|||||||
* as an end marker which will not appear anywhere else.
|
* as an end marker which will not appear anywhere else.
|
||||||
* So we don't want to match it, but the wildcard can match
|
* So we don't want to match it, but the wildcard can match
|
||||||
* any other character that has a different ASCII code. *)
|
* any other character that has a different ASCII code. *)
|
||||||
{sawConcat = false, follows = [], charIsMatch = curChr <> #"\^@"}
|
{sawConcat = false, follows = [], charIsMatch = curChr <> endMarker}
|
||||||
| IS_ANY_CHARACTER {chars, ...} =>
|
| IS_ANY_CHARACTER {chars, ...} =>
|
||||||
let val chrExists = chrExistsInVec (0, chars, curChr)
|
let val chrExists = chrExistsInVec (0, chars, curChr)
|
||||||
in {sawConcat = false, follows = [], charIsMatch = chrExists}
|
in {sawConcat = false, follows = [], charIsMatch = chrExists}
|
||||||
end
|
end
|
||||||
|
| NOT_ANY_CHARACTER {chars, ...} =>
|
||||||
|
let
|
||||||
|
val charIsValid = chrExistsInVec (0, chars, curChr)
|
||||||
|
val charIsValid = not charIsValid andalso curChr <> endMarker
|
||||||
|
in
|
||||||
|
{sawConcat = false, follows = [], charIsMatch = charIsValid}
|
||||||
|
end
|
||||||
| ALTERNATION {l, r, leftMaxState, rightMaxState} =>
|
| ALTERNATION {l, r, leftMaxState, rightMaxState} =>
|
||||||
let val nodeToFollow = if pos <= leftMaxState then l else r
|
let val nodeToFollow = if pos <= leftMaxState then l else r
|
||||||
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
in getFollowsForPositionAndChar (nodeToFollow, pos, curChr)
|
||||||
@@ -578,7 +591,8 @@ struct
|
|||||||
case ParseDfa.parse (str, 0) of
|
case ParseDfa.parse (str, 0) of
|
||||||
SOME (ast, numStates) =>
|
SOME (ast, numStates) =>
|
||||||
let
|
let
|
||||||
val endMarker = CHAR_LITERAL {char = #"\^@", position = numStates + 1}
|
val endMarker =
|
||||||
|
CHAR_LITERAL {char = endMarker, position = numStates + 1}
|
||||||
val ast = CONCAT
|
val ast = CONCAT
|
||||||
{ l = ast
|
{ l = ast
|
||||||
, leftMaxState = numStates
|
, leftMaxState = numStates
|
||||||
|
|||||||
Reference in New Issue
Block a user