Make dfa-gen.sml compile again, at parity with its previous behaviour, before reimplementing it

This commit is contained in:
2025-10-11 13:23:44 +01:00
parent 96f0afc2b2
commit b2931753d0
2 changed files with 29 additions and 19 deletions

View File

@@ -639,8 +639,6 @@ struct
let let
val record = {transitions = newStates, marked = false} val record = {transitions = newStates, marked = false}
val dstates = Vector.concat [dstates, Vector.fromList [record]] val dstates = Vector.concat [dstates, Vector.fromList [record]]
val () = print
("658 new append = " ^ PolyML.makestring newStates ^ "\n")
in in
(pos, dstates) (pos, dstates)
end end
@@ -685,7 +683,8 @@ struct
| ONE_OR_MORE child => isCharMatch (child, pos, curChr) | ONE_OR_MORE child => isCharMatch (child, pos, curChr)
| GROUP child => isCharMatch (child, pos, curChr) | GROUP child => isCharMatch (child, pos, curChr)
fun positionsThatCorrespondToChar (char, curStates, regex, acc, followSet) = fun positionsThatCorrespondToChar
(char, curStates, regex, acc, followSet, hasAnyMatch) =
case curStates of case curStates of
[] => List.concat (Set.valuesToList acc) [] => List.concat (Set.valuesToList acc)
| pos :: tl => | pos :: tl =>
@@ -702,10 +701,12 @@ struct
(* store union of new and previous follows so far *) (* store union of new and previous follows so far *)
val acc = Set.insertOrReplace (char, allFollowList, acc) val acc = Set.insertOrReplace (char, allFollowList, acc)
in in
positionsThatCorrespondToChar (char, tl, regex, acc, followSet) positionsThatCorrespondToChar
(char, tl, regex, acc, followSet, true)
end end
else else
positionsThatCorrespondToChar (char, tl, regex, acc, followSet) positionsThatCorrespondToChar
(char, tl, regex, acc, followSet, hasAnyMatch)
structure Dtran = structure Dtran =
struct struct
@@ -741,11 +742,24 @@ struct
, followSet , followSet
) = ) =
if char < 0 then if char < 0 then
(dstates, dtran) if Vector.length dtran = unmarkedIdx then
(dstates, Dtran.insert (unmarkedIdx, Char.ord Fn.endMarker, 0, dtran))
else
(dstates, dtran)
else if Char.chr char = Fn.endMarker then
convertChar
( char - 1
, regex
, dstates
, dtran
, unmarkedState
, unmarkedIdx
, followSet
)
else else
let let
val u = positionsThatCorrespondToChar val u = positionsThatCorrespondToChar
(char, unmarkedState, regex, Set.LEAF, followSet) (char, unmarkedState, regex, Set.LEAF, followSet, false)
in in
case u of case u of
[] => [] =>
@@ -804,17 +818,10 @@ struct
convertLoop (regex, dstates, dtran, followSet) convertLoop (regex, dstates, dtran, followSet)
end end
| NONE => | NONE =>
let Vector.map
val result = (fn set =>
Vector.map Vector.tabulate (256, fn i => Set.getOrDefault (i, set, ~1)))
(fn set => dtran
Vector.tabulate (256, fn i => Set.getOrDefault (i, set, ~1)))
dtran
val endMarker = Vector.tabulate (256, makeEndmarkerVec)
val endMarker = Vector.fromList [endMarker]
in
Vector.concat [result, endMarker]
end
fun convert regex = fun convert regex =
let let
@@ -827,7 +834,7 @@ struct
val dstates = Vector.fromList [{transitions = first, marked = false}] val dstates = Vector.fromList [{transitions = first, marked = false}]
in in
convertLoop (regex, dstates, Vector.fromList [], followSet) convertLoop (regex, dstates, Vector.fromList [Set.LEAF], followSet)
end end
end end

View File

@@ -3572,3 +3572,6 @@ struct
end end
end end
gut feeling gut feeling
Mississipi
goooal