structure ViWordDfa = struct val startState: Word8.word = 0w0 val startAlpha: Word8.word = 0w1 val startSpace: Word8.word = 0w2 val startPunct: Word8.word = 0w3 val alphaToSpace: Word8.word = 0w4 val punctToSpace: Word8.word = 0w5 val alphaToPunct: Word8.word = 0w6 val punctToAlpha: Word8.word = 0w7 val spaceToAlpha: Word8.word = 0w8 val spaceToPunct: Word8.word = 0w9 fun makeStart i = let val chr = Char.chr i in if Char.isAlphaNum chr orelse chr = #"_" then startAlpha else if Char.isSpace chr then startSpace else startPunct end fun makeStartAlpha i = let val chr = Char.chr i in if Char.isAlphaNum chr orelse chr = #"_" then startAlpha else if Char.isSpace chr then alphaToSpace else alphaToPunct end fun makeStartSpace i = let val chr = Char.chr i in if Char.isAlphaNum chr orelse chr = #"_" then spaceToAlpha else if Char.isSpace chr then startSpace else spaceToPunct end fun makeStartPunct i = let val chr = Char.chr i in if Char.isAlphaNum chr orelse chr = #"_" then punctToAlpha else if Char.isSpace chr then punctToSpace else startPunct end val startTable = Vector.tabulate (255, makeStart) val startAlphaTable = Vector.tabulate (255, makeStartAlpha) val startSpaceTable = Vector.tabulate (255, makeStartSpace) val startPunctTable = Vector.tabulate (255, makeStartPunct) val alphaToSpaceTable = startSpaceTable val punctToSpaceTable = startSpaceTable val tables = #[ startTable , startAlphaTable , startSpaceTable , startPunctTable , alphaToSpaceTable , punctToSpaceTable ] fun next (currentState, chr) = let val currentState = Word8.toInt currentState val currentTable = Vector.sub (tables, currentState) val charIdx = Char.ord chr in Vector.sub (currentTable, charIdx) end structure StartOfNextWord = MakeNextDfaLoop (struct val startState = startState fun fStart (idx, absIdx, str, tl, currentState, counter) = if idx = String.size str then case tl of str :: tl => fStart (0, absIdx, str, tl, currentState, counter) | [] => Int.max (0, absIdx - 2) else let val chr = String.sub (str, idx) val newState = next (currentState, chr) in if newState = alphaToPunct orelse newState = punctToAlpha orelse newState = spaceToAlpha orelse newState = spaceToPunct then if counter - 1 = 0 then absIdx else (* reset state *) fStart (idx + 1, absIdx + 1, str, tl, startState, counter - 1) else fStart (idx + 1, absIdx + 1, str, tl, newState, counter) end end) structure EndOfPrevWord = MakePrevDfaLoop (struct val startState = startState fun fStart (idx, absIdx, str, tl, currentState, counter) = if idx < 0 then case tl of str :: tl => fStart (String.size str - 1, absIdx, str, tl, currentState, counter) | [] => 0 else let val chr = String.sub (str, idx) val newState = next (currentState, chr) in if newState = alphaToPunct orelse newState = punctToAlpha orelse newState = spaceToAlpha orelse newState = spaceToPunct then if counter - 1 = 0 then absIdx else fStart (idx - 1, absIdx - 1, str, tl, startState, counter - 1) else fStart (idx - 1, absIdx - 1, str, tl, newState, counter) end end) (* w *) val startOfNextWord = StartOfNextWord.next (* ge *) val endOfPrevWord = EndOfPrevWord.prev end