(* Scanner: DFAs for numeric and identifier tokens, plus hand-written
 * helpers that extract the contents of string and character literals. *)
structure Scanner =
struct
  structure Dfa = CaseSensitiveDfa

  val intDfa = Dfa.fromString "[0-9]+"
  val wordDfa = Dfa.fromString "0w[0-9]+"

  (* NOTE(review): the dot here is unescaped, whereas longIdDfa escapes it.
   * If Dfa.fromString treats a bare dot as a wildcard, this pattern accepts
   * more than a decimal point. Confirm against the Dfa pattern syntax. *)
  val realDfa = Dfa.fromString "[0-9]+.[0-9]+"

  val idDfa = Dfa.fromString "[a-zA-Z][a-zA-Z0-9_']*"

  (* Same leading pattern as idDfa, followed by one or more groups made of an
   * escaped dot and another idDfa-shaped pattern. The leading part used to be
   * a single malformed character class that also admitted an opening square
   * bracket and an opening parenthesis. *)
  val longIdDfa =
    Dfa.fromString "[a-zA-Z][a-zA-Z0-9_']*(\\.[a-zA-Z][a-zA-Z0-9_']*)+"

  (* Skips the whitespace inside a backslash-whitespace-backslash formatting
   * sequence, starting at index pos of str.
   *
   * Returns the index just after the closing backslash.
   * Raises Fail when the input ends before the closing backslash, or when a
   * character that is neither whitespace nor a backslash is found. *)
  fun skipFormattingChars (str, pos) =
    (* >= rather than = so that a position past the end reports the same
     * descriptive failure instead of a Subscript from String.sub. *)
    if pos >= String.size str then
      raise Fail "unterminated formatting char sequence"
    else
      let
        val chr = String.sub (str, pos)
      in
        if Char.isSpace chr then skipFormattingChars (str, pos + 1)
        else if chr = #"\\" then pos + 1
        else raise Fail "encountered non-space char in formatting sequence"
      end

  (* Decodes the escape whose selector character sits at index pos of str,
   * that is, the character right after the introducing backslash.
   *
   * Returns (next position, acc with the decoded character appended). For a
   * formatting sequence nothing is appended and the next position is the one
   * returned by skipFormattingChars.
   * Raises Fail "invalid escape char" for any selector not listed below.
   * The caller must guarantee pos < String.size str; String.sub raises
   * Subscript otherwise. *)
  fun getEscapeChr (str, pos, acc) =
    case String.sub (str, pos) of
      #"a" => (pos + 1, acc ^ "\a")
    | #"b" => (pos + 1, acc ^ "\b")
    | #"t" => (pos + 1, acc ^ "\t")
    | #"n" => (pos + 1, acc ^ "\n")
    | #"v" => (pos + 1, acc ^ "\v")
    | #"f" => (pos + 1, acc ^ "\f")
    | #"r" => (pos + 1, acc ^ "\r")
    | #"\"" => (pos + 1, acc ^ "\"")
    | #"\\" => (pos + 1, acc ^ "\\")
    | chr =>
        if Char.isSpace chr then
          (* formatting characters ahead *)
          let val nextPos = skipFormattingChars (str, pos + 1)
          in (nextPos, acc)
          end
        else
          raise Fail "invalid escape char"

  (* Accumulates the contents of a string literal into acc, reading str from
   * index pos until an unescaped double quote is found.
   *
   * Returns (index just after the closing quote, decoded contents).
   * Raises Fail when the input ends before the closing quote, including the
   * case where a backslash is the last character. *)
  fun helpExtractString (pos, str, acc) =
    if pos >= String.size str then
      raise Fail ("unterminated string: [" ^ acc ^ "]\n")
    else
      case String.sub (str, pos) of
        #"\"" => (pos + 1, acc)
      | #"\\" =>
          (* A trailing backslash has no selector character to decode. *)
          if pos + 1 >= String.size str then
            raise Fail ("unterminated string: [" ^ acc ^ "\\]\n")
          else
            let val (nextPos, acc) = getEscapeChr (str, pos + 1, acc)
            in helpExtractString (nextPos, str, acc)
            end
      | chr => helpExtractString (pos + 1, str, acc ^ String.str chr)

  (* Extracts a string literal's contents from str starting at index pos.
   * See helpExtractString for the result and the failure cases. *)
  fun extractString (pos, str) = helpExtractString (pos, str, "")

  (* Extracts a character literal's contents from str starting at index pos.
   *
   * Returns (index just after the closing quote, one-character string).
   * Raises Fail when the decoded contents are not exactly one character
   * long, and propagates the failures of extractString. *)
  fun extractChar (pos, str) =
    let
      val (nextPos, newString) = extractString (pos, str)
    in
      if String.size newString = 1 then
        (nextPos, newString)
      else
        raise Fail
          ("expected char of size 1 but got char of size "
           ^ Int.toString (String.size newString) ^ "\n")
    end
end