Files
sml-projects/compiler/scanner.sml
2026-05-05 16:32:25 +01:00

74 lines
2.3 KiB
Standard ML

(* Scanner: token-recognising DFAs plus helpers that decode the body of
 * string and character literals out of a source string. *)
structure Scanner =
struct
  structure Dfa = CaseSensitiveDfa

  (* One DFA per token class, each built from a pattern string. *)
  val intDfa = Dfa.fromString "[0-9]+"
  val wordDfa = Dfa.fromString "0w[0-9]+"

  (* The dot is escaped the same way longIdDfa escapes it, so that only a
   * literal '.' separates the two digit runs.
   * NOTE(review): assumes a bare '.' is a wildcard in the Dfa pattern
   * syntax; if it is already literal this spelling is equivalent. *)
  val realDfa = Dfa.fromString "[0-9]+\\.[0-9]+"

  val idDfa = Dfa.fromString "[a-zA-Z][a-zA-Z0-9_']*"

  (* Two or more identifiers joined by dots. Every component, including the
   * first, follows the same shape as idDfa: one letter, then any number of
   * letters, digits, underscores or primes. *)
  val longIdDfa = Dfa.fromString
    "[a-zA-Z][a-zA-Z0-9_']*(\\.[a-zA-Z][a-zA-Z0-9_']*)+"

  (* Skips the whitespace of a formatting sequence inside a string literal.
   *
   * Starting at `pos`, consumes characters for which Char.isSpace holds and
   * returns the index just past the backslash that closes the sequence.
   *
   * Raises Fail if the input ends before the closing backslash, or if a
   * character that is neither whitespace nor a backslash is met. *)
  fun skipFormattingChars (str, pos) =
    (* `>=` rather than `=`: a position beyond the end is reported as an
     * unterminated sequence instead of escaping as Subscript. *)
    if pos >= String.size str then
      raise Fail "unterminated formatting char sequence"
    else
      let
        val chr = String.sub (str, pos)
      in
        if Char.isSpace chr then skipFormattingChars (str, pos + 1)
        else if chr = #"\\" then pos + 1
        else raise Fail "encountered non-space char in formatting sequence"
      end

  (* Decodes one escape sequence.
   *
   * `pos` is the index of the character that follows the introducing
   * backslash; `acc` is the text decoded so far. Returns the index at which
   * scanning should resume together with the updated accumulator.
   *
   * A whitespace character at `pos` starts a formatting sequence, which
   * contributes nothing to the accumulator.
   *
   * Raises Fail "invalid escape char" for any other character. The caller
   * is expected to have checked that `pos` is within bounds. *)
  fun getEscapeChr (str, pos, acc) =
    let
      (* A single-character escape: append its meaning, step one char. *)
      fun emit decoded = (pos + 1, acc ^ decoded)
    in
      case String.sub (str, pos) of
        #"a" => emit "\a"
      | #"b" => emit "\b"
      | #"t" => emit "\t"
      | #"n" => emit "\n"
      | #"v" => emit "\v"
      | #"f" => emit "\f"
      | #"r" => emit "\r"
      | #"\"" => emit "\""
      | #"\\" => emit "\\"
      | chr =>
          if Char.isSpace chr then
            (* formatting characters ahead *)
            (skipFormattingChars (str, pos + 1), acc)
          else
            raise Fail "invalid escape char"
    end

  (* Worker for extractString.
   *
   * Reads `str` from `pos`, appending decoded characters to `acc`, until an
   * unescaped double quote is found. Returns the index just past that quote
   * and the decoded text.
   *
   * Raises Fail if the input ends before the closing quote, including the
   * case where the last character is a lone backslash. *)
  fun helpExtractString (pos, str, acc) =
    if pos >= String.size str then
      raise Fail ("unterminated string: [" ^ acc ^ "]\n")
    else
      case String.sub (str, pos) of
        #"\"" => (pos + 1, acc)
      | #"\\" =>
          (* Make sure there is a character to decode after the backslash. *)
          if pos + 1 >= String.size str then
            raise Fail ("unterminated string: [" ^ acc ^ "\\]\n")
          else
            let
              val (nextPos, acc) = getEscapeChr (str, pos + 1, acc)
            in
              helpExtractString (nextPos, str, acc)
            end
      | chr => helpExtractString (pos + 1, str, acc ^ String.str chr)

  (* Decodes a string literal body that begins at `pos`, i.e. just after the
   * opening quote. Returns (index past the closing quote, decoded text). *)
  fun extractString (pos, str) = helpExtractString (pos, str, "")

  (* Decodes a character literal body the same way as a string literal and
   * then requires the decoded text to be exactly one character long.
   *
   * Returns (index past the closing quote, one-character string).
   * Raises Fail when the decoded text has any other length. *)
  fun extractChar (pos, str) =
    let
      val (nextPos, newString) = extractString (pos, str)
      val len = String.size newString
    in
      if len = 1 then
        (nextPos, newString)
      else
        raise Fail
          ("expected char of size 1 but got char of size "
           ^ Int.toString len ^ "\n")
    end
end