git-subtree-dir: string-tries-sml
git-subtree-mainline: dba78da7ec
git-subtree-split: d056e08ce7
60 lines
1.6 KiB
Standard ML
60 lines
1.6 KiB
Standard ML
(* Generate a words.sml file containing a vector of strings
 * from a line-delimited words.txt file. *)
|
|
(* Input stream over the word list; opened when this file is evaluated. *)
val inIo : TextIO.instream = TextIO.openIn "words.txt"

(* Output stream for the generated SML source; opened when this file is
 * evaluated. *)
val outIO : TextIO.outstream = TextIO.openOut "words.sml"
|
|
|
|
(* Join the collected string fragments into one word and push it onto acc.
 * An empty fragment list means no word was in progress, so acc is returned
 * unchanged. *)
fun consWordChrs ([], acc) = acc
  | consWordChrs (wordChrs, acc) = String.concat wordChrs :: acc
|
|
|
|
(* Scan line from index pos down to 0, splitting it into words.
 *
 * wordChrs holds the fragments of the word currently being read; because
 * the scan runs right to left, prepending each fragment leaves them in
 * left-to-right order. A character that is whitespace or not printable ends
 * the current word, which is then pushed onto acc via consWordChrs.
 *
 * Each kept character goes through Char.toString so that double-quotes are
 * escaped; unescaped, they could make the generated .sml file invalid. *)
fun helpTokeniseLine (pos, wordChrs, line, acc) =
  if pos >= 0 then
    let
      val c = String.sub (line, pos)
    in
      if Char.isSpace c orelse not (Char.isPrint c) then
        (* separator: flush any pending word and start a fresh one *)
        helpTokeniseLine (pos - 1, [], line, consWordChrs (wordChrs, acc))
      else
        helpTokeniseLine (pos - 1, Char.toString c :: wordChrs, line, acc)
    end
  else
    (* ran off the front of the line: flush whatever word is pending *)
    consWordChrs (wordChrs, acc)
|
|
|
|
(* Split line into words and push them onto acc. The scan starts at the last
 * character of the line with no word in progress. *)
fun tokeniseLine (line, acc) =
  let
    val lastPos = String.size line - 1
  in
    helpTokeniseLine (lastPos, [], line, acc)
  end
|
|
|
|
(* Read inIo line by line until end of input, tokenising each line onto the
 * accumulator, then return the accumulator reversed.
 *
 * Each line's words are pushed in front of those from earlier lines, so the
 * final List.rev restores file order when every line holds a single word. *)
fun readLines (inIo, acc) =
  let
    fun loop words =
      case TextIO.inputLine inIo of
        NONE => List.rev words
      | SOME line => loop (tokeniseLine (line, words))
  in
    loop acc
  end
|
|
|
|
(* Write every word in lst to outIO as a double-quoted string literal.
 * Entries are separated by a comma and a newline; the last entry gets
 * neither, and an empty list writes nothing. *)
fun writeLines (outIO, lst) =
  case lst of
    [] => ()
  | [word] => TextIO.output (outIO, "\"" ^ word ^ "\"")
  | word :: rest =>
      ( TextIO.output (outIO, "\"" ^ word ^ "\",\n")
      ; writeLines (outIO, rest)
      )
|
|
|
|
(* Entry point: read every word from inIo and write outIO as an SML
 * structure, WordsList, whose words value is a vector literal of those
 * words.
 *
 * Both streams are closed once they are no longer needed. Closing outIO
 * also flushes its buffered data, so the generated file is complete on disk
 * when main returns instead of depending on the runtime to flush at exit. *)
fun main () =
  let
    val lst = readLines (inIo, [])
    (* all input has been consumed; release the handle *)
    val () = TextIO.closeIn inIo
    val _ = TextIO.output
      (outIO, "structure WordsList = \nstruct \n val words = #[\n")
    val _ = writeLines (outIO, lst)
    val _ = TextIO.output (outIO, "]\n end")
  in
    TextIO.closeOut outIO
  end
|
|
|
|
(* Run the generator when this file is evaluated. *)
val () = main ()
|