use larger set of words in words.txt dataset

This commit is contained in:
2024-09-09 15:19:18 +01:00
parent 7d707c9af4
commit fc0fd6923b
6 changed files with 886133 additions and 69250 deletions

View File

@@ -1,3 +1,5 @@
(* Generate a words.sml file with a vector of strings,
 * from a line-delimited words.txt file. *)
(* Input stream opened for reading on the word list, words.txt. *)
val inIo = TextIO.openIn "words.txt"
(* Output stream opened for writing on the generated file, words.sml. *)
val outIO = TextIO.openOut "words.sml"
@@ -11,11 +13,14 @@ fun writeLines (outIO, lst) =
[] => ()
| word :: tl =>
let
val word = String.substring (word, 0, String.size word - 2)
val isLast = tl = []
(* strip any trailing \r and \n characters from the word *)
val word = Substring.full word
val word =
if isLast then "\"" ^ word ^ "\""
else "\"" ^ word ^ "\",\n"
Substring.dropr (fn chr => chr = #"\n" orelse chr = #"\r") word
val word = Substring.string word
val isLast = tl = []
val word = if isLast then "\"" ^ word ^ "\"" else "\"" ^ word ^ "\",\n"
val _ = TextIO.output (outIO, word)
in
writeLines (outIO, tl)