use larger set of words in words.txt dataset

This commit is contained in:
2024-09-09 15:19:18 +01:00
parent 7d707c9af4
commit fc0fd6923b
6 changed files with 886133 additions and 69250 deletions

View File

@@ -1,3 +1,5 @@
(* generate a words.sml file with a vector of strings,
* from a line-delimited words.txt file *)
val inIo = TextIO.openIn "words.txt"
val outIO = TextIO.openOut "words.sml"
@@ -11,11 +13,14 @@ fun writeLines (outIO, lst) =
[] => ()
| word :: tl =>
let
val word = String.substring (word, 0, String.size word - 2)
val isLast = tl = []
(* strip trailing \r and \n characters from the end of the word *)
val word = Substring.full word
val word =
if isLast then "\"" ^ word ^ "\""
else "\"" ^ word ^ "\",\n"
Substring.dropr (fn chr => chr = #"\n" orelse chr = #"\r") word
val word = Substring.string word
val isLast = tl = []
val word = if isLast then "\"" ^ word ^ "\"" else "\"" ^ word ^ "\",\n"
val _ = TextIO.output (outIO, word)
in
writeLines (outIO, tl)

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff