diff --git a/.gitignore b/.gitignore index 2f10d61..625cbc1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ -/proj -/proj.du -/proj.ud +/bench +/bench.du +/bench.ud +/examples +/examples.du +/examples.ud diff --git a/README.md b/README.md index 6e4ed7e..81d3c6f 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,32 @@ Standard ML port of [this](https://github.com/hummy123/brolib) rope implementation. +This particular rope uses the balancing scheme described in the [Purely Functional 1-2 Brother Trees paper authored by Ralf Hinze](https://www.cs.ox.ac.uk/ralf.hinze/publications/Brother12.pdf). It tries to keep the number of nodes to a minimum by joining the strings in adjacent leaf nodes, if joining would not be too expensive. + ## Usage + +The two files are `rope.sml` and `tiny_rope.sml`. + +`rope.sml` contains a rope that tracks line metadata (which has a small performance and memory penalty). This is useful if you have line-based operations in mind. + +`tiny_rope.sml` doesn't track line metadata, and is useful when line-queries aren't needed. + +Except for those line-based operations marked below, all functions are the same between the two. + +### Examples + +#### Initialise + +`val rope = Rope.fromString "hello, world!"` + +It's best to use a string with a length less than or equal to 1024 for performance reasons. (The point of a rope is to represent a large string using a binary tree that contains smaller pieces.) + +#### Convert to string + +`val str = Rope.toString rope` + +This function is best avoided where possible, because it may allocate one extremely large string.
+ +#### Insert + +`val rope = Rope.insert (0, "hello, world!", rope)` diff --git a/proj.mlb b/bench.mlb similarity index 100% rename from proj.mlb rename to bench.mlb diff --git a/examples.mlb b/examples.mlb new file mode 100644 index 0000000..261e6f9 --- /dev/null +++ b/examples.mlb @@ -0,0 +1,5 @@ +$(SML_LIB)/basis/basis.mlb + +tiny_rope.sml +rope.sml +examples.sml diff --git a/examples.sml b/examples.sml new file mode 100644 index 0000000..b3c2afc --- /dev/null +++ b/examples.sml @@ -0,0 +1,139 @@ +(* An empty rope, containing no strings. *) +val rope = Rope.empty; + +(* Initialise rope from a string. + * + * You probably want to avoid initialising the rope with very long strings, + * because a rope is meant to represent a long string + * by holding nodes that contain smaller strings in a binary tree. + * The implementation avoids building strings that are ever larger than 1024, + * but that was done in an attempt to find the ideal length for performance. + * A user shouldn't notice any delays in larger lengths like 65535 either. + * + * In their text buffer (a piece-tree, which is slower than a rope), + * the VS Code team had other issues with excessively large strings. + * https://code.visualstudio.com/blogs/2018/03/23/text-buffer-reimplementation#_avoid-the-string-concatenation-trap *) +val rope = Rope.fromString "hello, world!\n"; + +(* Convert a rope to a string. + * + * This may involve allocating an extremely large string in some cases, + * which should be avoided for the reason mentioned in the above comment. *) +val str = Rope.toString rope; + +(* Insert a string into the rope. + * + * There isn't any validation to check that you inserted at a reasonable + * position. + * If you insert at an index lower than 0, your inserted string is just + * prepended to the start. + * If you insert at an index greater than the length, your inserted string is + * just appended to the end.
+ * + * One thing to watch out for if you are using the line-rope is making sure + * that you don't insert in the middle of a \r\n pair, separating \r from \n. + * That would mess up the line metadata the rope contains and make the line + * metadata invalid. *) +val rope = Rope.insert (14, "goodbye, world!", rope); + +(* Append a string into the rope. *) +val rope = Rope.append ("hello again\n", rope); + +(* Append a string into the rope, providing line metadata with it. + * + * The point of this function is for performance: the other insertion functions + * calculate the line metadata by scanning the string itself, but in some cases + * this is already known. The larger example below is such a case. *) +val rope = Rope.appendLine ("my new line", Vector.fromList [], rope); + +(** Second larger example motivating Rope.appendLine below. *) +(*** Returns the index of the line break that ends a line, + *** returning the index of \r if line ends with a \r\n pair. *) +fun getLineStart line = + let + val lastIdx = String.size line - 1 + val lastChr = String.sub (line, lastIdx) + in + if lastChr = #"\n" andalso lastIdx - 1 >= 0 then + if String.sub (line, lastIdx - 1) = #"\r" then lastIdx - 1 else lastIdx + else + lastIdx + end; + +(*** Appends the lines in a file to a rope. *) +fun readLines (rope, file) = + case TextIO.inputLine file of + SOME line => + let + (* Don't need to scan string to find line breaks, + * because we already know. *) + val lineIdx = getLineStart (line) + val vec = Vector.fromList [lineIdx] + val rope = Rope.appendLine (line, vec, rope) + in + readLines (rope, file) + end + | NONE => rope; + +val licenseRope = readLines (Rope.empty, TextIO.openIn "LICENSE"); + +(* Deletes a range from the rope, given its start index and its length. + * + * As with insert, one should make sure they don't corrupt the line metadata. + * Specifically, in a \r\n pair, the line metadata points to \r. + * Deleting \r would corrupt it, but deleting \n would be fine.
+ * In general, if you want to delete a line break, you would want to delete both + * \r and \n. The user thinks of the \r\n pair as a single character so they are + * expecting the whole line break to be deleted. *) + +(** Initialise new rope. *) +val rope = Rope.fromString "hello, world!"; +(** New rope contains "hello world!" without comma. *) +val rope = Rope.delete (5, 1, rope); + +(* Folds over the characters in a rope, starting from the given index. + * + * This is meant to be an alternative to queries for a specific line or a + * substring. + * If a rope is meant to avoid allocating large strings, then it seems more + * performant to query its contents through higher-order functions rather than + * allocating substrings and querying the substring. *) +val rope = Rope.fromString "hello!";; + +fun apply (chr, lst) = chr :: lst; +(** val result = [#"!",#"o",#"l",#"l",#"e"] : char list *) +val result = Rope.foldFromIdx (apply, 1, rope, []); + +(* Folds over the characters in a rope, accepting a predicate function + * that terminates the fold when it returns true. *) +fun apply (chr, acc) = + (print (Char.toString chr); acc + 1); + +fun term acc = acc = 3; + +(** Below function prints first three letters, "hel", + ** and then stops folding. *) +val _ = Rope.foldFromIdxTerm (apply, term, 0, rope, 0); + +(* Folds over the characters in a rope, starting from the given line number. + * + * This is just like the foldFromIdxTerm function, except that it starts folding + * from the given line number instead. *) +val rope = Rope.fromString "hello, world!\ngoodbye, world!\nhello again!"; + +fun apply (chr, _) = + print (Char.toString chr); + +fun term _ = false; + +(** Below line prints the whole string, one character at a time. *) +Rope.foldLines (apply, term, 0, rope, ()); +(** Prints starting from #"g" in "goodbye". *) +Rope.foldLines (apply, term, 1, rope, ()); +(** Prints the very last line.
*) +Rope.foldLines (apply, term, 2, rope, ()); + +(** Prints the whole string if specifying a line before 0, which doesn't exist. *) +Rope.foldLines (apply, term, ~3, rope, ()); +(** Raises a subscript exception: there is no corresponding line in the rope. *) +Rope.foldLines (apply, term, 4, rope, ()); diff --git a/rope.sml b/rope.sml index dbcc714..a743bc7 100644 --- a/rope.sml +++ b/rope.sml @@ -5,7 +5,6 @@ sig val fromString: string -> t val toString: t -> string - val foldr: ('a * string * int vector -> 'a) * 'a * t -> 'a (* The caller should not insert in the middle of a \r\n pair, * or else line metadata will become invalid. *) @@ -727,34 +726,67 @@ struct val chr = String.sub (str, pos) val acc = apply (chr, acc) in - foldLineCharsTerm (apply, term, pos, str, strSize, acc) + foldLineCharsTerm (apply, term, pos + 1, str, strSize, acc) end | true => acc else acc - fun foldLines (apply, term, lineNum, rope, acc) = + (* Walks down to the leaf holding the requested line break and folds from + * just after it, continuing into later subtrees until term returns true. + * lineNum is already shifted by foldLines; a negative value means that + * folding starts at the beginning of the subtree. *) + fun helpFoldLines (apply, term, lineNum, rope, acc) = case rope of N2 (l, _, lmv, r) => if lineNum < lmv then let - val acc = foldLines (apply, term, lineNum, rope, acc) + (* Fold the left child l; recursing on rope itself would never terminate. *) + val acc = helpFoldLines (apply, term, lineNum, l, acc) in if term acc then acc - else foldLines (apply, term, lineNum - lmv, r, acc) + else helpFoldLines (apply, term, lineNum - lmv, r, acc) end else - foldLines (apply, term, lineNum - lmv, r, acc) - | N1 t => foldLines (apply, term, lineNum, t, acc) + helpFoldLines (apply, term, lineNum - lmv, r, acc) + | N1 t => helpFoldLines (apply, term, lineNum, t, acc) | N0 (str, vec) => - let - val idx = - if Vector.length vec > 0 then Vector.sub (vec, lineNum) else 0 - in - foldLineCharsTerm (apply, term, idx, str, String.size str, acc) - end + (* We have a few edge cases to handle here. + * 1. If lineNum is negative or the vector has no elements, + * we should start folding from the start of the string. + * 2.
Since the vector points to the start of a linebreak + * (which means either \r or \n when either is alone, + * or \r in a \r\n pair), + * we have to skip the linebreak or linebreak pair when folding + * over the string. That is more intuitive to the user. *) + if lineNum < 0 orelse Vector.length vec = 0 then + foldLineCharsTerm (apply, term, 0, str, String.size str, acc) + else + let + val idx = Vector.sub (vec, lineNum) + in + if idx + 1 < String.size str then + let + val chr = String.sub (str, idx) + val nextChr = String.sub (str, idx + 1) + in + if chr = #"\r" andalso nextChr = #"\n" then + foldLineCharsTerm + (apply, term, idx + 2, str, String.size str, acc) + else + foldLineCharsTerm + (apply, term, idx + 1, str, String.size str, acc) + end + else + acc + end | _ => raise AuxConstructor + (* Entry point: shifts the caller's line number down by one, so that + * line n starts folding just after line break n - 1 (line 0 at the start). *) + fun foldLines (apply, term, lineNum, rope, acc) = + helpFoldLines (apply, term, lineNum - 1, rope, acc) + fun verifyLines rope = foldr ( (fn (_, str, vec) =>