fix balancing errors
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
val data = #[(0, 0, "''"),
|
val automerge_arr = #[(0, 0, "''"),
|
||||||
(1, 0, "'d'"),
|
(1, 0, "'d'"),
|
||||||
(2, 0, "'o'"),
|
(2, 0, "'o'"),
|
||||||
(3, 0, "'c'"),
|
(3, 0, "'c'"),
|
||||||
|
|||||||
35
rope.sml
35
rope.sml
@@ -9,6 +9,11 @@ val target_length = 1024
|
|||||||
val empty = N0 ""
|
val empty = N0 ""
|
||||||
fun of_string string = N0 string
|
fun of_string string = N0 string
|
||||||
|
|
||||||
|
exception Size
|
||||||
|
exception Ins
|
||||||
|
exception Substring
|
||||||
|
exception Delete
|
||||||
|
|
||||||
fun size rope =
|
fun size rope =
|
||||||
case rope of
|
case rope of
|
||||||
N0 s => String.size s
|
N0 s => String.size s
|
||||||
@@ -22,11 +27,11 @@ fun size rope =
|
|||||||
in
|
in
|
||||||
t1_size + t2_size + t3_size
|
t1_size + t2_size + t3_size
|
||||||
end
|
end
|
||||||
| _ => raise Empty
|
| _ => raise Size
|
||||||
|
|
||||||
fun root rope =
|
fun root rope =
|
||||||
case rope of
|
case rope of
|
||||||
L2(s1, s2) => N2(N0 s1, String.size s1, String.size s2, N0 s2)
|
L2(s1, s2) => N2(N0 s1, String.size s1, String.size s2, N0 s2)
|
||||||
| N3(t1, t2, t3) =>
|
| N3(t1, t2, t3) =>
|
||||||
let
|
let
|
||||||
val t1_size = size t1
|
val t1_size = size t1
|
||||||
@@ -37,7 +42,7 @@ fun root rope =
|
|||||||
in
|
in
|
||||||
N2(left, left_size, t3_size, N1 t3)
|
N2(left, left_size, t3_size, N1 t3)
|
||||||
end
|
end
|
||||||
| t => t
|
| t => t
|
||||||
|
|
||||||
fun n1 rope =
|
fun n1 rope =
|
||||||
case rope of
|
case rope of
|
||||||
@@ -51,7 +56,7 @@ fun n1 rope =
|
|||||||
val left_size = t1_size + t2_size
|
val left_size = t1_size + t2_size
|
||||||
val t3_size = size t3
|
val t3_size = size t3
|
||||||
in
|
in
|
||||||
N2(left, left_size, t3_size, t3)
|
N2(left, left_size, t3_size, N1 t3)
|
||||||
end
|
end
|
||||||
| t => N1 t
|
| t => N1 t
|
||||||
|
|
||||||
@@ -115,6 +120,19 @@ fun ins_n2_right left right =
|
|||||||
in
|
in
|
||||||
N3(t1, N1 t2, right)
|
N3(t1, N1 t2, right)
|
||||||
end
|
end
|
||||||
|
| (t1, N3(t2, t3, t4)) =>
|
||||||
|
let
|
||||||
|
val t1_size = size t1
|
||||||
|
val t2_size = size t2
|
||||||
|
val left = N2(t1, t1_size, t2_size, t2)
|
||||||
|
val t3_size = size t3
|
||||||
|
val t4_size = size t4
|
||||||
|
val right = N2(t3, t3_size, t4_size, t4)
|
||||||
|
val right_size = t3_size + t4_size
|
||||||
|
val left_size = t1_size + t2_size
|
||||||
|
in
|
||||||
|
N2(left, left_size, right_size, right)
|
||||||
|
end
|
||||||
| (l, r) =>
|
| (l, r) =>
|
||||||
N2(l, size l, size r, r)
|
N2(l, size l, size r, r)
|
||||||
|
|
||||||
@@ -178,7 +196,7 @@ fun ins cur_index string rope =
|
|||||||
in
|
in
|
||||||
ins_n2_right l r
|
ins_n2_right l r
|
||||||
end
|
end
|
||||||
| _ => raise Empty
|
| _ => raise Ins
|
||||||
|
|
||||||
fun insert index string rope =
|
fun insert index string rope =
|
||||||
let
|
let
|
||||||
@@ -247,7 +265,7 @@ fun sub start_idx end_idx acc rope =
|
|||||||
sub start_idx end_idx sub_acc l
|
sub start_idx end_idx sub_acc l
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
| _ => raise Empty
|
| _ => raise Substring
|
||||||
|
|
||||||
fun substring start length rope =
|
fun substring start length rope =
|
||||||
let
|
let
|
||||||
@@ -346,12 +364,13 @@ fun del start_idx end_idx rope =
|
|||||||
(N2(l, size l, size r, r), false)
|
(N2(l, size l, size r, r), false)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
| _ => raise Empty
|
| _ => raise Delete
|
||||||
|
|
||||||
fun delete start length rope =
|
(* delete start length rope
   Computes finish = start + length, runs `del start finish rope`, and returns
   the tree produced by `del`. `del` yields a (tree, flag) pair; when the flag
   is true the resulting tree is passed through `root` before being returned.
   NOTE(review): the flag presumably signals that the root needs
   re-normalising after the deletion - confirm against `del`. *)
fun delete start length rope =
|
||||||
let
|
let
|
||||||
val finish = start + length
|
val finish = start + length
|
||||||
val (t, _) = del start finish rope
|
val (t, did_ins) = del start finish rope
|
||||||
|
(* Use the tree returned by del (t), not the input rope: referring to `rope` here would discard the deletion and return the original contents. *)
val t = if did_ins then root t else t
|
||||||
in
|
in
|
||||||
t
|
t
|
||||||
end
|
end
|
||||||
|
|||||||
4
seph.sml
4
seph.sml
@@ -1,4 +1,4 @@
|
|||||||
val data = #[(0, 0, "'Automerge is too slow and clunky. Martin (its principle architect and programmer) recorded himself typing an academic paper. Running his editing history through automerge (his own code) takes 490 seconds, which is a bit less than 10 minutes. Once processed, the editing trace sits on 1.1 GB of RAM. The newly merged performance branch (designed to fix a lot of these problems) is even slower - taking 750 seconds (12.5 minutes) to process the same editing trace.nI managed to get that 10 minute time down to 70ms (0.07 seconds). Thats the best result I've ever gotten from optimization work, and I'm delighted by it. Let me tell you what I did!nWhat does automerge do?nBefore we can go into detail about how I made automerge fast, we have to spend a moment talking about how automerge itself works.nAn automerge document is actually a tree of inserted characters. Each character in the document has the following properties:nA unique ID, made up of a tuple of (client ID, sequence number)nThe ID (or a pointer to) its parent item, which is the item directly before that character when it was inserted.nThe character itself ('A')nThere's a couple more fields (eg to mark when characters have been deleted), but essentially thats it. When a character is inserted in the document, automerge figures out the ID of the character immediately before the new character, and inserts the new character as one of its predecessor's *children*. If you just type a linear sequence of characters (as I'm doing right now), you'll end up with a big long chain of characters going down like a linked list.nSo why is automerge so slow?nWhen optimizing, I imagine myself manually doing all the work the computer is doing, one step at a time. Then I imagine asking: 'When I get bored, how would I speed this job up?'.nAutomerge is slow for 3 main reasons:nIts written in javascript and uses complex data structures. 
Javascript is reasonably fast for math, but slow and inefficient when using complex data structures.nAutomerge uses a complex and inefficient data structurenAutomerge makes extremely heavy use of immutablejsnEach of these issues accounts for about an order of magnitude slowdown in performance. You can see all 3 issues showing up in this method from the automerge source tree, which is called on each keystroke. Automerge uses this method to figure out where each new character should be placed in the resulting document:nfunction insertionsAfter(opSet, objectId, parentId, childId) {n let childKey = nulln if (childId) childKey = Map({opId: childId})nn return opSetn .getIn(['byObject', objectId, '_following', parentId], List())n .filter(op => op.get('insert') && (!childKey || lamportCompare(op, childKey) < 0))n .sort(lamportCompare)n .reverse() // descending ordern .map(op => op.get('opId'))n}nWhats wrong with this method?nThis method allocates all over the place. I can spot 5 allocations, not counting any extra nonsense immutablejs is doing. The call to List() has no effect as far as I can tell from reading immutablejs's documentation.nThe document is always kept in a sorted order anyway, so the calls to sort() and reverse() are unnecessary. The algorithm only needs to figure out where the new child should be inserted. Re-sorting all children is entirely avoidable. Sort functions are often fast when the input is sorted already, but in this case because the sorting function is inverted, the computer always has to sort the entire list.nYou can't tell from looking at this method, but insertionsAfter nDespite CRDTs being the 'new hotness' in the collaborative editing game for years, I've been resisting them. As I said in my [recent blog post about CRDTs](https://josephg.com/blog/crdts-are-the-future/), they've been generally unworkable for real world collaborative editing because:nThey take up too much space on disk and in memory. 
(Automerge takes 1.1GB in RAM to store a 100kb document)nThey consume way too much CPU to process editsnUntil these issues are addressed, I can't recommend CRDTs for use in general computing.n'"),
|
val seph_arr = #[(0, 0, "'Automerge is too slow and clunky. Martin (its principle architect and programmer) recorded himself typing an academic paper. Running his editing history through automerge (his own code) takes 490 seconds, which is a bit less than 10 minutes. Once processed, the editing trace sits on 1.1 GB of RAM. The newly merged performance branch (designed to fix a lot of these problems) is even slower - taking 750 seconds (12.5 minutes) to process the same editing trace.nI managed to get that 10 minute time down to 70ms (0.07 seconds). Thats the best result I've ever gotten from optimization work, and I'm delighted by it. Let me tell you what I did!nWhat does automerge do?nBefore we can go into detail about how I made automerge fast, we have to spend a moment talking about how automerge itself works.nAn automerge document is actually a tree of inserted characters. Each character in the document has the following properties:nA unique ID, made up of a tuple of (client ID, sequence number)nThe ID (or a pointer to) its parent item, which is the item directly before that character when it was inserted.nThe character itself ('A')nThere's a couple more fields (eg to mark when characters have been deleted), but essentially thats it. When a character is inserted in the document, automerge figures out the ID of the character immediately before the new character, and inserts the new character as one of its predecessor's *children*. If you just type a linear sequence of characters (as I'm doing right now), you'll end up with a big long chain of characters going down like a linked list.nSo why is automerge so slow?nWhen optimizing, I imagine myself manually doing all the work the computer is doing, one step at a time. Then I imagine asking: 'When I get bored, how would I speed this job up?'.nAutomerge is slow for 3 main reasons:nIts written in javascript and uses complex data structures. 
Javascript is reasonably fast for math, but slow and inefficient when using complex data structures.nAutomerge uses a complex and inefficient data structurenAutomerge makes extremely heavy use of immutablejsnEach of these issues accounts for about an order of magnitude slowdown in performance. You can see all 3 issues showing up in this method from the automerge source tree, which is called on each keystroke. Automerge uses this method to figure out where each new character should be placed in the resulting document:nfunction insertionsAfter(opSet, objectId, parentId, childId) {n let childKey = nulln if (childId) childKey = Map({opId: childId})nn return opSetn .getIn(['byObject', objectId, '_following', parentId], List())n .filter(op => op.get('insert') && (!childKey || lamportCompare(op, childKey) < 0))n .sort(lamportCompare)n .reverse() // descending ordern .map(op => op.get('opId'))n}nWhats wrong with this method?nThis method allocates all over the place. I can spot 5 allocations, not counting any extra nonsense immutablejs is doing. The call to List() has no effect as far as I can tell from reading immutablejs's documentation.nThe document is always kept in a sorted order anyway, so the calls to sort() and reverse() are unnecessary. The algorithm only needs to figure out where the new child should be inserted. Re-sorting all children is entirely avoidable. Sort functions are often fast when the input is sorted already, but in this case because the sorting function is inverted, the computer always has to sort the entire list.nYou can't tell from looking at this method, but insertionsAfter nDespite CRDTs being the 'new hotness' in the collaborative editing game for years, I've been resisting them. As I said in my [recent blog post about CRDTs](https://josephg.com/blog/crdts-are-the-future/), they've been generally unworkable for real world collaborative editing because:nThey take up too much space on disk and in memory. 
(Automerge takes 1.1GB in RAM to store a 100kb document)nThey consume way too much CPU to process editsnUntil these issues are addressed, I can't recommend CRDTs for use in general computing.n'"),
|
||||||
(0, 0, "'n'"),
|
(0, 0, "'n'"),
|
||||||
(1, 0, "'n'"),
|
(1, 0, "'n'"),
|
||||||
(2, 0, "'n'"),
|
(2, 0, "'n'"),
|
||||||
@@ -137990,4 +137990,4 @@ val data = #[(0, 0, "'Automerge is too slow and clunky. Martin (its principle ar
|
|||||||
(20698, 0, "'t'"),
|
(20698, 0, "'t'"),
|
||||||
(20698, 1, "''"),
|
(20698, 1, "''"),
|
||||||
(20698, 0, "'n'"),
|
(20698, 0, "'n'"),
|
||||||
(20698, 1, "''")]
|
(20698, 1, "''")]
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
val data = #[(0, 0, "'<script>n import {onMount} from 'svelte';n let count = 0;n onMount(() => {n const interval = setInterval(() => count++, 1000);n return () => {n clearInterval(interval);n };n });n</script>nn<style>n :global(body) {n margin: 0;n font-family: Arial, Helvetica, sans-serif;n }n .App {n text-align: center;n }n .App code {n background: #0002;n padding: 4px 8px;n border-radius: 4px;n }n .App p {n margin: 0.4rem;n }nn .App-header {n background-color: #f9f6f6;n color: #333;n min-height: 100vh;n display: flex;n flex-direction: column;n align-items: center;n justify-content: center;n font-size: calc(10px + 2vmin);n }n .App-link {n color: #ff3e00;n }n .App-logo {n height: 36vmin;n pointer-events: none;n margin-bottom: 3rem;n animation: App-logo-spin infinite 1.6s ease-in-out alternate;n }n @keyframes App-logo-spin {n from {n transform: scale(1);n }n to {n transform: scale(1.06);n }n }n</style>nn<div class='App'>n <header class='App-header'>n <img src='/logo.svg' class='App-logo' alt='logo' />n <p>Edit <code>src/App.svelte</code> and save to reload.</p>n <p>Page has been open for <code>{count}</code> seconds.</p>n <p>n <a class='App-link' href='https://svelte.dev' target='_blank' rel='noopener noreferrer'>n Learn Svelten </a>n </p>n </header>n</div>n'"),
|
val svelte_arr = #[(0, 0, "'<script>n import {onMount} from 'svelte';n let count = 0;n onMount(() => {n const interval = setInterval(() => count++, 1000);n return () => {n clearInterval(interval);n };n });n</script>nn<style>n :global(body) {n margin: 0;n font-family: Arial, Helvetica, sans-serif;n }n .App {n text-align: center;n }n .App code {n background: #0002;n padding: 4px 8px;n border-radius: 4px;n }n .App p {n margin: 0.4rem;n }nn .App-header {n background-color: #f9f6f6;n color: #333;n min-height: 100vh;n display: flex;n flex-direction: column;n align-items: center;n justify-content: center;n font-size: calc(10px + 2vmin);n }n .App-link {n color: #ff3e00;n }n .App-logo {n height: 36vmin;n pointer-events: none;n margin-bottom: 3rem;n animation: App-logo-spin infinite 1.6s ease-in-out alternate;n }n @keyframes App-logo-spin {n from {n transform: scale(1);n }n to {n transform: scale(1.06);n }n }n</style>nn<div class='App'>n <header class='App-header'>n <img src='/logo.svg' class='App-logo' alt='logo' />n <p>Edit <code>src/App.svelte</code> and save to reload.</p>n <p>Page has been open for <code>{count}</code> seconds.</p>n <p>n <a class='App-link' href='https://svelte.dev' target='_blank' rel='noopener noreferrer'>n Learn Svelten </a>n </p>n </header>n</div>n'"),
|
||||||
(7, 0, "' '"),
|
(7, 0, "' '"),
|
||||||
(8, 0, "'l'"),
|
(8, 0, "'l'"),
|
||||||
(9, 0, "'a'"),
|
(9, 0, "'a'"),
|
||||||
|
|||||||
10
utils.sml
10
utils.sml
@@ -33,3 +33,13 @@ fun run_txns_time title arr =
|
|||||||
in
|
in
|
||||||
time_func title f
|
time_func title f
|
||||||
end
|
end
|
||||||
|
|
||||||
|
(* Top-level benchmark driver: calls run_txns_time once per dataset, in the
   order svelte, rust, seph, automerge, passing a title string and the
   matching transaction vector. Each result is discarded and the whole
   expression evaluates to unit.
   NOTE(review): rust_arr is not defined in the visible part of this change;
   presumably it is declared in a data file alongside svelte_arr, seph_arr
   and automerge_arr - confirm. *)
val _ =
|
||||||
|
let
|
||||||
|
val _ = run_txns_time "svelte" svelte_arr
|
||||||
|
val _ = run_txns_time "rust" rust_arr
|
||||||
|
val _ = run_txns_time "seph" seph_arr
|
||||||
|
val _ = run_txns_time "automerge" automerge_arr
|
||||||
|
in
|
||||||
|
()
|
||||||
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user