fix balancing errors

This commit is contained in:
Humza Shahid
2023-11-13 06:05:36 +00:00
parent 26cfc01f5c
commit 6cd3db9104
9 changed files with 220318 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
val data = #[(0, 0, "''"), val automerge_arr = #[(0, 0, "''"),
(1, 0, "'d'"), (1, 0, "'d'"),
(2, 0, "'o'"), (2, 0, "'o'"),
(3, 0, "'c'"), (3, 0, "'c'"),

BIN
proj Executable file

Binary file not shown.

73425
proj.du Normal file

File diff suppressed because it is too large Load Diff

146850
proj.ud Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,11 @@ val target_length = 1024
val empty = N0 "" val empty = N0 ""
fun of_string string = N0 string fun of_string string = N0 string
(* Raised by the catch-all case of `size` when the rope matches none of
 * the node shapes that function handles. *)
exception Size
(* Raised by the catch-all case of `ins`. *)
exception Ins
(* Raised by the catch-all case of `sub`, the worker whose hunk sits
 * directly above `substring`. *)
exception Substring
(* Raised by the catch-all case of `del`, the worker called by `delete`. *)
exception Delete
fun size rope = fun size rope =
case rope of case rope of
N0 s => String.size s N0 s => String.size s
@@ -22,11 +27,11 @@ fun size rope =
in in
t1_size + t2_size + t3_size t1_size + t2_size + t3_size
end end
| _ => raise Empty | _ => raise Size
fun root rope = fun root rope =
case rope of case rope of
L2(s1, s2) => N2(N0 s1, String.size s1, String.size s2, N0 s2) L2(s1, s2) => N2(N0 s1, String.size s1, String.size s2, N0 s2)
| N3(t1, t2, t3) => | N3(t1, t2, t3) =>
let let
val t1_size = size t1 val t1_size = size t1
@@ -37,7 +42,7 @@ fun root rope =
in in
N2(left, left_size, t3_size, N1 t3) N2(left, left_size, t3_size, N1 t3)
end end
| t => t | t => t
fun n1 rope = fun n1 rope =
case rope of case rope of
@@ -51,7 +56,7 @@ fun n1 rope =
val left_size = t1_size + t2_size val left_size = t1_size + t2_size
val t3_size = size t3 val t3_size = size t3
in in
N2(left, left_size, t3_size, t3) N2(left, left_size, t3_size, N1 t3)
end end
| t => N1 t | t => N1 t
@@ -115,6 +120,19 @@ fun ins_n2_right left right =
in in
N3(t1, N1 t2, right) N3(t1, N1 t2, right)
end end
| (t1, N3(t2, t3, t4)) =>
let
val t1_size = size t1
val t2_size = size t2
val left = N2(t1, t1_size, t2_size, t2)
val t3_size = size t3
val t4_size = size t4
val right = N2(t3, t3_size, t4_size, t4)
val right_size = t3_size + t4_size
val left_size = t1_size + t2_size
in
N2(left, left_size, right_size, right)
end
| (l, r) => | (l, r) =>
N2(l, size l, size r, r) N2(l, size l, size r, r)
@@ -178,7 +196,7 @@ fun ins cur_index string rope =
in in
ins_n2_right l r ins_n2_right l r
end end
| _ => raise Empty | _ => raise Ins
fun insert index string rope = fun insert index string rope =
let let
@@ -247,7 +265,7 @@ fun sub start_idx end_idx acc rope =
sub start_idx end_idx sub_acc l sub start_idx end_idx sub_acc l
end end
end end
| _ => raise Empty | _ => raise Substring
fun substring start length rope = fun substring start length rope =
let let
@@ -346,12 +364,13 @@ fun del start_idx end_idx rope =
(N2(l, size l, size r, r), false) (N2(l, size l, size r, r), false)
end end
end end
| _ => raise Empty | _ => raise Delete
(* delete start length rope
 *
 * Deletes from `rope` by calling `del` with `start` as the start index and
 * `start + length` as the end index, and returns the resulting rope.
 *
 * `del` returns a pair: the rewritten tree and a boolean flag. When the
 * flag is true the rewritten tree is passed through `root`, which turns a
 * top-level L2 or N3 node back into an N2 and leaves other nodes as they
 * are; when it is false the rewritten tree is returned unchanged.
 * NOTE(review): the flag is named `did_ins` here; presumably it signals
 * that the result may be an un-rooted intermediate node. Confirm against
 * `del`.
 *
 * May raise `Delete` (from `del`) or `Size` (from `size`, used by `root`).
 *)
fun delete start length rope =
  let
    val finish = start + length
    val (t, did_ins) = del start finish rope
  in
    (* Re-root the tree produced by `del`. Applying `root` to the input
     * `rope` instead would throw away the deletion and hand the caller
     * the original, unmodified rope. *)
    if did_ins then root t else t
  end

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +1,4 @@
val data = #[(0, 0, "'Automerge is too slow and clunky. Martin (its principle architect and programmer) recorded himself typing an academic paper. Running his editing history through automerge (his own code) takes 490 seconds, which is a bit less than 10 minutes. Once processed, the editing trace sits on 1.1 GB of RAM. The newly merged performance branch (designed to fix a lot of these problems) is even slower - taking 750 seconds (12.5 minutes) to process the same editing trace.nI managed to get that 10 minute time down to 70ms (0.07 seconds). Thats the best result I've ever gotten from optimization work, and I'm delighted by it. Let me tell you what I did!nWhat does automerge do?nBefore we can go into detail about how I made automerge fast, we have to spend a moment talking about how automerge itself works.nAn automerge document is actually a tree of inserted characters. Each character in the document has the following properties:nA unique ID, made up of a tuple of (client ID, sequence number)nThe ID (or a pointer to) its parent item, which is the item directly before that character when it was inserted.nThe character itself ('A')nThere's a couple more fields (eg to mark when characters have been deleted), but essentially thats it. When a character is inserted in the document, automerge figures out the ID of the character immediately before the new character, and inserts the new character as one of its predecessor's *children*. If you just type a linear sequence of characters (as I'm doing right now), you'll end up with a big long chain of characters going down like a linked list.nSo why is automerge so slow?nWhen optimizing, I imagine myself manually doing all the work the computer is doing, one step at a time. Then I imagine asking: 'When I get bored, how would I speed this job up?'.nAutomerge is slow for 3 main reasons:nIts written in javascript and uses complex data structures. 
Javascript is reasonably fast for math, but slow and inefficient when using complex data structures.nAutomerge uses a complex and inefficient data structurenAutomerge makes extremely heavy use of immutablejsnEach of these issues accounts for about an order of magnitude slowdown in performance. You can see all 3 issues showing up in this method from the automerge source tree, which is called on each keystroke. Automerge uses this method to figure out where each new character should be placed in the resulting document:nfunction insertionsAfter(opSet, objectId, parentId, childId) {n let childKey = nulln if (childId) childKey = Map({opId: childId})nn return opSetn .getIn(['byObject', objectId, '_following', parentId], List())n .filter(op => op.get('insert') && (!childKey || lamportCompare(op, childKey) < 0))n .sort(lamportCompare)n .reverse() // descending ordern .map(op => op.get('opId'))n}nWhats wrong with this method?nThis method allocates all over the place. I can spot 5 allocations, not counting any extra nonsense immutablejs is doing. The call to List() has no effect as far as I can tell from reading immutablejs's documentation.nThe document is always kept in a sorted order anyway, so the calls to sort() and reverse() are unnecessary. The algorithm only needs to figure out where the new child should be inserted. Re-sorting all children is entirely avoidable. Sort functions are often fast when the input is sorted already, but in this case because the sorting function is inverted, the computer always has to sort the entire list.nYou can't tell from looking at this method, but insertionsAfter nDespite CRDTs being the 'new hotness' in the collaborative editing game for years, I've been resisting them. As I said in my [recent blog post about CRDTs](https://josephg.com/blog/crdts-are-the-future/), they've been generally unworkable for real world collaborative editing because:nThey take up too much space on disk and in memory. 
(Automerge takes 1.1GB in RAM to store a 100kb document)nThey consume way too much CPU to process editsnUntil these issues are addressed, I can't recommend CRDTs for use in general computing.n'"), val seph_arr = #[(0, 0, "'Automerge is too slow and clunky. Martin (its principle architect and programmer) recorded himself typing an academic paper. Running his editing history through automerge (his own code) takes 490 seconds, which is a bit less than 10 minutes. Once processed, the editing trace sits on 1.1 GB of RAM. The newly merged performance branch (designed to fix a lot of these problems) is even slower - taking 750 seconds (12.5 minutes) to process the same editing trace.nI managed to get that 10 minute time down to 70ms (0.07 seconds). Thats the best result I've ever gotten from optimization work, and I'm delighted by it. Let me tell you what I did!nWhat does automerge do?nBefore we can go into detail about how I made automerge fast, we have to spend a moment talking about how automerge itself works.nAn automerge document is actually a tree of inserted characters. Each character in the document has the following properties:nA unique ID, made up of a tuple of (client ID, sequence number)nThe ID (or a pointer to) its parent item, which is the item directly before that character when it was inserted.nThe character itself ('A')nThere's a couple more fields (eg to mark when characters have been deleted), but essentially thats it. When a character is inserted in the document, automerge figures out the ID of the character immediately before the new character, and inserts the new character as one of its predecessor's *children*. If you just type a linear sequence of characters (as I'm doing right now), you'll end up with a big long chain of characters going down like a linked list.nSo why is automerge so slow?nWhen optimizing, I imagine myself manually doing all the work the computer is doing, one step at a time. 
Then I imagine asking: 'When I get bored, how would I speed this job up?'.nAutomerge is slow for 3 main reasons:nIts written in javascript and uses complex data structures. Javascript is reasonably fast for math, but slow and inefficient when using complex data structures.nAutomerge uses a complex and inefficient data structurenAutomerge makes extremely heavy use of immutablejsnEach of these issues accounts for about an order of magnitude slowdown in performance. You can see all 3 issues showing up in this method from the automerge source tree, which is called on each keystroke. Automerge uses this method to figure out where each new character should be placed in the resulting document:nfunction insertionsAfter(opSet, objectId, parentId, childId) {n let childKey = nulln if (childId) childKey = Map({opId: childId})nn return opSetn .getIn(['byObject', objectId, '_following', parentId], List())n .filter(op => op.get('insert') && (!childKey || lamportCompare(op, childKey) < 0))n .sort(lamportCompare)n .reverse() // descending ordern .map(op => op.get('opId'))n}nWhats wrong with this method?nThis method allocates all over the place. I can spot 5 allocations, not counting any extra nonsense immutablejs is doing. The call to List() has no effect as far as I can tell from reading immutablejs's documentation.nThe document is always kept in a sorted order anyway, so the calls to sort() and reverse() are unnecessary. The algorithm only needs to figure out where the new child should be inserted. Re-sorting all children is entirely avoidable. Sort functions are often fast when the input is sorted already, but in this case because the sorting function is inverted, the computer always has to sort the entire list.nYou can't tell from looking at this method, but insertionsAfter nDespite CRDTs being the 'new hotness' in the collaborative editing game for years, I've been resisting them. 
As I said in my [recent blog post about CRDTs](https://josephg.com/blog/crdts-are-the-future/), they've been generally unworkable for real world collaborative editing because:nThey take up too much space on disk and in memory. (Automerge takes 1.1GB in RAM to store a 100kb document)nThey consume way too much CPU to process editsnUntil these issues are addressed, I can't recommend CRDTs for use in general computing.n'"),
(0, 0, "'n'"), (0, 0, "'n'"),
(1, 0, "'n'"), (1, 0, "'n'"),
(2, 0, "'n'"), (2, 0, "'n'"),

View File

@@ -1,4 +1,4 @@
val data = #[(0, 0, "'<script>n import {onMount} from 'svelte';n let count = 0;n onMount(() => {n const interval = setInterval(() => count++, 1000);n return () => {n clearInterval(interval);n };n });n</script>nn<style>n :global(body) {n margin: 0;n font-family: Arial, Helvetica, sans-serif;n }n .App {n text-align: center;n }n .App code {n background: #0002;n padding: 4px 8px;n border-radius: 4px;n }n .App p {n margin: 0.4rem;n }nn .App-header {n background-color: #f9f6f6;n color: #333;n min-height: 100vh;n display: flex;n flex-direction: column;n align-items: center;n justify-content: center;n font-size: calc(10px + 2vmin);n }n .App-link {n color: #ff3e00;n }n .App-logo {n height: 36vmin;n pointer-events: none;n margin-bottom: 3rem;n animation: App-logo-spin infinite 1.6s ease-in-out alternate;n }n @keyframes App-logo-spin {n from {n transform: scale(1);n }n to {n transform: scale(1.06);n }n }n</style>nn<div class='App'>n <header class='App-header'>n <img src='/logo.svg' class='App-logo' alt='logo' />n <p>Edit <code>src/App.svelte</code> and save to reload.</p>n <p>Page has been open for <code>{count}</code> seconds.</p>n <p>n <a class='App-link' href='https://svelte.dev' target='_blank' rel='noopener noreferrer'>n Learn Svelten </a>n </p>n </header>n</div>n'"), val svelte_arr = #[(0, 0, "'<script>n import {onMount} from 'svelte';n let count = 0;n onMount(() => {n const interval = setInterval(() => count++, 1000);n return () => {n clearInterval(interval);n };n });n</script>nn<style>n :global(body) {n margin: 0;n font-family: Arial, Helvetica, sans-serif;n }n .App {n text-align: center;n }n .App code {n background: #0002;n padding: 4px 8px;n border-radius: 4px;n }n .App p {n margin: 0.4rem;n }nn .App-header {n background-color: #f9f6f6;n color: #333;n min-height: 100vh;n display: flex;n flex-direction: column;n align-items: center;n justify-content: center;n font-size: calc(10px + 2vmin);n }n .App-link {n color: #ff3e00;n }n .App-logo {n height: 36vmin;n 
pointer-events: none;n margin-bottom: 3rem;n animation: App-logo-spin infinite 1.6s ease-in-out alternate;n }n @keyframes App-logo-spin {n from {n transform: scale(1);n }n to {n transform: scale(1.06);n }n }n</style>nn<div class='App'>n <header class='App-header'>n <img src='/logo.svg' class='App-logo' alt='logo' />n <p>Edit <code>src/App.svelte</code> and save to reload.</p>n <p>Page has been open for <code>{count}</code> seconds.</p>n <p>n <a class='App-link' href='https://svelte.dev' target='_blank' rel='noopener noreferrer'>n Learn Svelten </a>n </p>n </header>n</div>n'"),
(7, 0, "' '"), (7, 0, "' '"),
(8, 0, "'l'"), (8, 0, "'l'"),
(9, 0, "'a'"), (9, 0, "'a'"),

View File

@@ -33,3 +33,13 @@ fun run_txns_time title arr =
in in
time_func title f time_func title f
end end
(* Benchmark driver: times each edit trace once, in this fixed order
 * (svelte, rust, seph, automerge). Each call's result is discarded; the
 * whole expression evaluates to unit. *)
val _ =
  ( run_txns_time "svelte" svelte_arr
  ; run_txns_time "rust" rust_arr
  ; run_txns_time "seph" seph_arr
  ; run_txns_time "automerge" automerge_arr
  ; ()
  )