diff --git a/fcore/persistent-vector.sml b/fcore/persistent-vector.sml
index e8af0ed..25fc790 100644
--- a/fcore/persistent-vector.sml
+++ b/fcore/persistent-vector.sml
@@ -1,82 +1,82 @@
 structure PersistentVector =
 struct
-  (* Clojure-style persistent vector,
-   * as intermediary data structure
-   * for building search list *)
+  (* Clojure-style persistent vector, used to build the search list.
+   * Each node carries a "size table" alongside its children/values:
+   * entry i holds the last finish index stored under slot i.
+   * The size table can be used for binary search.
+   * *)
   datatype t =
-    BRANCH of t vector
-  | LEAF of int vector
+    BRANCH of t vector * int vector
+  | LEAF of {start: int, finish: int} vector * int vector
 
   val maxSize = 32
 
-  val empty = LEAF #[]
+  val empty = LEAF (#[], #[])
 
   datatype append_result = APPEND of t | UPDATE of t
 
-  fun helpAppend (key, tree) =
+  (* Last entry of the node's size table, i.e. the most recently
+   * appended finish index. Raises Subscript on an empty node. *)
+  fun getFinishIdx t =
+    case t of
+      BRANCH (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
+    | LEAF (_, sizes) => Vector.sub (sizes, Vector.length sizes - 1)
+
+  (* Appends {start, finish} along the rightmost path.
+   * UPDATE: the node had room and is returned rebuilt.
+   * APPEND: the node was full; the result is a new sibling. *)
+  fun helpAppend (start, finish, tree) =
     case tree of
-      BRANCH nodes =>
+      BRANCH (nodes, sizes) =>
         let
           val lastNode = Vector.sub (nodes, Vector.length nodes - 1)
         in
-          case helpAppend (key, lastNode) of
+          case helpAppend (start, finish, lastNode) of
             UPDATE newLast =>
               let
-                val newNode = Vector.update
-                  (nodes, Vector.length nodes - 1, newLast)
-                val newNode = BRANCH newNode
+                val lastPos = Vector.length nodes - 1
+                val newNode = Vector.update (nodes, lastPos, newLast)
+                val newSizes = Vector.update (sizes, lastPos, finish)
+                val newNode = BRANCH (newNode, newSizes)
               in
                 UPDATE newNode
               end
           | APPEND newVec =>
               if Vector.length nodes + 1 > maxSize then
-                let val newNode = BRANCH #[newVec]
+                let val newNode = BRANCH (#[newVec], #[finish])
                 in APPEND newNode
                 end
               else
                 let
                   val newNodes = Vector.concat [nodes, #[newVec]]
-                  val newNodes = BRANCH newNodes
+                  val newSizes = Vector.concat [sizes, #[finish]]
+                  val newNodes = BRANCH (newNodes, newSizes)
                 in
                   UPDATE newNodes
                 end
         end
-    | LEAF vec =>
-        if Vector.length vec + 1 > maxSize then
-          let val newNode = LEAF #[key]
+    | LEAF (values, sizes) =>
+        if Vector.length values + 1 > maxSize then
+          let val newNode = LEAF (#[{start = start, finish = finish}], #[finish])
           in APPEND newNode
           end
         else
           let
-            val newNode = Vector.concat [vec, #[key]]
-            val newNode = LEAF newNode
+            val newNode = Vector.concat
+              [values, #[{start = start, finish = finish}]]
+            val newSizes = Vector.concat [sizes, #[finish]]
+            val newNode = LEAF (newNode, newSizes)
           in
             UPDATE newNode
           end
 
-  fun append (key, tree) =
-    case helpAppend (key, tree) of
-      UPDATE t => t
-    | APPEND newNode => BRANCH #[tree, newNode]
-
-  fun branchToList (pos, nodes, acc) =
-    if pos < 0 then
-      acc
-    else
-      let
-        val node = Vector.sub (nodes, pos)
-        val acc = helpToVector (node, acc)
-      in
-        branchToList (pos - 1, nodes, acc)
-      end
-
-  and helpToVector (tree, acc) =
-    case tree of
-      BRANCH nodes => branchToList (Vector.length nodes - 1, nodes, acc)
-    | LEAF vec => vec :: acc
-
-  fun toVector tree =
-    let val lst = helpToVector (tree, [])
-    in Vector.concat lst
-    end
+  (* Appends a match to the tree, growing a new root when the
+   * old root is full. The old root's finish index is read only
+   * in that case: the empty tree has no finish index, and
+   * reading it eagerly would raise Subscript on the first append. *)
+  fun append (start, finish, tree) =
+    case helpAppend (start, finish, tree) of
+      UPDATE t => t
+    | APPEND newNode =>
+        BRANCH (#[tree, newNode], #[getFinishIdx tree, finish])
 end
diff --git a/fcore/search-list.sml b/fcore/search-list.sml
index 401abbd..0b8c2b2 100644
--- a/fcore/search-list.sml
+++ b/fcore/search-list.sml
@@ -2,7 +2,7 @@ structure SearchList =
 struct
   type t = int vector
 
-  val empty = Vector.fromList []
+  val empty = PersistentVector.empty
 
   fun backtrackFull (pos, hd, absIdx, tl, acc, searchPos, searchString, prevTl) =
     if pos < 0 then
@@ -47,7 +47,7 @@ struct
                 , searchString
                 , hd :: prevTl
                 )
-          | [] => PersistentVector.toVector acc
+          | [] => acc
       else
         let
           val bufferChr = String.sub (hd, pos)
@@ -58,7 +58,7 @@ struct
               (* we fully matched the search string *)
               let
                 val foundIdx = absIdx - String.size searchString + 1
-                val acc = PersistentVector.append (foundIdx, acc)
+                val acc = PersistentVector.append (foundIdx, absIdx, acc)
               in
                 loopSearch
                   (pos + 1, hd, absIdx + 1, tl, acc, 0, searchString, prevTl)
@@ -152,9 +152,9 @@ struct
                 , prevTl
                 )
             end
-        | [] => PersistentVector.toVector acc
+        | [] => acc
       else if absIdx = finish then
-        PersistentVector.toVector acc
+        acc
       else
         let
           val bufferChr = String.sub (hd, pos)
@@ -165,7 +165,7 @@ struct
               (* full match *)
               let
                 val foundIdx = absIdx - String.size searchString + 1
-                val acc = PersistentVector.append (foundIdx, acc)
+                val acc = PersistentVector.append (foundIdx, absIdx, acc)
               in
                 loopRange
                   ( pos + 1
@@ -242,7 +242,8 @@ struct
     if String.size searchString > 0 then
       case Nfa.parse searchString of
         SOME nfa =>
-          Nfa.getMatchesInRange (#idx buffer, finishIdx, buffer : LineGap.t, nfa)
+          Nfa.getMatchesInRange
+            (#idx buffer, finishIdx, buffer : LineGap.t, nfa)
       | NONE => empty
     else
       empty
diff --git a/fcore/search-list/nfa.sml b/fcore/search-list/nfa.sml
index a6ba84e..ef04449 100644
--- a/fcore/search-list/nfa.sml
+++ b/fcore/search-list/nfa.sml
@@ -141,7 +141,7 @@ struct
   local
     fun loop (pos, str, nfa, origNfa, startPos, acc) =
       if pos = String.size str then
-        PersistentVector.toVector acc
+        acc
       else
         let
           val chr = String.sub (str, pos)
@@ -149,7 +149,7 @@ struct
         in
           case state of
             VALID finishIdx =>
-              let val acc = PersistentVector.append (pos, acc)
+              let val acc = PersistentVector.append (pos, finishIdx, acc)
               in loop (finishIdx, str, origNfa, origNfa, finishIdx, acc)
               end
           | INVALID =>
@@ -232,9 +232,9 @@ struct
               , startIdx
               , finishIdx
               )
-        | [] => PersistentVector.toVector acc
+        | [] => acc
       else if absIdx > finishIdx then
-        PersistentVector.toVector acc
+        acc
       else
         let
           val chr = String.sub (hd, strIdx)
@@ -254,9 +254,9 @@ struct
                 , startIdx
                 , finishIdx
                 )
-          | VALID _ =>
+          | VALID matchFinish =>
               let
-                val acc = PersistentVector.append (startIdx, acc)
+                val acc = PersistentVector.append (startIdx, matchFinish, acc)
               in
                 loop
                   ( strIdx + 1
@@ -349,8 +349,8 @@ struct
                   , finishIdx
                   )
               end
-          | [] => Vector.fromList [])
-      | [] => Vector.fromList []
+          | [] => PersistentVector.empty)
+      | [] => PersistentVector.empty
     end
   end
 end