diff --git a/src/merklet.erl b/src/merklet.erl index eda3101..8db1d1f 100644 --- a/src/merklet.erl +++ b/src/merklet.erl @@ -10,7 +10,7 @@ %%% The Hkey is used as the main index and to build a tree. If we have three %%% hashes with the values `<<213,21,54,...>>', `<<213,33,98,...>>', and %%% `<<11,45,101,...>>', the resulting tree/trie is: -%%% +%%% ``` %%% (Root) %%% Inner %%% / \ @@ -21,6 +21,7 @@ %%% / \ %%% (21) (33) %%% <<213,21,54,...>> <<213,33,98,...>> +%%% ''' %%% %%% Each of the leaf nodes will contain both hashes, along with a non-hashed %%% version of the key. Each Inner node contains a hash of all its children's @@ -32,31 +33,105 @@ %%% %%% It also allows to do a level-order traversal node-per-node over the network %%% allowing somewhat efficient diffing. +%%% +%%% This implementaion has two variants: +%%% - A purely functional ADT (i.e., non_db_tree()) +%%% - An ADT with a (possibly stateful) storage backend. (i.e., dbtree()) +%%% +%%% The variants are semantically equivalent under the API, but since +%%% the representation is different, trees cannot be compared by using +%%% term comparison (=:=, pattern matching, etc). +%%% +%%% In the functional ADT all inner nodes (inner()) contains the +%%% complete terms of its children, but in the DB backed ADT, the +%%% inner nodes contains only the hashes of its children. +%%% +%%% The children are stored in (and retrieved from) the backend using +%%% user defined callback functions. +%%% +%%%
  • {@type db_handle()} A term that defines to the access funs +%%% which storage to use.
  • +%%% +%%%
  • {@type get_fun()} A fun that given a hash and a {@type db_handle()} +%%% delivers the node associated with the hash.
  • +%%% +%%%
  • {@type put_fun()} A fun that given a hash, a node and a +%%% {@type db_handle()} stores the node previously stored with +%%% the hash as key, returning a new db_handle().
  • +%%% +%%% Note that the db_handle() is treated in a functional manner, so a +%%% functional key-value store (e.g., dict(), gb_tree()) can be used +%%% to keep the full trees functional even if this version is +%%% used. The typical usage would rather have some other stateful +%%% storage model as the backend (e.g., mnesia table, ets, dets). +%%% +%%% Also note that nodes are not removed from the db backend. This +%%% gives the benefit that historical merkle trees can be retrieved +%%% from the database (using the root hash), but at the same time it +%%% leads to a waste of space if only the newest tree is ever used. +%%% +%%% The function visit_nodes/3 can be used to implement a garbage +%%% collection. An example of this is available in the proper tests. +%%% +%%% Accessing a historical tree can be done by creating a tree with +%%% the appropriate root hash and db backend using the function +%%% db_tree/3. +%%% %%% @end -module(merklet). --record(leaf, {hash :: binary(), % hash(hash(key), hash(value)) +-record(leaf, {hash :: hash(), % hash(hash(key), hash(value)) userkey :: binary(), % user submitted key hashkey :: binary()}). % hash of the user submitted key --record(inner, {hashchildren :: binary(), % hash of children's hashes - children :: [{offset(), #inner{} | #leaf{}}, ...], +-record(inner, {hashchildren :: hash(), % hash of children's hashes + %% The children is really nonempty but, we abuse it in + %% unserialize, which Dialyzer finds. + children :: [{offset(), #inner{} | #leaf{} | hash()}], offset :: non_neg_integer()}). % byte offset + +-record(db, { get :: get_fun() + , put :: put_fun() + , handle :: term() + }). + +-record(dbtree, { db :: db() + , hash :: 'undefined' | hash() + }). + -define(HASHPOS, 2). % #leaf.hash =:= #inner.hashchildren -type offset() :: byte(). -type leaf() :: #leaf{}. -type inner() :: #inner{}. --type tree() :: leaf() | inner() | 'undefined'. +-type non_db_tree() :: leaf() | inner() | 'undefined'. +-type db_tree() :: #dbtree{}. +-type tree() :: db_tree() | non_db_tree(). + -type key() :: binary(). -type value() :: binary(). -type path() :: binary(). --type access_fun() :: fun((at | child_at | keys | {keys, Hash::binary()}, path()) -> tree()). --type serial_fun() :: fun((at | child_at | keys | {keys, Hash::binary()}, path()) -> binary()). +-type hash() :: binary(). +-type access_fun() :: fun((at | child_at | keys | {keys, Hash::hash()}, path()) -> non_db_tree()). +-type serial_fun() :: fun((at | child_at | keys | {keys, Hash::hash()}, path()) -> binary()). + +-type db() :: #db{}. +-type db_handle() :: term(). +-type get_fun() :: fun((hash(), db_handle()) -> non_db_tree()). +-type put_fun() :: fun((hash(), Node :: leaf() | inner(), db_handle()) -> db_handle()). -export_type([tree/0, key/0, value/0, path/0, access_fun/0, serial_fun/0]). +-export_type([get_fun/0, put_fun/0, db_handle/0]). -export([insert/2, insert_many/2, delete/2, keys/1, diff/2]). -export([dist_diff/2, access_serialize/1, access_unserialize/1]). +-export([empty_db_tree/0, empty_db_tree/1, db_tree/2, db_handle/1, root_hash/1]). +-export([visit_nodes/3]). + +-ifdef(TEST). +%% Test interface +-export([ expand_db_tree/1 + ]). +-endif. -define(HASH, sha). -define(HASHBYTES, 20). @@ -76,34 +151,144 @@ %%% API %%% %%%%%%%%%%% +%% @doc Creating an empty tree with the built-in dict backend +-spec empty_db_tree() -> db_tree(). +empty_db_tree() -> + empty_db_tree(#{ get => fun dict:fetch/2 + , put => fun dict:store/3 + , handle => dict:new() + }). + +%% @doc Creating an empty tree with user-defined db backend +-spec empty_db_tree(#{ get := get_fun() + , put := put_fun() + , handle := db_handle()}) -> db_tree(). +empty_db_tree(#{ get := GetFun + , put := PutFun + , handle := Handle}) when is_function(GetFun, 2), + is_function(PutFun, 3) -> + #dbtree{ hash = undefined + , db = #db{ get=GetFun + , put=PutFun + , handle=Handle + }}. + +%% @doc Get the root hash for the tree. If the tree is empty, the root +%% hash is 'undefined'. +-spec root_hash(tree()) -> hash() | 'undefined'. +root_hash(Tree0) -> + case unpack_db_tree(Tree0) of + {undefined, _} -> undefined; + {#inner{hashchildren=Hash}, _} -> Hash; + {#leaf{hash=Hash}, _} -> Hash + end. + +%% @doc Get the db_handle() of a db_tree(). +-spec db_handle(db_tree()) -> db_handle(). +db_handle(#dbtree{db = #db{handle=Handle}}) -> + Handle. + +%% @doc Creating an tree with user-defined db backend and setting the +%% root hash. Useful to re-initialize a tree from minimal +%% information and a prebuilt db store. +-spec db_tree(hash(), #{ get := get_fun() + , put := put_fun() + , handle := db_handle()}) -> db_tree(). +db_tree(RootHash, Spec) when RootHash =:= undefined; + byte_size(RootHash) =:= ?HASHBYTES -> + T0 = empty_db_tree(Spec), + T0#dbtree{ hash = RootHash }. + +-ifdef(TEST). +%% Test interface to facilitate comparing trees +expand_db_tree(#dbtree{hash=undefined}) -> + undefined; +expand_db_tree(#dbtree{hash=H, db=DB}) -> + expand_db_tree(db_get(H, DB), DB); +expand_db_tree(Tree) -> + Tree. + +expand_db_tree(#leaf{} = L,_DB) -> + L; +expand_db_tree(#inner{children=Children} = I, DB) -> + Expand = fun(_, Child) -> expand_db_tree(db_get(Child, DB), DB)end, + I#inner{children=orddict:map(Expand, Children)}. +-endif. + +unpack_db_tree(#dbtree{hash=H, db=DB}) -> {db_get(H, DB), DB}; +unpack_db_tree(#leaf{} = T) -> {T, no_db}; +unpack_db_tree(#inner{} = T) -> {T, no_db}; +unpack_db_tree(undefined) -> {undefined, no_db}. + +pack_db_tree({T, no_db}) -> T; +pack_db_tree({#leaf{hash=H}, #db{} = DB}) -> #dbtree{hash=H, db=DB}; +pack_db_tree({#inner{hashchildren=H}, #db{} = DB}) -> #dbtree{hash=H, db=DB}; +pack_db_tree({undefined, #db{} = DB}) -> #dbtree{hash=undefined, db=DB}. + %% @doc Adds a key to the tree, or overwrites an exiting one. -spec insert({key(), value()}, tree()) -> tree(). -insert({Key, Value}, Tree) -> - insert(0, to_leaf(Key, Value), Tree). +insert({Key, Value}, Tree0) -> + {Tree, DB} = unpack_db_tree(Tree0), + pack_db_tree(insert(0, to_leaf(Key, Value), Tree, DB)). %% @doc Adds multiple keys to the tree, or overwrites existing ones. --spec insert_many({key(), value()}, tree()) -> tree(). -insert_many([], Tree) -> Tree; -insert_many([H|T], Tree) -> insert_many(T, insert(H, Tree)). +-spec insert_many([{key(), value()}], tree()) -> tree(). +insert_many(List, Tree0) -> + {Tree, DB} = unpack_db_tree(Tree0), + insert_many(List, Tree, DB). + +insert_many([{Key, Value}|T], Tree, DB) -> + {Tree1, DB1} = insert(0, to_leaf(Key, Value), Tree, DB), + insert_many(T, Tree1, DB1); +insert_many([], Tree, DB) -> + pack_db_tree({Tree, DB}). %% @doc Removes a key from a tree, if present. -spec delete(key(), tree()) -> tree(). -delete(Key, Tree) -> - delete_leaf(to_leaf(Key, <<>>), Tree). +delete(Key, Tree0) -> + {Tree, DB} = unpack_db_tree(Tree0), + pack_db_tree(delete_leaf(to_leaf(Key, <<>>), Tree, DB)). %% @doc Returns a sorted list of all the keys in the tree -spec keys(tree()) -> [key()]. -keys(Tree) -> - lists:usort(raw_keys(Tree)). +keys(Tree0) -> + {Tree, DB} = unpack_db_tree(Tree0), + lists:usort(raw_keys(Tree, DB)). %% @doc Takes two trees and returns the different keys between them. -spec diff(tree(), tree()) -> [key()]. -diff(Tree1, Tree2) -> +diff(Tree10, Tree20) -> %% We use the remote access for this local comparison. This is %% slower than a dedicated traversal algorithm, but less code %% means fewer chances of breaking stuff. - Fun = access_local(Tree2), - diff(Tree1, Fun, <<>>). + {Tree1, DB1} = unpack_db_tree(Tree10), + {Tree2, DB2} = unpack_db_tree(Tree20), + Fun = access_local(Tree2, DB2), + diff(Tree1, DB1, Fun, <<>>). + +%% @doc Traversal of the tree nodes. Can be useful for implementing a +%% moving garbage collector of the db backend, Or a staged commit +%% scheme of the db where only the reachable nodes of a cache is +%% committed to the real backend. +%% +%% The nodes are visited in pre-order, parents are visited before +%% children. +%% +%% The visit fun should either return the atom `stop' or a new +%% accumulator. The `stop' is interpreted as to not traverse the +%% subtree rooted at the node. Siblings are still traversed. + +-type node_type() :: 'leaf' | 'inner'. +-type visit_fun() :: fun((node_type(), hash(), leaf() | inner(), Acc :: term()) -> + 'stop' | term()). +-spec visit_nodes(visit_fun(), InitAcc :: term(), tree()) -> ResultAcc :: term(). +visit_nodes(VisitFun, InitAcc, Tree0) when is_function(VisitFun, 4) -> + case unpack_db_tree(Tree0) of + {undefined, _} -> + InitAcc; + {Tree, DB} -> + visit(Tree, VisitFun, DB, InitAcc) + end. %% @doc Takes a local tree, and an access function to another tree, %% and returns the keys associated with diverging parts of both trees. @@ -113,33 +298,38 @@ diff(Tree1, Tree2) -> %% The Path is a sequence of bytes (in a `binary()') telling how to get to %% a specific node: %% -%% - `<<>>' means returning the current node, at whatever point we are in the -%% tree's traversal. -%% - `<>' means to return the node at the given offset for the +%%
  • `<<>>' means returning the current node, at whatever point we are in the +%% tree's traversal.
  • +%% +%%
  • `<>' means to return the node at the given offset for the %% current tree level. For example, a value of `<<0>>' means to return the %% leftmost child of the current node, whereas `<<3>>' should return the %% 4th leftmost child. Any time the path is larger than the number of %% children, we return `undefined'. -%% This is the case where we can recurse. -%% - Any invalid path returns `undefined'. +%% This is the case where we can recurse.
  • +%% +%%
  • Any invalid path returns `undefined'.
  • %% %% The three terms required are: -%% - `at': Uses the path as above to traverse the tree and return a node. -%% - `keys': Returns all the keys held (recursively) by the node at a given +%% +%%
  • `at': Uses the path as above to traverse the tree and return a node.
  • +%% +%%
  • `keys': Returns all the keys held (recursively) by the node at a given %% path. A special variant exists of the form `{keys, Key, Hash}', where the %% function must return the key set minus the one that would contain either %% `Key' or `Hash', but by specifying if the key and hash were encountered, -%% and if so, if they matched or not. -%% - `child_at': Special case of `at' used when comparing child nodes of two +%% and if so, if they matched or not.
  • +%% +%%
  • `child_at': Special case of `at' used when comparing child nodes of two %% inner nodes. Basically the same as `at', but with one new rule: %% %% Whenever we hit a path that is `<>' and we are on an inner node, %% it means we only have a child to look at. Return that child along %% with its byte at the offset in the dictionary structure -%% (`{ByteAtOffset, Node}'). +%% (`{ByteAtOffset, Node}').
  • %% %% Examples of navigation through a tree of the form: -%% +%%``` %% 0 | ___.-A-._____ %% | / | \ %% 1 | .-B-. C .-D-. @@ -147,9 +337,9 @@ diff(Tree1, Tree2) -> %% 2 | E F .G. H %% | / \ %% 3 | I J -%% +%%''' %% Which is four levels deep. The following paths lead to following nodes: -%% +%%``` %% +==============+===========+ +==============+===========+ %% | Path | Node | | Path | Node | %% +==============+===========+ +==============+===========+ @@ -160,21 +350,23 @@ diff(Tree1, Tree2) -> %% | <<3>> | undefined | | <<2,0,1>> | J | %% | <<0,0>> | E | | <<2,0,1,3>> | undefined | %% +--------------+-----------+ +--------------+-----------+ -%% +%%''' %% The values returned are all the keys that differ across both trees. -spec dist_diff(tree(), access_fun()) -> [key()]. -dist_diff(Tree, Fun) when is_function(Fun,2) -> - diff(Tree, Fun, <<>>). +dist_diff(Tree0, Fun) when is_function(Fun,2) -> + {Tree, DB} = unpack_db_tree(Tree0), + diff(Tree, DB, Fun, <<>>). -%% @doc Returns an `access_fun()' for the current tree. This function +%% @doc Returns an {@link access_fun()} for the current tree. This function %% can be put at the end of a connection to a remote node, and it %% will return serialized tree nodes. -spec access_serialize(tree()) -> serial_fun(). -access_serialize(Tree) -> - fun(at, Path) -> serialize(at(Path, Tree)); - (child_at, Path) -> serialize(child_at(Path, Tree)); - (keys, Path) -> serialize(raw_keys(at(Path, Tree))); - ({keys,Key,Skip}, Path) -> serialize(raw_keys(at(Path, Tree), Key, Skip)) +access_serialize(Tree0) -> + {Tree, DB} = unpack_db_tree(Tree0), + fun(at, Path) -> serialize(at(Path, Tree, DB)); + (child_at, Path) -> serialize(child_at(Path, Tree, DB)); + (keys, Path) -> serialize(raw_keys(at(Path, Tree, DB), DB)); + ({keys,Key,Skip}, Path) -> serialize(raw_keys(at(Path, Tree, DB), Key, Skip, DB)) end. %% @doc Takes an {@link access_fun()} that fetches nodes serialized according @@ -190,83 +382,100 @@ access_unserialize(Fun) -> %%%%%%%%%%%%%%% %% if the tree is empty, just use the leaf -insert(_Offset, Leaf, undefined) -> - Leaf; +insert(_Offset, Leaf, undefined, DB) -> + {Leaf, db_put(Leaf, DB)}; %% If the offset is at the max value for the hash, return the leaf -- %% We can't go deeper anyway. -insert(?HASHBYTES, Leaf, _) -> - Leaf; +insert(?HASHBYTES, Leaf, _, DB) -> + {Leaf, db_put(Leaf, DB)}; %% if the current node of the tree is a leaf and both keys are the same, %% replace it. -insert(_Offset, Leaf=#leaf{hashkey=Key}, #leaf{hashkey=Key}) -> - Leaf; +insert(_Offset, Leaf=#leaf{hashkey=Key}, #leaf{hashkey=Key}, DB) -> + {Leaf, db_put(Leaf, DB)}; %% if the current node of the tree is a leaf, and keys are different, turn the %% current leaf into an inner node, and insert the new one in it. -insert(Offset, NewLeaf, OldLeaf=#leaf{}) -> - insert(Offset, NewLeaf, to_inner(Offset, OldLeaf)); +insert(Offset, NewLeaf, OldLeaf=#leaf{}, DB) -> + Inner = to_inner(Offset, OldLeaf, DB), + insert(Offset, NewLeaf, Inner, db_put(Inner, DB)); %% Insert to an inner node! -insert(Offset, Leaf=#leaf{hashkey=Key}, Inner=#inner{children=Children}) -> +insert(Offset, Leaf=#leaf{hashkey=Key}, Inner=#inner{children=Children}, DB) -> Byte = binary:at(Key, Offset), - NewChildren = case orddict:find(Byte, Children) of + case orddict:find(Byte, Children) of error -> - orddict:store(Byte, Leaf, Children); - {ok, Subtree} -> - orddict:store(Byte, insert(Offset+1, Leaf, Subtree), Children) - end, - Inner#inner{hashchildren=children_hash(NewChildren), children=NewChildren}. + DB1 = db_put(Leaf, DB), + NewChildren = orddict:store(Byte, db_ref(Leaf, DB1), Children), + NewInner = Inner#inner{hashchildren=children_hash(NewChildren), + children=NewChildren}, + {NewInner, db_put(NewInner, DB1)}; + {ok, SubtreeRef} -> + Subtree = db_get(SubtreeRef, DB), + {Subtree1, DB1} = insert(Offset+1, Leaf, Subtree, DB), + NewChildren = orddict:store(Byte, db_ref(Subtree1, DB1), Children), + NewInner = Inner#inner{hashchildren=children_hash(NewChildren), + children=NewChildren}, + {NewInner, db_put(NewInner, DB1)} + end. %% Not found or empty tree. Leave as is. -delete_leaf(_, undefined) -> - undefined; +delete_leaf(_, undefined, DB) -> + {undefined, DB}; %% If we have the same leaf node we were looking for, kill it. -delete_leaf(#leaf{hashkey=K}, #leaf{hashkey=K}) -> - undefined; +delete_leaf(#leaf{hashkey=K}, #leaf{hashkey=K}, DB) -> + {undefined, DB}; %% If it's a different leaf, the item to delete is already gone. Leave as is. -delete_leaf(#leaf{}, Leaf=#leaf{}) -> - Leaf; +delete_leaf(#leaf{}, Leaf=#leaf{}, DB) -> + {Leaf, DB}; %% if it's an inner node, look inside -delete_leaf(Leaf=#leaf{hashkey=K}, Inner=#inner{offset=Offset, children=Children}) -> +delete_leaf(Leaf=#leaf{hashkey=K}, Inner=#inner{offset=Offset, children=Children}, DB) -> Byte = binary:at(K, Offset), case orddict:find(Byte, Children) of error -> % not found, leave as is - Inner; - {ok, Subtree} -> - NewChildren = case maybe_shrink(delete_leaf(Leaf, Subtree)) of - undefined -> % leaf gone - orddict:erase(Byte, Children); - Node -> % replacement node - orddict:store(Byte, Node, Children) - end, - maybe_shrink(Inner#inner{hashchildren=children_hash(NewChildren), - children=NewChildren}) + {Inner, DB}; + {ok, SubtreeHandle} -> + Subtree = db_get(SubtreeHandle, DB), + {Subtree1, DB1} = delete_leaf(Leaf, Subtree, DB), + case maybe_shrink(Subtree1, DB) of + {undefined, DB1} -> % leaf gone + NewChildren = orddict:erase(Byte, Children), + NewInner = Inner#inner{hashchildren=children_hash(NewChildren), + children=NewChildren}, + maybe_shrink(NewInner, DB1); + {Node, DB1} -> % replacement node + NewChildren = orddict:store(Byte, db_ref(Node, DB), Children), + NewInner = Inner#inner{hashchildren=children_hash(NewChildren), + children=NewChildren}, + maybe_shrink(NewInner, DB1) + end end. -raw_keys(undefined) -> +raw_keys(undefined,_DB) -> []; -raw_keys(#leaf{userkey=Key}) -> +raw_keys(#leaf{userkey=Key},_DB) -> [Key]; -raw_keys(#inner{children=Children}) -> +raw_keys(#inner{children=Children}, DB) -> lists:append(orddict:fold( - fun(_Byte, Node, Acc) -> [raw_keys(Node)|Acc] end, + fun(_Byte, NodeHandle, Acc) -> + [raw_keys(db_get(NodeHandle, DB), DB)|Acc] end, [], Children )). %% Same as raw_keys/1, but reports on a given hash and key -raw_keys(I=#inner{}, KeyToWatch, ToSkip) -> raw_keys(I, KeyToWatch, ToSkip, unseen). +raw_keys(I=#inner{}, KeyToWatch, ToSkip, DB) -> raw_keys(I, KeyToWatch, ToSkip, unseen, DB). -raw_keys(undefined, _, _, Status) -> +raw_keys(undefined, _, _, Status,_DB) -> {Status, []}; -raw_keys(#leaf{hash=Hash}, _, Hash, Status) -> +raw_keys(#leaf{hash=Hash}, _, Hash, Status,_DB) -> {merge_status(same, Status), []}; -raw_keys(#leaf{userkey=Key}, Key, _, Status) -> +raw_keys(#leaf{userkey=Key}, Key, _, Status,_DB) -> {merge_status(diff, Status), []}; -raw_keys(#leaf{userkey=Key}, _, _, Status) -> +raw_keys(#leaf{userkey=Key}, _, _, Status,_DB) -> {Status, [Key]}; -raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus) -> +raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus, DB) -> {Status, DeepList} = lists:foldl( - fun({_, Node}, {Status, Acc}) -> - {NewStatus, ToAdd} = raw_keys(Node, Key, ToSkip, Status), + fun({_, NodeHandle}, {Status, Acc}) -> + Node = db_get(NodeHandle, DB), + {NewStatus, ToAdd} = raw_keys(Node, Key, ToSkip, Status, DB), {NewStatus, [ToAdd|Acc]} end, {InitStatus, []}, @@ -278,83 +487,83 @@ raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus) -> %% That would mean the tree may contain many similar keys %% in many places merge_status(same, unseen) -> same; -merge_status(unseen, same) -> same; -merge_status(diff, unseen) -> diff; -merge_status(unseen, diff) -> diff; -merge_status(unseen, unseen) -> unseen. +merge_status(diff, unseen) -> diff. + +-spec diff(tree(), db(), access_fun(), path()) -> [key()]. +diff(Tree, DB, Fun, Path) -> + lists:usort(raw_diff(Tree, Fun(at, Path), Fun, Path, DB)). --spec diff(tree(), access_fun(), path()) -> [key()]. -diff(Tree, Fun, Path) -> - lists:usort(raw_diff(Tree, Fun(at, Path), Fun, Path)). %% Empty trees yield all keys of remaining trees -raw_diff(undefined, undefined, _, _) -> +raw_diff(undefined, undefined, _, _, _) -> []; -raw_diff(undefined, _Tree2, Fun, Path) -> +raw_diff(undefined, _Tree2, Fun, Path,_DB) -> Fun(keys, Path); -raw_diff(Tree1, undefined, _, _) -> - raw_keys(Tree1); +raw_diff(Tree1, undefined, _, _, DB) -> + raw_keys(Tree1, DB); %% If hashes are the same, we're done. -raw_diff(#leaf{hash=Hash}, #leaf{hash=Hash}, _, _) -> +raw_diff(#leaf{hash=Hash}, #leaf{hash=Hash}, _, _, _) -> []; -raw_diff(#leaf{hash=Hash}, #inner{hashchildren=Hash}, _, _) -> +raw_diff(#leaf{hash=Hash}, #inner{hashchildren=Hash}, _, _, _) -> []; -raw_diff(#inner{hashchildren=Hash}, #leaf{hash=Hash}, _, _) -> +raw_diff(#inner{hashchildren=Hash}, #leaf{hash=Hash}, _, _, _) -> []; -raw_diff(#inner{hashchildren=Hash}, #inner{hashchildren=Hash}, _, _) -> +raw_diff(#inner{hashchildren=Hash}, #inner{hashchildren=Hash}, _, _, _) -> []; %% if they differ and both nodes are leaf nodes, return both values -raw_diff(#leaf{userkey=Key1}, #leaf{userkey=Key2}, _, _) -> +raw_diff(#leaf{userkey=Key1}, #leaf{userkey=Key2}, _, _, _) -> [Key1,Key2]; %% if both differ but one is an inner node, return everything -raw_diff(#leaf{userkey=Key, hash=ToSkip}, #inner{}, Fun, Path) -> +raw_diff(#leaf{userkey=Key, hash=ToSkip}, #inner{}, Fun, Path,_DB) -> %% We can only get rid of the current Key if the hashes are the same case Fun({keys, Key, ToSkip}, Path) of {same, Keys} -> Keys; {diff, Keys} -> [Key|Keys]; {unseen, Keys} -> [Key|Keys] end; -raw_diff(Inner=#inner{}, #leaf{userkey=Key, hash=ToSkip}, _, _) -> +raw_diff(Inner=#inner{}, #leaf{userkey=Key, hash=ToSkip}, _, _, DB) -> %% We can only get rid of the current Key if the hashes are the same - case raw_keys(Inner, Key, ToSkip) of + case raw_keys(Inner, Key, ToSkip, DB) of {same, Keys} -> Keys; {diff, Keys} -> [Key|Keys]; {unseen, Keys} -> [Key|Keys] end; %% if both nodes are inner and populated, compare them offset by offset. -raw_diff(#inner{children=Children}, #inner{}, Fun, Path) -> +raw_diff(#inner{children=Children}, #inner{}, Fun, Path, DB) -> ChildPath = <>, diff_offsets(children_offsets(Children), Fun(child_at, ChildPath), Fun, - ChildPath). + ChildPath, + DB). %% Whatever is left alone is returned -diff_offsets([], undefined, _, _) -> +diff_offsets([], undefined, _, _, _) -> []; -diff_offsets(List, undefined, _, _) -> - lists:append([raw_keys(Child) || {_, Child} <- List]); -diff_offsets([], _, Fun, Path) -> +diff_offsets(List, undefined, _, _, DB) -> + lists:append([raw_keys(db_get(Child, DB), DB) || {_, Child} <- List]); +diff_offsets([], _, Fun, Path, DB) -> Keys = Fun(keys, Path), case next_child_path(Path) of undefined -> Keys; - Next -> Keys ++ diff_offsets([], Fun(child_at, Next), Fun, Next) + Next -> Keys ++ diff_offsets([], Fun(child_at, Next), Fun, Next, DB) end; %% If both offsets are the same, compare recursively. -diff_offsets(L=[{OffL, Child}|Rest], R={OffR,Node}, Fun, Path) -> +diff_offsets(L=[{OffL, Child0}|Rest], R={OffR,Node}, Fun, Path, DB) -> + Child = db_get(Child0, DB), if OffL =:= OffR -> - Diff = raw_diff(Child, Node, Fun, Path), + Diff = raw_diff(Child, Node, Fun, Path, DB), case next_child_path(Path) of undefined -> Diff; - Next -> Diff ++ diff_offsets(Rest, Fun(child_at, Next), Fun, Next) + Next -> Diff ++ diff_offsets(Rest, Fun(child_at, Next), Fun, Next, DB) end; OffL < OffR -> - raw_keys(Child) ++ diff_offsets(Rest, R, Fun, Path); + raw_keys(Child, DB) ++ diff_offsets(Rest, R, Fun, Path, DB); OffL > OffR -> Keys = Fun(keys, Path), case next_child_path(Path) of undefined -> Keys; - Next -> Keys ++ diff_offsets(L, Fun(child_at, Next), Fun, Next) + Next -> Keys ++ diff_offsets(L, Fun(child_at, Next), Fun, Next, DB) end end. @@ -383,9 +592,10 @@ to_leaf(Key, Value) when is_binary(Key) -> %% @doc We build a Key-Value list of the child nodes and their offset %% to be used as a sparse K-ary tree. --spec to_inner(offset(), leaf()) -> inner(). -to_inner(Offset, Child=#leaf{hashkey=Hash}) -> - Children = orddict:store(binary:at(Hash, Offset), Child, orddict:new()), +-spec to_inner(offset(), leaf(), db()) -> inner(). +to_inner(Offset, Child=#leaf{hashkey=Hash}, DB) -> + ChildRef = db_ref(Child, DB), + Children = orddict:store(binary:at(Hash, Offset), ChildRef, orddict:new()), #inner{hashchildren=children_hash(Children), children=Children, offset=Offset}. @@ -400,7 +610,11 @@ to_inner(Offset, Child=#leaf{hashkey=Hash}) -> %% of inner nodes, because it is dictated by the children's keyhashes, and %% not the inner node's own hashes. %% @todo consider endianness for absolute portability --spec children_hash([{offset(), leaf()}, ...]) -> binary(). +-spec children_hash([{offset(), leaf()}, ...]) -> hash(). +children_hash([{_, B}|_] = Children) when is_binary(B) -> + %% This is in db mode + Hashes = [ChildHash || {_Offset, ChildHash} <- Children], + crypto:hash(?HASH, Hashes); children_hash(Children) -> Hashes = [element(?HASHPOS, Child) || {_Offset, Child} <- Children], crypto:hash(?HASH, Hashes). @@ -409,21 +623,27 @@ children_hash(Children) -> %% or should just be returned as is. %% This avoids a problem where a deleted subtree results in an inner node %% with a single element, which wastes space and can slow down diffing. -maybe_shrink(Leaf = #leaf{}) -> - Leaf; -maybe_shrink(undefined) -> - undefined; -maybe_shrink(Inner = #inner{children=Children}) -> +maybe_shrink(Leaf = #leaf{}, DB) -> + {Leaf, DB}; +maybe_shrink(undefined, DB) -> + {undefined, DB}; +maybe_shrink(Inner = #inner{children=Children}, DB) -> %% The trick for this one is that if we have *any* child set that %% is anything else than a single leaf node, we can't shrink. We use %% a fold with a quick try ... catch to quickly figure this out, in %% two iterations at most. try - orddict:fold(fun(_Offset, Leaf=#leaf{}, 0) -> Leaf; + orddict:fold(fun(_Offset, NodeHandle, 0) -> + case db_get(NodeHandle, DB) of + #leaf{} = Leaf -> + {Leaf, DB}; + _ -> + throw(false) + end; (_, _, _) -> throw(false) end, 0, Children) catch - throw:false -> Inner + throw:false -> {Inner, db_put(Inner, DB)} end. %% @doc Returns the sorted offsets of a given child. Because we're using @@ -433,16 +653,16 @@ maybe_shrink(Inner = #inner{children=Children}) -> children_offsets(Children) -> Children. %% Wrapper for the diff function. -access_local(Node) -> - fun(at, Path) -> at(Path, Node); - (child_at, Path) -> child_at(Path, Node); - (keys, Path) -> raw_keys(at(Path, Node)); - ({keys, Key, Skip}, Path) -> raw_keys(at(Path, Node), Key, Skip) +access_local(Node, DB) -> + fun(at, Path) -> at(Path, Node, DB); + (child_at, Path) -> child_at(Path, Node, DB); + (keys, Path) -> raw_keys(at(Path, Node, DB), DB); + ({keys, Key, Skip}, Path) -> raw_keys(at(Path, Node, DB), Key, Skip, DB) end. %% Return the node at a given position in a tree. -at(Path, Tree) -> - case child_at(Path, Tree) of +at(Path, Tree, DB) -> + case child_at(Path, Tree, DB) of {_Off, Node} -> Node; Node -> Node end. @@ -452,10 +672,10 @@ at(Path, Tree) -> %% its indexed offset. %% This allows to diff inner nodes without contextual info while in the %% offset traversal. -child_at(<<>>, Node) -> +child_at(<<>>, Node,_DB) -> %% End of path, return whatever Node; -child_at(<>, #inner{children=Children}) -> +child_at(<>, #inner{children=Children}, DB) -> %% Depending on the path depth, the behavior changes. If the path depth %% left is of one (i.e. `<> = <>') and that we are in %% an inner node, then we're looking for the child definition as @@ -470,11 +690,11 @@ child_at(<>, #inner{children=Children}) -> end, N, Children), undefined catch - throw:{Off,Node} -> {Off,Node}; - throw:Node -> child_at(Rest, Node) + throw:{Off,Node} -> {Off, db_get(Node, DB)}; + throw:Node -> child_at(Rest, db_get(Node, DB), DB) end; %% Invalid path -child_at(_, _) -> +child_at(_, _, _) -> undefined. %% Serialize nodes flatly. All terms are self-contained and their @@ -514,7 +734,8 @@ unserialize(<>) -> #leaf{userkey=Key, hashkey=HKey, hash=Hash}; unserialize(<>) -> - #inner{hashchildren=Hash}; + %% Cheat a little to please Dialyzer + #inner{hashchildren=Hash, children=[], offset=256}; unserialize(<>) -> {Byte, unserialize(Node)}; unserialize(<>) -> @@ -531,3 +752,41 @@ unserialize(<>) -> NumKeys = length(Keys), {Word, Keys}. +%% Node traversal +visit(#leaf{hash = Hash} = Node, VisitFun,_DB, Acc) -> + case VisitFun(leaf, Hash, Node, Acc) of + stop -> Acc; + NewAcc -> NewAcc + end; +visit(#inner{hashchildren = Hash} = Node, VisitFun, DB, Acc) -> + case VisitFun(inner, Hash, Node, Acc) of + stop -> + Acc; + NewAcc -> + orddict:fold(fun(_, ChildRef, FoldAcc) -> + visit(db_get(ChildRef, DB), VisitFun, DB, FoldAcc) + end, NewAcc, Node#inner.children) + end. + +%% Interface to the db backend's callback functions. +db_put(_, no_db) -> + no_db; +db_put(#leaf{hash = Hash} = Node, #db{put=Put, handle=Handle} = DB) -> + DB#db{handle=Put(Hash, Node, Handle)}; +db_put(#inner{hashchildren = Hash} = Node, #db{put=Put, handle=Handle} = DB) -> + DB#db{handle=Put(Hash, Node, Handle)}. + +db_get(X, no_db) when is_record(X, inner) orelse is_record(X, leaf) orelse X =:= undefined -> + X; +db_get(X, #db{get=Get, handle=Handle}) when is_binary(X) -> + Get(X, Handle); +db_get(undefined, #db{}) -> + undefined. + +db_ref(X, no_db) when is_record(X, inner) orelse is_record(X, leaf) orelse X =:= undefined -> + X; +db_ref(#leaf{hash=Hash}, #db{}) -> + Hash; +db_ref(#inner{hashchildren=Hash}, #db{}) -> + Hash. + diff --git a/test/merklet_SUITE.erl b/test/merklet_SUITE.erl index 80c08c9..342233c 100644 --- a/test/merklet_SUITE.erl +++ b/test/merklet_SUITE.erl @@ -3,14 +3,51 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("eunit/include/eunit.hrl"). -all() -> [regression_diff]. +all() -> [ regression_diff + , regression_diff_dict_db + , regression_diff_ets_db + , regression_visit_nodes + , regression_visit_nodes_dict_db + , regression_visit_nodes_ets_db + ]. regression_diff(_) -> - T1 = insert_all([{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}]), - T2 = insert_all([{<<1>>,<<0>>}]), + regression_diff_common(undefined). + +regression_diff_dict_db(_) -> + T0 = merklet:empty_db_tree(), + regression_diff_common(T0). + +regression_diff_ets_db(_) -> + T0 = merklet:empty_db_tree(merklet_ets_db_backend:spec()), + regression_diff_common(T0). + +regression_diff_common(T0) -> + T1 = insert_all([{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}], T0), + T2 = insert_all([{<<1>>,<<0>>}], T0), ?assertEqual([<<1>>,<<2>>,<<3>>], merklet:diff(T1,T2)), ?assertEqual([<<1>>,<<2>>,<<3>>], merklet:diff(T2,T1)). +regression_visit_nodes(_) -> + regression_visit_nodes_common(undefined). + +regression_visit_nodes_dict_db(_) -> + T0 = merklet:empty_db_tree(), + regression_visit_nodes_common(T0). + +regression_visit_nodes_ets_db(_) -> + T0 = merklet:empty_db_tree(merklet_ets_db_backend:spec()), + regression_visit_nodes_common(T0). + +regression_visit_nodes_common(T0) -> + AllVals = [{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}], + T1 = insert_all(AllVals, T0), + Fun = fun(Type,_Hash,_Node, Count) -> + orddict:update_counter(Type, 1, Count) + end, + Count = merklet:visit_nodes(Fun, [], T1), + ?assertEqual(length(AllVals), orddict:fetch(leaf, Count)). + %%%%%%%%%%%%%%%% %%% Builders %%% %%%%%%%%%%%%%%%% diff --git a/test/merklet_ets_db_backend.erl b/test/merklet_ets_db_backend.erl new file mode 100644 index 0000000..9443320 --- /dev/null +++ b/test/merklet_ets_db_backend.erl @@ -0,0 +1,25 @@ +%%% @doc Test backend module using an ets table as a db backend for merklet. +%%% @end +-module(merklet_ets_db_backend). + +-export([spec/0]). + +spec() -> + #{ get => fun ets_get/2 + , put => fun ets_put/3 + , handle => ets_new()}. + +%% NOTE: This relies on the identity hash being the second element of +%% #leaf{} and #inner{}. +ets_new() -> + ets:new(merklet_backend, [set, private, {keypos, 2}]). + +ets_get(Key, Ets) -> + [Rec] = ets:lookup(Ets, Key), + Rec. + +ets_put(Key, Val, Ets) -> + %% Assert + Key = element(2, Val), + ets:insert(Ets, Val), + Ets. diff --git a/test/prop_merklet.erl b/test/prop_merklet.erl index 8b93c36..d36ee32 100644 --- a/test/prop_merklet.erl +++ b/test/prop_merklet.erl @@ -6,23 +6,51 @@ -define(run(Case), {timeout, timer:seconds(60), ?_assert(proper:quickcheck(Case, ?OPTS))}). -eunit_test_() -> - [?run(prop_diff()), - ?run(prop_dist_diff()), - ?run(prop_delete()), - ?run(prop_modify())]. +eunit_no_db_test_() -> + [?run(prop_diff_no_db()), + ?run(prop_dist_diff_no_db()), + ?run(prop_delete_no_db()), + ?run(prop_modify_no_db()) + ]. + +eunit_dict_db_test_() -> + [?run(prop_diff_dict_db()), + ?run(prop_dist_diff_dict_db()), + ?run(prop_delete_dict_db()), + ?run(prop_modify_dict_db()) + ]. + +eunit_ets_db_test_() -> + [?run(prop_diff_ets_db()), + ?run(prop_dist_diff_ets_db()), + ?run(prop_delete_ets_db()), + ?run(prop_modify_ets_db()) + ]. + +eunit_gc_test_() -> + [?run(prop_gc()) + ]. %%%%%%%%%%%%%%%%%% %%% Properties %%% %%%%%%%%%%%%%%%%%% -prop_diff() -> +prop_diff_no_db() -> + prop_diff(no_db). + +prop_diff_dict_db() -> + prop_diff(dict_db). + +prop_diff_ets_db() -> + prop_diff(ets_db). + +prop_diff(Backend) -> %% All differences between trees can be found no matter the order, %% and returns the list of different keys. ?FORALL({KV1,KV2}, diff_keyvals(), begin Keys = [K || {K,_} <- KV2], - T1 = insert_all(KV1), - T2 = insert_all(KV2, T1), + T1 = insert_all(KV1, Backend), + T2 = extend(KV2, T1), Diff1 = merklet:diff(T1,T2), Diff2 = merklet:diff(T2,T1), Diff1 =:= Diff2 @@ -30,7 +58,16 @@ prop_diff() -> Diff1 =:= lists:sort(Keys) end). -prop_dist_diff() -> +prop_dist_diff_no_db() -> + prop_dist_diff(no_db). + +prop_dist_diff_dict_db() -> + prop_dist_diff(dict_db). + +prop_dist_diff_ets_db() -> + prop_dist_diff(ets_db). + +prop_dist_diff(Backend) -> %% All differences between trees can be found no matter the order, %% and returns the list of different keys. Same as previous case, but %% uses the internal serialization format and distribution API @@ -38,8 +75,8 @@ prop_dist_diff() -> ?FORALL({KV1,KV2}, diff_keyvals(), begin Keys = [K || {K,_} <- KV2], - T1 = insert_all(KV1), - T2 = insert_all(KV2, T1), + T1 = insert_all(KV1, Backend), + T2 = extend(KV2, T1), %% remmote version of the trees, should be handled %% by merklet:unserialize/1. In practice, this kind %% of thing would take place over the network, and @@ -55,29 +92,47 @@ prop_dist_diff() -> Diff1 =:= lists:sort(Keys) end). -prop_delete() -> +prop_delete_no_db() -> + prop_delete(no_db). + +prop_delete_dict_db() -> + prop_delete(dict_db). + +prop_delete_ets_db() -> + prop_delete(ets_db). + +prop_delete(Backend) -> %% Having a tree and deleting a percentage of it yields the same tree %% without said keys. ?FORALL({All, Partial, ToDelete}, delete_keyvals(0.50), begin - Tree = insert_all(All), - PartialTree = insert_all(Partial), + Tree = insert_all(All, Backend), + PartialTree = insert_all(Partial, Backend), DeletedTree = delete_keys(ToDelete, Tree), [] =:= merklet:diff(PartialTree, DeletedTree) andalso merklet:keys(DeletedTree) =:= merklet:keys(PartialTree) andalso - DeletedTree =:= PartialTree + merklet:expand_db_tree(DeletedTree) =:= merklet:expand_db_tree(PartialTree) end). -prop_modify() -> +prop_modify_no_db() -> + prop_modify(no_db). + +prop_modify_dict_db() -> + prop_modify(dict_db). + +prop_modify_ets_db() -> + prop_modify(ets_db). + +prop_modify(Backend) -> %% Updating records' values should show detections as part of merklet's %% diff operations, even if none of the keys change. ?FORALL({All, ToChange}, modify_keyvals(0.50), begin - Tree = insert_all(All), + Tree = insert_all(All, Backend), KVSet = [{K, term_to_binary(make_ref())} || K <- ToChange], - Modified = insert_all(KVSet, Tree), + Modified = extend(KVSet, Tree), merklet:keys(Tree) =:= merklet:keys(Modified) andalso lists:sort(ToChange) =:= merklet:diff(Tree, Modified) @@ -85,11 +140,54 @@ prop_modify() -> lists:sort(ToChange) =:= merklet:diff(Modified, Tree) end). +%% Test insertion and garbage collection of db-backed tree. Note that +%% the nodes of a non-db-backed tree are subtrees rather than nodes, +%% which makes the abstraction break down. +prop_gc() -> + Fun = fun(_Type, Hash, Node, AccHandle) -> + orddict:store(Hash, Node, AccHandle) + end, + Spec = #{ get => fun orddict:fetch/2 + , put => fun orddict:store/3 + , handle => []}, + ?FORALL(Entries, keyvals(), + begin + Tree1 = extend(Entries, merklet:empty_db_tree(Spec)), + NewStore1 = merklet:visit_nodes(Fun, [], Tree1), + Tree2 = merklet:db_tree(merklet:root_hash(Tree1), + Spec#{handle => NewStore1}), + NewStore2 = merklet:visit_nodes(Fun, [], Tree2), + %% Test that we can traverse the new tree. + Tree3 = merklet:db_tree(merklet:root_hash(Tree2), + Spec#{handle => NewStore2}), + DB1 = merklet:db_handle(Tree1), + DB2 = merklet:db_handle(Tree2), + DB3 = merklet:db_handle(Tree3), + merklet:keys(Tree1) =:= merklet:keys(Tree2) + andalso merklet:diff(Tree1, Tree2) =:= [] + andalso merklet:root_hash(Tree1) =:= merklet:root_hash(Tree1) + andalso length(DB2) =< length(DB1) + andalso is_sub_orddict(DB2, DB1) + andalso DB2 =:= DB3 %% The gc should be idempotent + end). + +is_sub_orddict([], _) -> + true; +is_sub_orddict(_, []) -> + false; +is_sub_orddict([{K1, V1}|Left1], [{K1, V2}|Left2]) -> + V1 =:= V2 andalso is_sub_orddict(Left1, Left2); +is_sub_orddict([{K1, _}|_] = Orddict1, [{K2, _}|Left2]) -> + K1 > K2 andalso is_sub_orddict(Orddict1, Left2). + %%%%%%%%%%%%%%%% %%% Builders %%% %%%%%%%%%%%%%%%% -insert_all(KeyVals) -> insert_all(KeyVals, undefined). -insert_all(KeyVals, Tree) -> lists:foldl(fun merklet:insert/2, Tree, KeyVals). +insert_all(KeyVals, no_db) -> extend(KeyVals, undefined); +insert_all(KeyVals, dict_db) -> extend(KeyVals, merklet:empty_db_tree()); +insert_all(KeyVals, ets_db) -> extend(KeyVals, merklet:empty_db_tree(merklet_ets_db_backend:spec())). + +extend(KeyVals, Tree) -> lists:foldl(fun merklet:insert/2, Tree, KeyVals). delete_keys(Keys, Tree) -> lists:foldl(fun merklet:delete/2, Tree, Keys). @@ -108,7 +206,7 @@ diff_keyvals() -> delete_keyvals(Rate) -> ?LET(KeyVals, keyvals(), begin - Rand = random:uniform(), + Rand = rand:uniform(), ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand], WithoutDeleted = [{K,V} || {K,V} <- KeyVals, Rate < Rand], {KeyVals, WithoutDeleted, ToDelete} @@ -116,10 +214,10 @@ delete_keyvals(Rate) -> modify_keyvals(Rate) -> % similar as delete_keyvals but doesn't allow duplicate updates - ?SUCHTHAT({_,ToChange}, + ?SUCHTHAT({_,ToChange}, ?LET(KeyVals, keyvals(), begin - Rand = random:uniform(), + Rand = rand:uniform(), ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand], {KeyVals, lists:usort(ToDelete)} end), diff --git a/test/prop_model.erl b/test/prop_model.erl index 95e6a6b..4dae4c3 100644 --- a/test/prop_model.erl +++ b/test/prop_model.erl @@ -8,31 +8,113 @@ -define(run(Case), {timeout, timer:seconds(60), ?_assert(proper:quickcheck(Case, ?OPTS))}). -eunit_test_() -> - [?run(prop_insert_many()), - ?run(prop_delete_random()), - ?run(prop_delete_members()), - ?run(prop_overwrite()), - ?run(prop_insert_same_diff()), - ?run(prop_insert_mixed_diff()), - ?run(prop_insert_disjoint_diff()), - ?run(prop_delete_random_diff()), - ?run(prop_delete_members_diff()), - ?run(prop_overwrite_diff()), - ?run(prop_mixed_diff()), - ?run(prop_mixed_dist_diff()) +eunit_no_db_test_() -> + [?run(prop_insert_many_no_db()), + ?run(prop_traversal_no_db()), + ?run(prop_delete_random_no_db()), + ?run(prop_delete_members_no_db()), + ?run(prop_overwrite_no_db()), + ?run(prop_insert_same_diff_no_db()), + ?run(prop_insert_mixed_diff_no_db()), + ?run(prop_insert_disjoint_diff_no_db()), + ?run(prop_delete_random_diff_no_db()), + ?run(prop_delete_members_diff_no_db()), + ?run(prop_overwrite_diff_no_db()), + ?run(prop_mixed_diff_no_db()), + ?run(prop_mixed_dist_diff_no_db()) ]. +eunit_dict_db_test_() -> + [?run(prop_insert_many_dict_db()), + ?run(prop_traversal_dict_db()), + ?run(prop_delete_random_dict_db()), + ?run(prop_delete_members_dict_db()), + ?run(prop_overwrite_dict_db()), + ?run(prop_insert_same_diff_dict_db()), + ?run(prop_insert_mixed_diff_dict_db()), + ?run(prop_insert_disjoint_diff_dict_db()), + ?run(prop_delete_random_diff_dict_db()), + ?run(prop_delete_members_diff_dict_db()), + ?run(prop_overwrite_diff_dict_db()), + ?run(prop_mixed_diff_dict_db()), + ?run(prop_mixed_dist_diff_dict_db()) + ]. + +eunit_ets_db_test_() -> + [?run(prop_insert_many_ets_db()), + ?run(prop_traversal_ets_db()), + ?run(prop_delete_random_ets_db()), + ?run(prop_delete_members_ets_db()), + ?run(prop_overwrite_ets_db()), + ?run(prop_insert_same_diff_ets_db()), + ?run(prop_insert_mixed_diff_ets_db()), + ?run(prop_insert_disjoint_diff_ets_db()), + ?run(prop_delete_random_diff_ets_db()), + ?run(prop_delete_members_diff_ets_db()), + ?run(prop_overwrite_diff_ets_db()), + ?run(prop_mixed_diff_ets_db()), + ?run(prop_mixed_dist_diff_ets_db()) + ]. + +empty_tree(no_db) -> + undefined; +empty_tree(dict_db) -> + merklet:empty_db_tree(); +empty_tree(ets_db) -> + merklet:empty_db_tree(merklet_ets_db_backend:spec()). + %% Test insertion and reading the keys back -prop_insert_many() -> +prop_insert_many_no_db() -> + prop_insert_many(no_db). + +prop_insert_many_dict_db() -> + prop_insert_many(dict_db). + +prop_insert_many_ets_db() -> + prop_insert_many(ets_db). + +prop_insert_many(Backend) -> ?FORALL(Entries, keyvals(), merklet_model:keys(merklet_model:insert_many(Entries,undefined)) =:= - merklet:keys(merklet:insert_many(Entries,undefined)) + merklet:keys(merklet:insert_many(Entries,empty_tree(Backend))) ). +%% Test insertion and traversing all nodes. +prop_traversal_no_db() -> + prop_traversal(no_db). + +prop_traversal_dict_db() -> + prop_traversal(dict_db). + +prop_traversal_ets_db() -> + prop_traversal(ets_db). + +prop_traversal(Backend) -> + Fun = fun(Type,_Hash,_Node, Count) -> + orddict:update_counter(Type, 1, Count) + end, + ?FORALL(Entries, keyvals(), + begin + M = merklet_model:insert_many(Entries,undefined), + T = merklet:insert_many(Entries,empty_tree(Backend)), + InitAcc = orddict:from_list([{leaf, 0}, {inner, 0}]), + length(merklet_model:keys(M)) + =:= + orddict:fetch(leaf, merklet:visit_nodes(Fun, InitAcc, T)) + end). + %% Delete keys that may or may not be in the tree -prop_delete_random() -> +prop_delete_random_no_db() -> + prop_delete_random(no_db). + +prop_delete_random_dict_db() -> + prop_delete_random(dict_db). + +prop_delete_random_ets_db() -> + prop_delete_random(ets_db). + +prop_delete_random(Backend) -> ?FORALL({Entries, ToDelete}, {keyvals(), list(binary())}, merklet_model:keys( delete(merklet_model, @@ -42,11 +124,20 @@ prop_delete_random() -> merklet:keys( delete(merklet, ToDelete, - merklet:insert_many(Entries,undefined))) + merklet:insert_many(Entries,empty_tree(Backend)))) ). %% Only delete keys that have previously been inserted in the tree -prop_delete_members() -> +prop_delete_members_no_db() -> + prop_delete_members(no_db). + +prop_delete_members_dict_db() -> + prop_delete_members(dict_db). + +prop_delete_members_ets_db() -> + prop_delete_members(ets_db). + +prop_delete_members(Backend) -> ?FORALL({Entries, ToDelete}, delete_keyvals(0.5), merklet_model:keys( delete(merklet_model, @@ -56,11 +147,20 @@ prop_delete_members() -> merklet:keys( delete(merklet, ToDelete, - merklet:insert_many(Entries,undefined))) + merklet:insert_many(Entries,empty_tree(Backend)))) ). %% Overwrite existing entries, make sure nothing was lost or added -prop_overwrite() -> +prop_overwrite_no_db() -> + prop_overwrite(no_db). + +prop_overwrite_dict_db() -> + prop_overwrite(dict_db). + +prop_overwrite_ets_db() -> + prop_overwrite(ets_db). + +prop_overwrite(Backend) -> ?FORALL({Entries, ToUpdate}, overwrite_keyvals(0.5), merklet_model:keys( merklet_model:insert_many(ToUpdate, @@ -68,50 +168,86 @@ prop_overwrite() -> =:= merklet:keys( merklet:insert_many(ToUpdate, - merklet:insert_many(Entries,undefined))) + merklet:insert_many(Entries,empty_tree(Backend)))) ). %% Trees diffed with themselves should be stable -prop_insert_same_diff() -> +prop_insert_same_diff_no_db() -> + prop_insert_same_diff(no_db). + +prop_insert_same_diff_dict_db() -> + prop_insert_same_diff(dict_db). + +prop_insert_same_diff_ets_db() -> + prop_insert_same_diff(ets_db). + +prop_insert_same_diff(Backend) -> ?FORALL(Entries, keyvals(), merklet_model:diff(merklet_model:insert_many(Entries,undefined), merklet_model:insert_many(Entries,undefined)) =:= - merklet:diff(merklet:insert_many(Entries,undefined), - merklet:insert_many(Entries,undefined)) + merklet:diff(merklet:insert_many(Entries,empty_tree(Backend)), + merklet:insert_many(Entries,empty_tree(Backend))) ). %% Two independent trees diffed together (no verification of commutativity) -prop_insert_mixed_diff() -> +prop_insert_mixed_diff_no_db() -> + prop_insert_mixed_diff(no_db). + +prop_insert_mixed_diff_dict_db() -> + prop_insert_mixed_diff(dict_db). + +prop_insert_mixed_diff_ets_db() -> + prop_insert_mixed_diff(ets_db). + +prop_insert_mixed_diff(Backend) -> ?FORALL({Entries1, Entries2}, {keyvals(), keyvals()}, merklet_model:diff(merklet_model:insert_many(Entries1,undefined), merklet_model:insert_many(Entries2,undefined)) =:= - merklet:diff(merklet:insert_many(Entries1,undefined), - merklet:insert_many(Entries2,undefined)) + merklet:diff(merklet:insert_many(Entries1,empty_tree(Backend)), + merklet:insert_many(Entries2,empty_tree(Backend))) ). %% Two independent trees with no overlapping data sets diffed together %% (no verification of commutativity) -prop_insert_disjoint_diff() -> +prop_insert_disjoint_diff_no_db() -> + prop_insert_disjoint_diff(no_db). + +prop_insert_disjoint_diff_dict_db() -> + prop_insert_disjoint_diff(dict_db). + +prop_insert_disjoint_diff_ets_db() -> + prop_insert_disjoint_diff(ets_db). + +prop_insert_disjoint_diff(Backend) -> ?FORALL(Lists, disjoint_keyvals(), begin {Entries1, Entries2} = Lists, merklet_model:diff(merklet_model:insert_many(Entries1,undefined), merklet_model:insert_many(Entries2,undefined)) =:= - merklet:diff(merklet:insert_many(Entries1,undefined), - merklet:insert_many(Entries2,undefined)) + merklet:diff(merklet:insert_many(Entries1,empty_tree(Backend)), + merklet:insert_many(Entries2,empty_tree(Backend))) end). %% Diffing two trees that had random element deletion that may or may %% not be present (tests commutativity) -prop_delete_random_diff() -> +prop_delete_random_diff_no_db() -> + prop_delete_random_diff(no_db). + +prop_delete_random_diff_dict_db() -> + prop_delete_random_diff(dict_db). + +prop_delete_random_diff_ets_db() -> + prop_delete_random_diff(ets_db). + +prop_delete_random_diff(Backend) -> ?FORALL({Entries, ToDelete}, {keyvals(), list(binary())}, begin ModelFull = merklet_model:insert_many(Entries,undefined), ModelDelete = delete(merklet_model, ToDelete, ModelFull), - MerkletFull = merklet:insert_many(Entries,undefined), + MerkletFull = merklet:insert_many(Entries,empty_tree(Backend)), MerkletDelete = delete(merklet, ToDelete, MerkletFull), (merklet_model:diff(ModelFull, ModelDelete) =:= merklet:diff(MerkletFull, MerkletDelete)) @@ -121,12 +257,21 @@ prop_delete_random_diff() -> end). %% Diffing two trees that had member element deletion (tests commutativity) -prop_delete_members_diff() -> +prop_delete_members_diff_no_db() -> + prop_delete_members_diff(no_db). + +prop_delete_members_diff_dict_db() -> + prop_delete_members_diff(dict_db). + +prop_delete_members_diff_ets_db() -> + prop_delete_members_diff(ets_db). + +prop_delete_members_diff(Backend) -> ?FORALL({Entries, ToDelete}, delete_keyvals(0.5), begin ModelFull = merklet_model:insert_many(Entries,undefined), ModelDelete = delete(merklet_model, ToDelete, ModelFull), - MerkletFull = merklet:insert_many(Entries,undefined), + MerkletFull = merklet:insert_many(Entries,empty_tree(Backend)), MerkletDelete = delete(merklet, ToDelete, MerkletFull), (merklet_model:diff(ModelFull, ModelDelete) =:= merklet:diff(MerkletFull, MerkletDelete)) @@ -136,12 +281,21 @@ prop_delete_members_diff() -> end). %% Diffing trees that had overwritten pieces of data (tests commutativity) -prop_overwrite_diff() -> +prop_overwrite_diff_no_db() -> + prop_overwrite_diff(no_db). + +prop_overwrite_diff_dict_db() -> + prop_overwrite_diff(dict_db). + +prop_overwrite_diff_ets_db() -> + prop_overwrite_diff(ets_db). + +prop_overwrite_diff(Backend) -> ?FORALL({Entries, ToUpdate}, overwrite_keyvals(0.5), begin ModelFull = merklet_model:insert_many(Entries, undefined), ModelUpdate = merklet_model:insert_many(ToUpdate, ModelFull), - MerkletFull = merklet:insert_many(Entries, undefined), + MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)), MerkletUpdate = merklet:insert_many(ToUpdate, MerkletFull), (merklet_model:diff(ModelFull, ModelUpdate) =:= merklet:diff(MerkletFull, MerkletUpdate)) @@ -152,13 +306,22 @@ prop_overwrite_diff() -> %% Commutative verification of various trees that had their keys %% inserted, updated, and randomly deleted. -prop_mixed_diff() -> +prop_mixed_diff_no_db() -> + prop_mixed_diff(no_db). + +prop_mixed_diff_dict_db() -> + prop_mixed_diff(dict_db). + +prop_mixed_diff_ets_db() -> + prop_mixed_diff(ets_db). + +prop_mixed_diff(Backend) -> ?FORALL({{Entries, ToUpdate}, ToDelete}, {overwrite_keyvals(0.5), list(binary())}, begin ModelFull = merklet_model:insert_many(Entries, undefined), ModelDelete = delete(merklet_model, ToDelete, ModelFull), ModelUpdate = merklet_model:insert_many(ToUpdate, ModelDelete), - MerkletFull = merklet:insert_many(Entries, undefined), + MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)), MerkletDelete = delete(merklet, ToDelete, MerkletFull), MerkletUpdate = merklet:insert_many(ToUpdate, MerkletDelete), %% Full vs. Update @@ -186,13 +349,22 @@ prop_mixed_diff() -> %% Commutative verification of various trees that had their keys %% inserted, updated, and randomly deleted, while using the %% distributed/serialized interface. -prop_mixed_dist_diff() -> +prop_mixed_dist_diff_no_db() -> + prop_mixed_dist_diff(no_db). + +prop_mixed_dist_diff_dict_db() -> + prop_mixed_dist_diff(dict_db). + +prop_mixed_dist_diff_ets_db() -> + prop_mixed_dist_diff(ets_db). + +prop_mixed_dist_diff(Backend) -> ?FORALL({{Entries, ToUpdate}, ToDelete}, {overwrite_keyvals(0.5), list(binary())}, begin ModelFull = merklet_model:insert_many(Entries, undefined), ModelDelete = delete(merklet_model, ToDelete, ModelFull), ModelUpdate = merklet_model:insert_many(ToUpdate, ModelDelete), - MerkletFull = merklet:insert_many(Entries, undefined), + MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)), MerkletDelete = delete(merklet, ToDelete, MerkletFull), MerkletUpdate = merklet:insert_many(ToUpdate, MerkletDelete), DistFull = merklet:access_unserialize(merklet:access_serialize(MerkletFull)), @@ -235,7 +407,7 @@ keyvals() -> list({binary(), binary()}). delete_keyvals(Rate) -> ?LET(KeyVals, keyvals(), begin - Rand = random:uniform(), + Rand = rand:uniform(), ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand], {KeyVals, ToDelete} end). @@ -243,7 +415,7 @@ delete_keyvals(Rate) -> overwrite_keyvals(Rate) -> ?LET(KeyVals, keyvals(), begin - Rand = random:uniform(), + Rand = rand:uniform(), ToUpdate = [{Key, <<0,Val/binary>>} || {Key,Val} <- KeyVals, Rate > Rand], {KeyVals, ToUpdate} end). @@ -255,4 +427,4 @@ disjoint_keyvals() -> KS2 = [K || {K, _} <- KV2], lists:all(fun(K) -> not lists:member(K,KS2) end, KS1) end). - +