diff --git a/src/merklet.erl b/src/merklet.erl
index eda3101..8db1d1f 100644
--- a/src/merklet.erl
+++ b/src/merklet.erl
@@ -10,7 +10,7 @@
%%% The Hkey is used as the main index and to build a tree. If we have three
%%% hashes with the values `<<213,21,54,...>>', `<<213,33,98,...>>', and
%%% `<<11,45,101,...>>', the resulting tree/trie is:
-%%%
+%%% ```
%%% (Root)
%%% Inner
%%% / \
@@ -21,6 +21,7 @@
%%% / \
%%% (21) (33)
%%% <<213,21,54,...>> <<213,33,98,...>>
+%%% '''
%%%
%%% Each of the leaf nodes will contain both hashes, along with a non-hashed
%%% version of the key. Each Inner node contains a hash of all its children's
@@ -32,31 +33,105 @@
%%%
%%% It also allows to do a level-order traversal node-per-node over the network
%%% allowing somewhat efficient diffing.
+%%%
+%%% This implementaion has two variants:
+%%% - A purely functional ADT (i.e., non_db_tree())
+%%% - An ADT with a (possibly stateful) storage backend. (i.e., dbtree())
+%%%
+%%% The variants are semantically equivalent under the API, but since
+%%% the representation is different, trees cannot be compared by using
+%%% term comparison (=:=, pattern matching, etc).
+%%%
+%%% In the functional ADT all inner nodes (inner()) contains the
+%%% complete terms of its children, but in the DB backed ADT, the
+%%% inner nodes contains only the hashes of its children.
+%%%
+%%% The children are stored in (and retrieved from) the backend using
+%%% user defined callback functions.
+%%%
+%%%
{@type db_handle()} A term that defines to the access funs
+%%% which storage to use.
+%%%
+%%% {@type get_fun()} A fun that given a hash and a {@type db_handle()}
+%%% delivers the node associated with the hash.
+%%%
+%%% {@type put_fun()} A fun that given a hash, a node and a
+%%% {@type db_handle()} stores the node previously stored with
+%%% the hash as key, returning a new db_handle().
+%%%
+%%% Note that the db_handle() is treated in a functional manner, so a
+%%% functional key-value store (e.g., dict(), gb_tree()) can be used
+%%% to keep the full trees functional even if this version is
+%%% used. The typical usage would rather have some other stateful
+%%% storage model as the backend (e.g., mnesia table, ets, dets).
+%%%
+%%% Also note that nodes are not removed from the db backend. This
+%%% gives the benefit that historical merkle trees can be retrieved
+%%% from the database (using the root hash), but at the same time it
+%%% leads to a waste of space if only the newest tree is ever used.
+%%%
+%%% The function visit_nodes/3 can be used to implement a garbage
+%%% collection. An example of this is available in the proper tests.
+%%%
+%%% Accessing a historical tree can be done by creating a tree with
+%%% the appropriate root hash and db backend using the function
+%%% db_tree/3.
+%%%
%%% @end
-module(merklet).
--record(leaf, {hash :: binary(), % hash(hash(key), hash(value))
+-record(leaf, {hash :: hash(), % hash(hash(key), hash(value))
userkey :: binary(), % user submitted key
hashkey :: binary()}). % hash of the user submitted key
--record(inner, {hashchildren :: binary(), % hash of children's hashes
- children :: [{offset(), #inner{} | #leaf{}}, ...],
+-record(inner, {hashchildren :: hash(), % hash of children's hashes
+ %% The children is really nonempty but, we abuse it in
+ %% unserialize, which Dialyzer finds.
+ children :: [{offset(), #inner{} | #leaf{} | hash()}],
offset :: non_neg_integer()}). % byte offset
+
+-record(db, { get :: get_fun()
+ , put :: put_fun()
+ , handle :: term()
+ }).
+
+-record(dbtree, { db :: db()
+ , hash :: 'undefined' | hash()
+ }).
+
-define(HASHPOS, 2). % #leaf.hash =:= #inner.hashchildren
-type offset() :: byte().
-type leaf() :: #leaf{}.
-type inner() :: #inner{}.
--type tree() :: leaf() | inner() | 'undefined'.
+-type non_db_tree() :: leaf() | inner() | 'undefined'.
+-type db_tree() :: #dbtree{}.
+-type tree() :: db_tree() | non_db_tree().
+
-type key() :: binary().
-type value() :: binary().
-type path() :: binary().
--type access_fun() :: fun((at | child_at | keys | {keys, Hash::binary()}, path()) -> tree()).
--type serial_fun() :: fun((at | child_at | keys | {keys, Hash::binary()}, path()) -> binary()).
+-type hash() :: binary().
+-type access_fun() :: fun((at | child_at | keys | {keys, Hash::hash()}, path()) -> non_db_tree()).
+-type serial_fun() :: fun((at | child_at | keys | {keys, Hash::hash()}, path()) -> binary()).
+
+-type db() :: #db{}.
+-type db_handle() :: term().
+-type get_fun() :: fun((hash(), db_handle()) -> non_db_tree()).
+-type put_fun() :: fun((hash(), Node :: leaf() | inner(), db_handle()) -> db_handle()).
-export_type([tree/0, key/0, value/0, path/0, access_fun/0, serial_fun/0]).
+-export_type([get_fun/0, put_fun/0, db_handle/0]).
-export([insert/2, insert_many/2, delete/2, keys/1, diff/2]).
-export([dist_diff/2, access_serialize/1, access_unserialize/1]).
+-export([empty_db_tree/0, empty_db_tree/1, db_tree/2, db_handle/1, root_hash/1]).
+-export([visit_nodes/3]).
+
+-ifdef(TEST).
+%% Test interface
+-export([ expand_db_tree/1
+ ]).
+-endif.
-define(HASH, sha).
-define(HASHBYTES, 20).
@@ -76,34 +151,144 @@
%%% API %%%
%%%%%%%%%%%
+%% @doc Creating an empty tree with the built-in dict backend
+-spec empty_db_tree() -> db_tree().
+empty_db_tree() ->
+ empty_db_tree(#{ get => fun dict:fetch/2
+ , put => fun dict:store/3
+ , handle => dict:new()
+ }).
+
+%% @doc Creating an empty tree with user-defined db backend
+-spec empty_db_tree(#{ get := get_fun()
+ , put := put_fun()
+ , handle := db_handle()}) -> db_tree().
+empty_db_tree(#{ get := GetFun
+ , put := PutFun
+ , handle := Handle}) when is_function(GetFun, 2),
+ is_function(PutFun, 3) ->
+ #dbtree{ hash = undefined
+ , db = #db{ get=GetFun
+ , put=PutFun
+ , handle=Handle
+ }}.
+
+%% @doc Get the root hash for the tree. If the tree is empty, the root
+%% hash is 'undefined'.
+-spec root_hash(tree()) -> hash() | 'undefined'.
+root_hash(Tree0) ->
+ case unpack_db_tree(Tree0) of
+ {undefined, _} -> undefined;
+ {#inner{hashchildren=Hash}, _} -> Hash;
+ {#leaf{hash=Hash}, _} -> Hash
+ end.
+
+%% @doc Get the db_handle() of a db_tree().
+-spec db_handle(db_tree()) -> db_handle().
+db_handle(#dbtree{db = #db{handle=Handle}}) ->
+ Handle.
+
+%% @doc Creating an tree with user-defined db backend and setting the
+%% root hash. Useful to re-initialize a tree from minimal
+%% information and a prebuilt db store.
+-spec db_tree(hash(), #{ get := get_fun()
+ , put := put_fun()
+ , handle := db_handle()}) -> db_tree().
+db_tree(RootHash, Spec) when RootHash =:= undefined;
+ byte_size(RootHash) =:= ?HASHBYTES ->
+ T0 = empty_db_tree(Spec),
+ T0#dbtree{ hash = RootHash }.
+
+-ifdef(TEST).
+%% Test interface to facilitate comparing trees
+expand_db_tree(#dbtree{hash=undefined}) ->
+ undefined;
+expand_db_tree(#dbtree{hash=H, db=DB}) ->
+ expand_db_tree(db_get(H, DB), DB);
+expand_db_tree(Tree) ->
+ Tree.
+
+expand_db_tree(#leaf{} = L,_DB) ->
+ L;
+expand_db_tree(#inner{children=Children} = I, DB) ->
+ Expand = fun(_, Child) -> expand_db_tree(db_get(Child, DB), DB)end,
+ I#inner{children=orddict:map(Expand, Children)}.
+-endif.
+
+unpack_db_tree(#dbtree{hash=H, db=DB}) -> {db_get(H, DB), DB};
+unpack_db_tree(#leaf{} = T) -> {T, no_db};
+unpack_db_tree(#inner{} = T) -> {T, no_db};
+unpack_db_tree(undefined) -> {undefined, no_db}.
+
+pack_db_tree({T, no_db}) -> T;
+pack_db_tree({#leaf{hash=H}, #db{} = DB}) -> #dbtree{hash=H, db=DB};
+pack_db_tree({#inner{hashchildren=H}, #db{} = DB}) -> #dbtree{hash=H, db=DB};
+pack_db_tree({undefined, #db{} = DB}) -> #dbtree{hash=undefined, db=DB}.
+
%% @doc Adds a key to the tree, or overwrites an exiting one.
-spec insert({key(), value()}, tree()) -> tree().
-insert({Key, Value}, Tree) ->
- insert(0, to_leaf(Key, Value), Tree).
+insert({Key, Value}, Tree0) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ pack_db_tree(insert(0, to_leaf(Key, Value), Tree, DB)).
%% @doc Adds multiple keys to the tree, or overwrites existing ones.
--spec insert_many({key(), value()}, tree()) -> tree().
-insert_many([], Tree) -> Tree;
-insert_many([H|T], Tree) -> insert_many(T, insert(H, Tree)).
+-spec insert_many([{key(), value()}], tree()) -> tree().
+insert_many(List, Tree0) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ insert_many(List, Tree, DB).
+
+insert_many([{Key, Value}|T], Tree, DB) ->
+ {Tree1, DB1} = insert(0, to_leaf(Key, Value), Tree, DB),
+ insert_many(T, Tree1, DB1);
+insert_many([], Tree, DB) ->
+ pack_db_tree({Tree, DB}).
%% @doc Removes a key from a tree, if present.
-spec delete(key(), tree()) -> tree().
-delete(Key, Tree) ->
- delete_leaf(to_leaf(Key, <<>>), Tree).
+delete(Key, Tree0) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ pack_db_tree(delete_leaf(to_leaf(Key, <<>>), Tree, DB)).
%% @doc Returns a sorted list of all the keys in the tree
-spec keys(tree()) -> [key()].
-keys(Tree) ->
- lists:usort(raw_keys(Tree)).
+keys(Tree0) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ lists:usort(raw_keys(Tree, DB)).
%% @doc Takes two trees and returns the different keys between them.
-spec diff(tree(), tree()) -> [key()].
-diff(Tree1, Tree2) ->
+diff(Tree10, Tree20) ->
%% We use the remote access for this local comparison. This is
%% slower than a dedicated traversal algorithm, but less code
%% means fewer chances of breaking stuff.
- Fun = access_local(Tree2),
- diff(Tree1, Fun, <<>>).
+ {Tree1, DB1} = unpack_db_tree(Tree10),
+ {Tree2, DB2} = unpack_db_tree(Tree20),
+ Fun = access_local(Tree2, DB2),
+ diff(Tree1, DB1, Fun, <<>>).
+
+%% @doc Traversal of the tree nodes. Can be useful for implementing a
+%% moving garbage collector of the db backend, Or a staged commit
+%% scheme of the db where only the reachable nodes of a cache is
+%% committed to the real backend.
+%%
+%% The nodes are visited in pre-order, parents are visited before
+%% children.
+%%
+%% The visit fun should either return the atom `stop' or a new
+%% accumulator. The `stop' is interpreted as to not traverse the
+%% subtree rooted at the node. Siblings are still traversed.
+
+-type node_type() :: 'leaf' | 'inner'.
+-type visit_fun() :: fun((node_type(), hash(), leaf() | inner(), Acc :: term()) ->
+ 'stop' | term()).
+-spec visit_nodes(visit_fun(), InitAcc :: term(), tree()) -> ResultAcc :: term().
+visit_nodes(VisitFun, InitAcc, Tree0) when is_function(VisitFun, 4) ->
+ case unpack_db_tree(Tree0) of
+ {undefined, _} ->
+ InitAcc;
+ {Tree, DB} ->
+ visit(Tree, VisitFun, DB, InitAcc)
+ end.
%% @doc Takes a local tree, and an access function to another tree,
%% and returns the keys associated with diverging parts of both trees.
@@ -113,33 +298,38 @@ diff(Tree1, Tree2) ->
%% The Path is a sequence of bytes (in a `binary()') telling how to get to
%% a specific node:
%%
-%% - `<<>>' means returning the current node, at whatever point we are in the
-%% tree's traversal.
-%% - `<>' means to return the node at the given offset for the
+%% `<<>>' means returning the current node, at whatever point we are in the
+%% tree's traversal.
+%%
+%% `<>' means to return the node at the given offset for the
%% current tree level. For example, a value of `<<0>>' means to return the
%% leftmost child of the current node, whereas `<<3>>' should return the
%% 4th leftmost child. Any time the path is larger than the number of
%% children, we return `undefined'.
-%% This is the case where we can recurse.
-%% - Any invalid path returns `undefined'.
+%% This is the case where we can recurse.
+%%
+%% Any invalid path returns `undefined'.
%%
%% The three terms required are:
-%% - `at': Uses the path as above to traverse the tree and return a node.
-%% - `keys': Returns all the keys held (recursively) by the node at a given
+%%
+%% `at': Uses the path as above to traverse the tree and return a node.
+%%
+%% `keys': Returns all the keys held (recursively) by the node at a given
%% path. A special variant exists of the form `{keys, Key, Hash}', where the
%% function must return the key set minus the one that would contain either
%% `Key' or `Hash', but by specifying if the key and hash were encountered,
-%% and if so, if they matched or not.
-%% - `child_at': Special case of `at' used when comparing child nodes of two
+%% and if so, if they matched or not.
+%%
+%% `child_at': Special case of `at' used when comparing child nodes of two
%% inner nodes. Basically the same as `at', but with one new rule:
%%
%% Whenever we hit a path that is `<>' and we are on an inner node,
%% it means we only have a child to look at. Return that child along
%% with its byte at the offset in the dictionary structure
-%% (`{ByteAtOffset, Node}').
+%% (`{ByteAtOffset, Node}').
%%
%% Examples of navigation through a tree of the form:
-%%
+%%```
%% 0 | ___.-A-._____
%% | / | \
%% 1 | .-B-. C .-D-.
@@ -147,9 +337,9 @@ diff(Tree1, Tree2) ->
%% 2 | E F .G. H
%% | / \
%% 3 | I J
-%%
+%%'''
%% Which is four levels deep. The following paths lead to following nodes:
-%%
+%%```
%% +==============+===========+ +==============+===========+
%% | Path | Node | | Path | Node |
%% +==============+===========+ +==============+===========+
@@ -160,21 +350,23 @@ diff(Tree1, Tree2) ->
%% | <<3>> | undefined | | <<2,0,1>> | J |
%% | <<0,0>> | E | | <<2,0,1,3>> | undefined |
%% +--------------+-----------+ +--------------+-----------+
-%%
+%%'''
%% The values returned are all the keys that differ across both trees.
-spec dist_diff(tree(), access_fun()) -> [key()].
-dist_diff(Tree, Fun) when is_function(Fun,2) ->
- diff(Tree, Fun, <<>>).
+dist_diff(Tree0, Fun) when is_function(Fun,2) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ diff(Tree, DB, Fun, <<>>).
-%% @doc Returns an `access_fun()' for the current tree. This function
+%% @doc Returns an {@link access_fun()} for the current tree. This function
%% can be put at the end of a connection to a remote node, and it
%% will return serialized tree nodes.
-spec access_serialize(tree()) -> serial_fun().
-access_serialize(Tree) ->
- fun(at, Path) -> serialize(at(Path, Tree));
- (child_at, Path) -> serialize(child_at(Path, Tree));
- (keys, Path) -> serialize(raw_keys(at(Path, Tree)));
- ({keys,Key,Skip}, Path) -> serialize(raw_keys(at(Path, Tree), Key, Skip))
+access_serialize(Tree0) ->
+ {Tree, DB} = unpack_db_tree(Tree0),
+ fun(at, Path) -> serialize(at(Path, Tree, DB));
+ (child_at, Path) -> serialize(child_at(Path, Tree, DB));
+ (keys, Path) -> serialize(raw_keys(at(Path, Tree, DB), DB));
+ ({keys,Key,Skip}, Path) -> serialize(raw_keys(at(Path, Tree, DB), Key, Skip, DB))
end.
%% @doc Takes an {@link access_fun()} that fetches nodes serialized according
@@ -190,83 +382,100 @@ access_unserialize(Fun) ->
%%%%%%%%%%%%%%%
%% if the tree is empty, just use the leaf
-insert(_Offset, Leaf, undefined) ->
- Leaf;
+insert(_Offset, Leaf, undefined, DB) ->
+ {Leaf, db_put(Leaf, DB)};
%% If the offset is at the max value for the hash, return the leaf --
%% We can't go deeper anyway.
-insert(?HASHBYTES, Leaf, _) ->
- Leaf;
+insert(?HASHBYTES, Leaf, _, DB) ->
+ {Leaf, db_put(Leaf, DB)};
%% if the current node of the tree is a leaf and both keys are the same,
%% replace it.
-insert(_Offset, Leaf=#leaf{hashkey=Key}, #leaf{hashkey=Key}) ->
- Leaf;
+insert(_Offset, Leaf=#leaf{hashkey=Key}, #leaf{hashkey=Key}, DB) ->
+ {Leaf, db_put(Leaf, DB)};
%% if the current node of the tree is a leaf, and keys are different, turn the
%% current leaf into an inner node, and insert the new one in it.
-insert(Offset, NewLeaf, OldLeaf=#leaf{}) ->
- insert(Offset, NewLeaf, to_inner(Offset, OldLeaf));
+insert(Offset, NewLeaf, OldLeaf=#leaf{}, DB) ->
+ Inner = to_inner(Offset, OldLeaf, DB),
+ insert(Offset, NewLeaf, Inner, db_put(Inner, DB));
%% Insert to an inner node!
-insert(Offset, Leaf=#leaf{hashkey=Key}, Inner=#inner{children=Children}) ->
+insert(Offset, Leaf=#leaf{hashkey=Key}, Inner=#inner{children=Children}, DB) ->
Byte = binary:at(Key, Offset),
- NewChildren = case orddict:find(Byte, Children) of
+ case orddict:find(Byte, Children) of
error ->
- orddict:store(Byte, Leaf, Children);
- {ok, Subtree} ->
- orddict:store(Byte, insert(Offset+1, Leaf, Subtree), Children)
- end,
- Inner#inner{hashchildren=children_hash(NewChildren), children=NewChildren}.
+ DB1 = db_put(Leaf, DB),
+ NewChildren = orddict:store(Byte, db_ref(Leaf, DB1), Children),
+ NewInner = Inner#inner{hashchildren=children_hash(NewChildren),
+ children=NewChildren},
+ {NewInner, db_put(NewInner, DB1)};
+ {ok, SubtreeRef} ->
+ Subtree = db_get(SubtreeRef, DB),
+ {Subtree1, DB1} = insert(Offset+1, Leaf, Subtree, DB),
+ NewChildren = orddict:store(Byte, db_ref(Subtree1, DB1), Children),
+ NewInner = Inner#inner{hashchildren=children_hash(NewChildren),
+ children=NewChildren},
+ {NewInner, db_put(NewInner, DB1)}
+ end.
%% Not found or empty tree. Leave as is.
-delete_leaf(_, undefined) ->
- undefined;
+delete_leaf(_, undefined, DB) ->
+ {undefined, DB};
%% If we have the same leaf node we were looking for, kill it.
-delete_leaf(#leaf{hashkey=K}, #leaf{hashkey=K}) ->
- undefined;
+delete_leaf(#leaf{hashkey=K}, #leaf{hashkey=K}, DB) ->
+ {undefined, DB};
%% If it's a different leaf, the item to delete is already gone. Leave as is.
-delete_leaf(#leaf{}, Leaf=#leaf{}) ->
- Leaf;
+delete_leaf(#leaf{}, Leaf=#leaf{}, DB) ->
+ {Leaf, DB};
%% if it's an inner node, look inside
-delete_leaf(Leaf=#leaf{hashkey=K}, Inner=#inner{offset=Offset, children=Children}) ->
+delete_leaf(Leaf=#leaf{hashkey=K}, Inner=#inner{offset=Offset, children=Children}, DB) ->
Byte = binary:at(K, Offset),
case orddict:find(Byte, Children) of
error -> % not found, leave as is
- Inner;
- {ok, Subtree} ->
- NewChildren = case maybe_shrink(delete_leaf(Leaf, Subtree)) of
- undefined -> % leaf gone
- orddict:erase(Byte, Children);
- Node -> % replacement node
- orddict:store(Byte, Node, Children)
- end,
- maybe_shrink(Inner#inner{hashchildren=children_hash(NewChildren),
- children=NewChildren})
+ {Inner, DB};
+ {ok, SubtreeHandle} ->
+ Subtree = db_get(SubtreeHandle, DB),
+ {Subtree1, DB1} = delete_leaf(Leaf, Subtree, DB),
+ case maybe_shrink(Subtree1, DB) of
+ {undefined, DB1} -> % leaf gone
+ NewChildren = orddict:erase(Byte, Children),
+ NewInner = Inner#inner{hashchildren=children_hash(NewChildren),
+ children=NewChildren},
+ maybe_shrink(NewInner, DB1);
+ {Node, DB1} -> % replacement node
+ NewChildren = orddict:store(Byte, db_ref(Node, DB), Children),
+ NewInner = Inner#inner{hashchildren=children_hash(NewChildren),
+ children=NewChildren},
+ maybe_shrink(NewInner, DB1)
+ end
end.
-raw_keys(undefined) ->
+raw_keys(undefined,_DB) ->
[];
-raw_keys(#leaf{userkey=Key}) ->
+raw_keys(#leaf{userkey=Key},_DB) ->
[Key];
-raw_keys(#inner{children=Children}) ->
+raw_keys(#inner{children=Children}, DB) ->
lists:append(orddict:fold(
- fun(_Byte, Node, Acc) -> [raw_keys(Node)|Acc] end,
+ fun(_Byte, NodeHandle, Acc) ->
+ [raw_keys(db_get(NodeHandle, DB), DB)|Acc] end,
[],
Children
)).
%% Same as raw_keys/1, but reports on a given hash and key
-raw_keys(I=#inner{}, KeyToWatch, ToSkip) -> raw_keys(I, KeyToWatch, ToSkip, unseen).
+raw_keys(I=#inner{}, KeyToWatch, ToSkip, DB) -> raw_keys(I, KeyToWatch, ToSkip, unseen, DB).
-raw_keys(undefined, _, _, Status) ->
+raw_keys(undefined, _, _, Status,_DB) ->
{Status, []};
-raw_keys(#leaf{hash=Hash}, _, Hash, Status) ->
+raw_keys(#leaf{hash=Hash}, _, Hash, Status,_DB) ->
{merge_status(same, Status), []};
-raw_keys(#leaf{userkey=Key}, Key, _, Status) ->
+raw_keys(#leaf{userkey=Key}, Key, _, Status,_DB) ->
{merge_status(diff, Status), []};
-raw_keys(#leaf{userkey=Key}, _, _, Status) ->
+raw_keys(#leaf{userkey=Key}, _, _, Status,_DB) ->
{Status, [Key]};
-raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus) ->
+raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus, DB) ->
{Status, DeepList} = lists:foldl(
- fun({_, Node}, {Status, Acc}) ->
- {NewStatus, ToAdd} = raw_keys(Node, Key, ToSkip, Status),
+ fun({_, NodeHandle}, {Status, Acc}) ->
+ Node = db_get(NodeHandle, DB),
+ {NewStatus, ToAdd} = raw_keys(Node, Key, ToSkip, Status, DB),
{NewStatus, [ToAdd|Acc]}
end,
{InitStatus, []},
@@ -278,83 +487,83 @@ raw_keys(#inner{children=Children}, Key, ToSkip, InitStatus) ->
%% That would mean the tree may contain many similar keys
%% in many places
merge_status(same, unseen) -> same;
-merge_status(unseen, same) -> same;
-merge_status(diff, unseen) -> diff;
-merge_status(unseen, diff) -> diff;
-merge_status(unseen, unseen) -> unseen.
+merge_status(diff, unseen) -> diff.
+
+-spec diff(tree(), db(), access_fun(), path()) -> [key()].
+diff(Tree, DB, Fun, Path) ->
+ lists:usort(raw_diff(Tree, Fun(at, Path), Fun, Path, DB)).
--spec diff(tree(), access_fun(), path()) -> [key()].
-diff(Tree, Fun, Path) ->
- lists:usort(raw_diff(Tree, Fun(at, Path), Fun, Path)).
%% Empty trees yield all keys of remaining trees
-raw_diff(undefined, undefined, _, _) ->
+raw_diff(undefined, undefined, _, _, _) ->
[];
-raw_diff(undefined, _Tree2, Fun, Path) ->
+raw_diff(undefined, _Tree2, Fun, Path,_DB) ->
Fun(keys, Path);
-raw_diff(Tree1, undefined, _, _) ->
- raw_keys(Tree1);
+raw_diff(Tree1, undefined, _, _, DB) ->
+ raw_keys(Tree1, DB);
%% If hashes are the same, we're done.
-raw_diff(#leaf{hash=Hash}, #leaf{hash=Hash}, _, _) ->
+raw_diff(#leaf{hash=Hash}, #leaf{hash=Hash}, _, _, _) ->
[];
-raw_diff(#leaf{hash=Hash}, #inner{hashchildren=Hash}, _, _) ->
+raw_diff(#leaf{hash=Hash}, #inner{hashchildren=Hash}, _, _, _) ->
[];
-raw_diff(#inner{hashchildren=Hash}, #leaf{hash=Hash}, _, _) ->
+raw_diff(#inner{hashchildren=Hash}, #leaf{hash=Hash}, _, _, _) ->
[];
-raw_diff(#inner{hashchildren=Hash}, #inner{hashchildren=Hash}, _, _) ->
+raw_diff(#inner{hashchildren=Hash}, #inner{hashchildren=Hash}, _, _, _) ->
[];
%% if they differ and both nodes are leaf nodes, return both values
-raw_diff(#leaf{userkey=Key1}, #leaf{userkey=Key2}, _, _) ->
+raw_diff(#leaf{userkey=Key1}, #leaf{userkey=Key2}, _, _, _) ->
[Key1,Key2];
%% if both differ but one is an inner node, return everything
-raw_diff(#leaf{userkey=Key, hash=ToSkip}, #inner{}, Fun, Path) ->
+raw_diff(#leaf{userkey=Key, hash=ToSkip}, #inner{}, Fun, Path,_DB) ->
%% We can only get rid of the current Key if the hashes are the same
case Fun({keys, Key, ToSkip}, Path) of
{same, Keys} -> Keys;
{diff, Keys} -> [Key|Keys];
{unseen, Keys} -> [Key|Keys]
end;
-raw_diff(Inner=#inner{}, #leaf{userkey=Key, hash=ToSkip}, _, _) ->
+raw_diff(Inner=#inner{}, #leaf{userkey=Key, hash=ToSkip}, _, _, DB) ->
%% We can only get rid of the current Key if the hashes are the same
- case raw_keys(Inner, Key, ToSkip) of
+ case raw_keys(Inner, Key, ToSkip, DB) of
{same, Keys} -> Keys;
{diff, Keys} -> [Key|Keys];
{unseen, Keys} -> [Key|Keys]
end;
%% if both nodes are inner and populated, compare them offset by offset.
-raw_diff(#inner{children=Children}, #inner{}, Fun, Path) ->
+raw_diff(#inner{children=Children}, #inner{}, Fun, Path, DB) ->
ChildPath = <>,
diff_offsets(children_offsets(Children),
Fun(child_at, ChildPath),
Fun,
- ChildPath).
+ ChildPath,
+ DB).
%% Whatever is left alone is returned
-diff_offsets([], undefined, _, _) ->
+diff_offsets([], undefined, _, _, _) ->
[];
-diff_offsets(List, undefined, _, _) ->
- lists:append([raw_keys(Child) || {_, Child} <- List]);
-diff_offsets([], _, Fun, Path) ->
+diff_offsets(List, undefined, _, _, DB) ->
+ lists:append([raw_keys(db_get(Child, DB), DB) || {_, Child} <- List]);
+diff_offsets([], _, Fun, Path, DB) ->
Keys = Fun(keys, Path),
case next_child_path(Path) of
undefined -> Keys;
- Next -> Keys ++ diff_offsets([], Fun(child_at, Next), Fun, Next)
+ Next -> Keys ++ diff_offsets([], Fun(child_at, Next), Fun, Next, DB)
end;
%% If both offsets are the same, compare recursively.
-diff_offsets(L=[{OffL, Child}|Rest], R={OffR,Node}, Fun, Path) ->
+diff_offsets(L=[{OffL, Child0}|Rest], R={OffR,Node}, Fun, Path, DB) ->
+ Child = db_get(Child0, DB),
if OffL =:= OffR ->
- Diff = raw_diff(Child, Node, Fun, Path),
+ Diff = raw_diff(Child, Node, Fun, Path, DB),
case next_child_path(Path) of
undefined -> Diff;
- Next -> Diff ++ diff_offsets(Rest, Fun(child_at, Next), Fun, Next)
+ Next -> Diff ++ diff_offsets(Rest, Fun(child_at, Next), Fun, Next, DB)
end;
OffL < OffR ->
- raw_keys(Child) ++ diff_offsets(Rest, R, Fun, Path);
+ raw_keys(Child, DB) ++ diff_offsets(Rest, R, Fun, Path, DB);
OffL > OffR ->
Keys = Fun(keys, Path),
case next_child_path(Path) of
undefined -> Keys;
- Next -> Keys ++ diff_offsets(L, Fun(child_at, Next), Fun, Next)
+ Next -> Keys ++ diff_offsets(L, Fun(child_at, Next), Fun, Next, DB)
end
end.
@@ -383,9 +592,10 @@ to_leaf(Key, Value) when is_binary(Key) ->
%% @doc We build a Key-Value list of the child nodes and their offset
%% to be used as a sparse K-ary tree.
--spec to_inner(offset(), leaf()) -> inner().
-to_inner(Offset, Child=#leaf{hashkey=Hash}) ->
- Children = orddict:store(binary:at(Hash, Offset), Child, orddict:new()),
+-spec to_inner(offset(), leaf(), db()) -> inner().
+to_inner(Offset, Child=#leaf{hashkey=Hash}, DB) ->
+ ChildRef = db_ref(Child, DB),
+ Children = orddict:store(binary:at(Hash, Offset), ChildRef, orddict:new()),
#inner{hashchildren=children_hash(Children),
children=Children,
offset=Offset}.
@@ -400,7 +610,11 @@ to_inner(Offset, Child=#leaf{hashkey=Hash}) ->
%% of inner nodes, because it is dictated by the children's keyhashes, and
%% not the inner node's own hashes.
%% @todo consider endianness for absolute portability
--spec children_hash([{offset(), leaf()}, ...]) -> binary().
+-spec children_hash([{offset(), leaf()}, ...]) -> hash().
+children_hash([{_, B}|_] = Children) when is_binary(B) ->
+ %% This is in db mode
+ Hashes = [ChildHash || {_Offset, ChildHash} <- Children],
+ crypto:hash(?HASH, Hashes);
children_hash(Children) ->
Hashes = [element(?HASHPOS, Child) || {_Offset, Child} <- Children],
crypto:hash(?HASH, Hashes).
@@ -409,21 +623,27 @@ children_hash(Children) ->
%% or should just be returned as is.
%% This avoids a problem where a deleted subtree results in an inner node
%% with a single element, which wastes space and can slow down diffing.
-maybe_shrink(Leaf = #leaf{}) ->
- Leaf;
-maybe_shrink(undefined) ->
- undefined;
-maybe_shrink(Inner = #inner{children=Children}) ->
+maybe_shrink(Leaf = #leaf{}, DB) ->
+ {Leaf, DB};
+maybe_shrink(undefined, DB) ->
+ {undefined, DB};
+maybe_shrink(Inner = #inner{children=Children}, DB) ->
%% The trick for this one is that if we have *any* child set that
%% is anything else than a single leaf node, we can't shrink. We use
%% a fold with a quick try ... catch to quickly figure this out, in
%% two iterations at most.
try
- orddict:fold(fun(_Offset, Leaf=#leaf{}, 0) -> Leaf;
+ orddict:fold(fun(_Offset, NodeHandle, 0) ->
+ case db_get(NodeHandle, DB) of
+ #leaf{} = Leaf ->
+ {Leaf, DB};
+ _ ->
+ throw(false)
+ end;
(_, _, _) -> throw(false)
end, 0, Children)
catch
- throw:false -> Inner
+ throw:false -> {Inner, db_put(Inner, DB)}
end.
%% @doc Returns the sorted offsets of a given child. Because we're using
@@ -433,16 +653,16 @@ maybe_shrink(Inner = #inner{children=Children}) ->
children_offsets(Children) -> Children.
%% Wrapper for the diff function.
-access_local(Node) ->
- fun(at, Path) -> at(Path, Node);
- (child_at, Path) -> child_at(Path, Node);
- (keys, Path) -> raw_keys(at(Path, Node));
- ({keys, Key, Skip}, Path) -> raw_keys(at(Path, Node), Key, Skip)
+access_local(Node, DB) ->
+ fun(at, Path) -> at(Path, Node, DB);
+ (child_at, Path) -> child_at(Path, Node, DB);
+ (keys, Path) -> raw_keys(at(Path, Node, DB), DB);
+ ({keys, Key, Skip}, Path) -> raw_keys(at(Path, Node, DB), Key, Skip, DB)
end.
%% Return the node at a given position in a tree.
-at(Path, Tree) ->
- case child_at(Path, Tree) of
+at(Path, Tree, DB) ->
+ case child_at(Path, Tree, DB) of
{_Off, Node} -> Node;
Node -> Node
end.
@@ -452,10 +672,10 @@ at(Path, Tree) ->
%% its indexed offset.
%% This allows to diff inner nodes without contextual info while in the
%% offset traversal.
-child_at(<<>>, Node) ->
+child_at(<<>>, Node,_DB) ->
%% End of path, return whatever
Node;
-child_at(<>, #inner{children=Children}) ->
+child_at(<>, #inner{children=Children}, DB) ->
%% Depending on the path depth, the behavior changes. If the path depth
%% left is of one (i.e. `<> = <>') and that we are in
%% an inner node, then we're looking for the child definition as
@@ -470,11 +690,11 @@ child_at(<>, #inner{children=Children}) ->
end, N, Children),
undefined
catch
- throw:{Off,Node} -> {Off,Node};
- throw:Node -> child_at(Rest, Node)
+ throw:{Off,Node} -> {Off, db_get(Node, DB)};
+ throw:Node -> child_at(Rest, db_get(Node, DB), DB)
end;
%% Invalid path
-child_at(_, _) ->
+child_at(_, _, _) ->
undefined.
%% Serialize nodes flatly. All terms are self-contained and their
@@ -514,7 +734,8 @@ unserialize(<>) ->
#leaf{userkey=Key, hashkey=HKey, hash=Hash};
unserialize(<>) ->
- #inner{hashchildren=Hash};
+ %% Cheat a little to please Dialyzer
+ #inner{hashchildren=Hash, children=[], offset=256};
unserialize(<>) ->
{Byte, unserialize(Node)};
unserialize(<>) ->
@@ -531,3 +752,41 @@ unserialize(<>) ->
NumKeys = length(Keys),
{Word, Keys}.
+%% Node traversal
+visit(#leaf{hash = Hash} = Node, VisitFun,_DB, Acc) ->
+ case VisitFun(leaf, Hash, Node, Acc) of
+ stop -> Acc;
+ NewAcc -> NewAcc
+ end;
+visit(#inner{hashchildren = Hash} = Node, VisitFun, DB, Acc) ->
+ case VisitFun(inner, Hash, Node, Acc) of
+ stop ->
+ Acc;
+ NewAcc ->
+ orddict:fold(fun(_, ChildRef, FoldAcc) ->
+ visit(db_get(ChildRef, DB), VisitFun, DB, FoldAcc)
+ end, NewAcc, Node#inner.children)
+ end.
+
+%% Interface to the db backend's callback functions.
+db_put(_, no_db) ->
+ no_db;
+db_put(#leaf{hash = Hash} = Node, #db{put=Put, handle=Handle} = DB) ->
+ DB#db{handle=Put(Hash, Node, Handle)};
+db_put(#inner{hashchildren = Hash} = Node, #db{put=Put, handle=Handle} = DB) ->
+ DB#db{handle=Put(Hash, Node, Handle)}.
+
+db_get(X, no_db) when is_record(X, inner) orelse is_record(X, leaf) orelse X =:= undefined ->
+ X;
+db_get(X, #db{get=Get, handle=Handle}) when is_binary(X) ->
+ Get(X, Handle);
+db_get(undefined, #db{}) ->
+ undefined.
+
+db_ref(X, no_db) when is_record(X, inner) orelse is_record(X, leaf) orelse X =:= undefined ->
+ X;
+db_ref(#leaf{hash=Hash}, #db{}) ->
+ Hash;
+db_ref(#inner{hashchildren=Hash}, #db{}) ->
+ Hash.
+
diff --git a/test/merklet_SUITE.erl b/test/merklet_SUITE.erl
index 80c08c9..342233c 100644
--- a/test/merklet_SUITE.erl
+++ b/test/merklet_SUITE.erl
@@ -3,14 +3,51 @@
-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").
-all() -> [regression_diff].
+all() -> [ regression_diff
+ , regression_diff_dict_db
+ , regression_diff_ets_db
+ , regression_visit_nodes
+ , regression_visit_nodes_dict_db
+ , regression_visit_nodes_ets_db
+ ].
regression_diff(_) ->
- T1 = insert_all([{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}]),
- T2 = insert_all([{<<1>>,<<0>>}]),
+ regression_diff_common(undefined).
+
+regression_diff_dict_db(_) ->
+ T0 = merklet:empty_db_tree(),
+ regression_diff_common(T0).
+
+regression_diff_ets_db(_) ->
+ T0 = merklet:empty_db_tree(merklet_ets_db_backend:spec()),
+ regression_diff_common(T0).
+
+regression_diff_common(T0) ->
+ T1 = insert_all([{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}], T0),
+ T2 = insert_all([{<<1>>,<<0>>}], T0),
?assertEqual([<<1>>,<<2>>,<<3>>], merklet:diff(T1,T2)),
?assertEqual([<<1>>,<<2>>,<<3>>], merklet:diff(T2,T1)).
+regression_visit_nodes(_) ->
+ regression_visit_nodes_common(undefined).
+
+regression_visit_nodes_dict_db(_) ->
+ T0 = merklet:empty_db_tree(),
+ regression_visit_nodes_common(T0).
+
+regression_visit_nodes_ets_db(_) ->
+ T0 = merklet:empty_db_tree(merklet_ets_db_backend:spec()),
+ regression_visit_nodes_common(T0).
+
+regression_visit_nodes_common(T0) ->
+ AllVals = [{<<1>>,<<1>>},{<<2>>,<<2>>},{<<3>>,<<3>>}],
+ T1 = insert_all(AllVals, T0),
+ Fun = fun(Type,_Hash,_Node, Count) ->
+ orddict:update_counter(Type, 1, Count)
+ end,
+ Count = merklet:visit_nodes(Fun, [], T1),
+ ?assertEqual(length(AllVals), orddict:fetch(leaf, Count)).
+
%%%%%%%%%%%%%%%%
%%% Builders %%%
%%%%%%%%%%%%%%%%
diff --git a/test/merklet_ets_db_backend.erl b/test/merklet_ets_db_backend.erl
new file mode 100644
index 0000000..9443320
--- /dev/null
+++ b/test/merklet_ets_db_backend.erl
@@ -0,0 +1,25 @@
+%%% @doc Test backend module using an ets table as a db backend for merklet.
+%%% @end
+-module(merklet_ets_db_backend).
+
+-export([spec/0]).
+
+spec() ->
+ #{ get => fun ets_get/2
+ , put => fun ets_put/3
+ , handle => ets_new()}.
+
+%% NOTE: This relies on the identity hash being the second element of
+%% #leaf{} and #inner{}.
+ets_new() ->
+ ets:new(merklet_backend, [set, private, {keypos, 2}]).
+
+ets_get(Key, Ets) ->
+ [Rec] = ets:lookup(Ets, Key),
+ Rec.
+
+ets_put(Key, Val, Ets) ->
+ %% Assert
+ Key = element(2, Val),
+ ets:insert(Ets, Val),
+ Ets.
diff --git a/test/prop_merklet.erl b/test/prop_merklet.erl
index 8b93c36..d36ee32 100644
--- a/test/prop_merklet.erl
+++ b/test/prop_merklet.erl
@@ -6,23 +6,51 @@
-define(run(Case), {timeout, timer:seconds(60),
?_assert(proper:quickcheck(Case, ?OPTS))}).
-eunit_test_() ->
- [?run(prop_diff()),
- ?run(prop_dist_diff()),
- ?run(prop_delete()),
- ?run(prop_modify())].
+eunit_no_db_test_() ->
+ [?run(prop_diff_no_db()),
+ ?run(prop_dist_diff_no_db()),
+ ?run(prop_delete_no_db()),
+ ?run(prop_modify_no_db())
+ ].
+
+eunit_dict_db_test_() ->
+ [?run(prop_diff_dict_db()),
+ ?run(prop_dist_diff_dict_db()),
+ ?run(prop_delete_dict_db()),
+ ?run(prop_modify_dict_db())
+ ].
+
+eunit_ets_db_test_() ->
+ [?run(prop_diff_ets_db()),
+ ?run(prop_dist_diff_ets_db()),
+ ?run(prop_delete_ets_db()),
+ ?run(prop_modify_ets_db())
+ ].
+
+eunit_gc_test_() ->
+ [?run(prop_gc())
+ ].
%%%%%%%%%%%%%%%%%%
%%% Properties %%%
%%%%%%%%%%%%%%%%%%
-prop_diff() ->
+prop_diff_no_db() ->
+ prop_diff(no_db).
+
+prop_diff_dict_db() ->
+ prop_diff(dict_db).
+
+prop_diff_ets_db() ->
+ prop_diff(ets_db).
+
+prop_diff(Backend) ->
%% All differences between trees can be found no matter the order,
%% and returns the list of different keys.
?FORALL({KV1,KV2}, diff_keyvals(),
begin
Keys = [K || {K,_} <- KV2],
- T1 = insert_all(KV1),
- T2 = insert_all(KV2, T1),
+ T1 = insert_all(KV1, Backend),
+ T2 = extend(KV2, T1),
Diff1 = merklet:diff(T1,T2),
Diff2 = merklet:diff(T2,T1),
Diff1 =:= Diff2
@@ -30,7 +58,16 @@ prop_diff() ->
Diff1 =:= lists:sort(Keys)
end).
-prop_dist_diff() ->
+prop_dist_diff_no_db() ->
+ prop_dist_diff(no_db).
+
+prop_dist_diff_dict_db() ->
+ prop_dist_diff(dict_db).
+
+prop_dist_diff_ets_db() ->
+ prop_dist_diff(ets_db).
+
+prop_dist_diff(Backend) ->
%% All differences between trees can be found no matter the order,
%% and returns the list of different keys. Same as previous case, but
%% uses the internal serialization format and distribution API
@@ -38,8 +75,8 @@ prop_dist_diff() ->
?FORALL({KV1,KV2}, diff_keyvals(),
begin
Keys = [K || {K,_} <- KV2],
- T1 = insert_all(KV1),
- T2 = insert_all(KV2, T1),
+ T1 = insert_all(KV1, Backend),
+ T2 = extend(KV2, T1),
%% remmote version of the trees, should be handled
%% by merklet:unserialize/1. In practice, this kind
%% of thing would take place over the network, and
@@ -55,29 +92,47 @@ prop_dist_diff() ->
Diff1 =:= lists:sort(Keys)
end).
-prop_delete() ->
+prop_delete_no_db() ->
+ prop_delete(no_db).
+
+prop_delete_dict_db() ->
+ prop_delete(dict_db).
+
+prop_delete_ets_db() ->
+ prop_delete(ets_db).
+
+prop_delete(Backend) ->
%% Having a tree and deleting a percentage of it yields the same tree
%% without said keys.
?FORALL({All, Partial, ToDelete}, delete_keyvals(0.50),
begin
- Tree = insert_all(All),
- PartialTree = insert_all(Partial),
+ Tree = insert_all(All, Backend),
+ PartialTree = insert_all(Partial, Backend),
DeletedTree = delete_keys(ToDelete, Tree),
[] =:= merklet:diff(PartialTree, DeletedTree)
andalso
merklet:keys(DeletedTree) =:= merklet:keys(PartialTree)
andalso
- DeletedTree =:= PartialTree
+ merklet:expand_db_tree(DeletedTree) =:= merklet:expand_db_tree(PartialTree)
end).
-prop_modify() ->
+prop_modify_no_db() ->
+ prop_modify(no_db).
+
+prop_modify_dict_db() ->
+ prop_modify(dict_db).
+
+prop_modify_ets_db() ->
+ prop_modify(ets_db).
+
+prop_modify(Backend) ->
%% Updating records' values should show detections as part of merklet's
%% diff operations, even if none of the keys change.
?FORALL({All, ToChange}, modify_keyvals(0.50),
begin
- Tree = insert_all(All),
+ Tree = insert_all(All, Backend),
KVSet = [{K, term_to_binary(make_ref())} || K <- ToChange],
- Modified = insert_all(KVSet, Tree),
+ Modified = extend(KVSet, Tree),
merklet:keys(Tree) =:= merklet:keys(Modified)
andalso
lists:sort(ToChange) =:= merklet:diff(Tree, Modified)
@@ -85,11 +140,54 @@ prop_modify() ->
lists:sort(ToChange) =:= merklet:diff(Modified, Tree)
end).
+%% Test insertion and garbage collection of db-backed tree. Note that
+%% the nodes of a non-db-backed tree are subtrees rather than nodes,
+%% which makes the abstraction break down.
+prop_gc() ->
+ Fun = fun(_Type, Hash, Node, AccHandle) ->
+ orddict:store(Hash, Node, AccHandle)
+ end,
+ Spec = #{ get => fun orddict:fetch/2
+ , put => fun orddict:store/3
+ , handle => []},
+ ?FORALL(Entries, keyvals(),
+ begin
+ Tree1 = extend(Entries, merklet:empty_db_tree(Spec)),
+ NewStore1 = merklet:visit_nodes(Fun, [], Tree1),
+ Tree2 = merklet:db_tree(merklet:root_hash(Tree1),
+ Spec#{handle => NewStore1}),
+ NewStore2 = merklet:visit_nodes(Fun, [], Tree2),
+ %% Test that we can traverse the new tree.
+ Tree3 = merklet:db_tree(merklet:root_hash(Tree2),
+ Spec#{handle => NewStore2}),
+ DB1 = merklet:db_handle(Tree1),
+ DB2 = merklet:db_handle(Tree2),
+ DB3 = merklet:db_handle(Tree3),
+ merklet:keys(Tree1) =:= merklet:keys(Tree2)
+ andalso merklet:diff(Tree1, Tree2) =:= []
+ andalso merklet:root_hash(Tree1) =:= merklet:root_hash(Tree1)
+ andalso length(DB2) =< length(DB1)
+ andalso is_sub_orddict(DB2, DB1)
+ andalso DB2 =:= DB3 %% The gc should be idempotent
+ end).
+
+is_sub_orddict([], _) ->
+ true;
+is_sub_orddict(_, []) ->
+ false;
+is_sub_orddict([{K1, V1}|Left1], [{K1, V2}|Left2]) ->
+ V1 =:= V2 andalso is_sub_orddict(Left1, Left2);
+is_sub_orddict([{K1, _}|_] = Orddict1, [{K2, _}|Left2]) ->
+ K1 > K2 andalso is_sub_orddict(Orddict1, Left2).
+
%%%%%%%%%%%%%%%%
%%% Builders %%%
%%%%%%%%%%%%%%%%
-insert_all(KeyVals) -> insert_all(KeyVals, undefined).
-insert_all(KeyVals, Tree) -> lists:foldl(fun merklet:insert/2, Tree, KeyVals).
+insert_all(KeyVals, no_db) -> extend(KeyVals, undefined);
+insert_all(KeyVals, dict_db) -> extend(KeyVals, merklet:empty_db_tree());
+insert_all(KeyVals, ets_db) -> extend(KeyVals, merklet:empty_db_tree(merklet_ets_db_backend:spec())).
+
+extend(KeyVals, Tree) -> lists:foldl(fun merklet:insert/2, Tree, KeyVals).
delete_keys(Keys, Tree) -> lists:foldl(fun merklet:delete/2, Tree, Keys).
@@ -108,7 +206,7 @@ diff_keyvals() ->
delete_keyvals(Rate) ->
?LET(KeyVals, keyvals(),
begin
- Rand = random:uniform(),
+ Rand = rand:uniform(),
ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand],
WithoutDeleted = [{K,V} || {K,V} <- KeyVals, Rate < Rand],
{KeyVals, WithoutDeleted, ToDelete}
@@ -116,10 +214,10 @@ delete_keyvals(Rate) ->
modify_keyvals(Rate) ->
% similar as delete_keyvals but doesn't allow duplicate updates
- ?SUCHTHAT({_,ToChange},
+ ?SUCHTHAT({_,ToChange},
?LET(KeyVals, keyvals(),
begin
- Rand = random:uniform(),
+ Rand = rand:uniform(),
ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand],
{KeyVals, lists:usort(ToDelete)}
end),
diff --git a/test/prop_model.erl b/test/prop_model.erl
index 95e6a6b..4dae4c3 100644
--- a/test/prop_model.erl
+++ b/test/prop_model.erl
@@ -8,31 +8,113 @@
-define(run(Case), {timeout, timer:seconds(60),
?_assert(proper:quickcheck(Case, ?OPTS))}).
-eunit_test_() ->
- [?run(prop_insert_many()),
- ?run(prop_delete_random()),
- ?run(prop_delete_members()),
- ?run(prop_overwrite()),
- ?run(prop_insert_same_diff()),
- ?run(prop_insert_mixed_diff()),
- ?run(prop_insert_disjoint_diff()),
- ?run(prop_delete_random_diff()),
- ?run(prop_delete_members_diff()),
- ?run(prop_overwrite_diff()),
- ?run(prop_mixed_diff()),
- ?run(prop_mixed_dist_diff())
+eunit_no_db_test_() ->
+ [?run(prop_insert_many_no_db()),
+ ?run(prop_traversal_no_db()),
+ ?run(prop_delete_random_no_db()),
+ ?run(prop_delete_members_no_db()),
+ ?run(prop_overwrite_no_db()),
+ ?run(prop_insert_same_diff_no_db()),
+ ?run(prop_insert_mixed_diff_no_db()),
+ ?run(prop_insert_disjoint_diff_no_db()),
+ ?run(prop_delete_random_diff_no_db()),
+ ?run(prop_delete_members_diff_no_db()),
+ ?run(prop_overwrite_diff_no_db()),
+ ?run(prop_mixed_diff_no_db()),
+ ?run(prop_mixed_dist_diff_no_db())
].
+eunit_dict_db_test_() ->
+ [?run(prop_insert_many_dict_db()),
+ ?run(prop_traversal_dict_db()),
+ ?run(prop_delete_random_dict_db()),
+ ?run(prop_delete_members_dict_db()),
+ ?run(prop_overwrite_dict_db()),
+ ?run(prop_insert_same_diff_dict_db()),
+ ?run(prop_insert_mixed_diff_dict_db()),
+ ?run(prop_insert_disjoint_diff_dict_db()),
+ ?run(prop_delete_random_diff_dict_db()),
+ ?run(prop_delete_members_diff_dict_db()),
+ ?run(prop_overwrite_diff_dict_db()),
+ ?run(prop_mixed_diff_dict_db()),
+ ?run(prop_mixed_dist_diff_dict_db())
+ ].
+
+eunit_ets_db_test_() ->
+ [?run(prop_insert_many_ets_db()),
+ ?run(prop_traversal_ets_db()),
+ ?run(prop_delete_random_ets_db()),
+ ?run(prop_delete_members_ets_db()),
+ ?run(prop_overwrite_ets_db()),
+ ?run(prop_insert_same_diff_ets_db()),
+ ?run(prop_insert_mixed_diff_ets_db()),
+ ?run(prop_insert_disjoint_diff_ets_db()),
+ ?run(prop_delete_random_diff_ets_db()),
+ ?run(prop_delete_members_diff_ets_db()),
+ ?run(prop_overwrite_diff_ets_db()),
+ ?run(prop_mixed_diff_ets_db()),
+ ?run(prop_mixed_dist_diff_ets_db())
+ ].
+
+empty_tree(no_db) ->
+ undefined;
+empty_tree(dict_db) ->
+ merklet:empty_db_tree();
+empty_tree(ets_db) ->
+ merklet:empty_db_tree(merklet_ets_db_backend:spec()).
+
%% Test insertion and reading the keys back
-prop_insert_many() ->
+prop_insert_many_no_db() ->
+ prop_insert_many(no_db).
+
+prop_insert_many_dict_db() ->
+ prop_insert_many(dict_db).
+
+prop_insert_many_ets_db() ->
+ prop_insert_many(ets_db).
+
+prop_insert_many(Backend) ->
?FORALL(Entries, keyvals(),
merklet_model:keys(merklet_model:insert_many(Entries,undefined))
=:=
- merklet:keys(merklet:insert_many(Entries,undefined))
+ merklet:keys(merklet:insert_many(Entries,empty_tree(Backend)))
).
+%% Test insertion and traversing all nodes.
+prop_traversal_no_db() ->
+ prop_traversal(no_db).
+
+prop_traversal_dict_db() ->
+ prop_traversal(dict_db).
+
+prop_traversal_ets_db() ->
+ prop_traversal(ets_db).
+
+prop_traversal(Backend) ->
+ Fun = fun(Type,_Hash,_Node, Count) ->
+ orddict:update_counter(Type, 1, Count)
+ end,
+ ?FORALL(Entries, keyvals(),
+ begin
+ M = merklet_model:insert_many(Entries,undefined),
+ T = merklet:insert_many(Entries,empty_tree(Backend)),
+ InitAcc = orddict:from_list([{leaf, 0}, {inner, 0}]),
+ length(merklet_model:keys(M))
+ =:=
+ orddict:fetch(leaf, merklet:visit_nodes(Fun, InitAcc, T))
+ end).
+
%% Delete keys that may or may not be in the tree
-prop_delete_random() ->
+prop_delete_random_no_db() ->
+ prop_delete_random(no_db).
+
+prop_delete_random_dict_db() ->
+ prop_delete_random(dict_db).
+
+prop_delete_random_ets_db() ->
+ prop_delete_random(ets_db).
+
+prop_delete_random(Backend) ->
?FORALL({Entries, ToDelete}, {keyvals(), list(binary())},
merklet_model:keys(
delete(merklet_model,
@@ -42,11 +124,20 @@ prop_delete_random() ->
merklet:keys(
delete(merklet,
ToDelete,
- merklet:insert_many(Entries,undefined)))
+ merklet:insert_many(Entries,empty_tree(Backend))))
).
%% Only delete keys that have previously been inserted in the tree
-prop_delete_members() ->
+prop_delete_members_no_db() ->
+ prop_delete_members(no_db).
+
+prop_delete_members_dict_db() ->
+ prop_delete_members(dict_db).
+
+prop_delete_members_ets_db() ->
+ prop_delete_members(ets_db).
+
+prop_delete_members(Backend) ->
?FORALL({Entries, ToDelete}, delete_keyvals(0.5),
merklet_model:keys(
delete(merklet_model,
@@ -56,11 +147,20 @@ prop_delete_members() ->
merklet:keys(
delete(merklet,
ToDelete,
- merklet:insert_many(Entries,undefined)))
+ merklet:insert_many(Entries,empty_tree(Backend))))
).
%% Overwrite existing entries, make sure nothing was lost or added
-prop_overwrite() ->
+prop_overwrite_no_db() ->
+ prop_overwrite(no_db).
+
+prop_overwrite_dict_db() ->
+ prop_overwrite(dict_db).
+
+prop_overwrite_ets_db() ->
+ prop_overwrite(ets_db).
+
+prop_overwrite(Backend) ->
?FORALL({Entries, ToUpdate}, overwrite_keyvals(0.5),
merklet_model:keys(
merklet_model:insert_many(ToUpdate,
@@ -68,50 +168,86 @@ prop_overwrite() ->
=:=
merklet:keys(
merklet:insert_many(ToUpdate,
- merklet:insert_many(Entries,undefined)))
+ merklet:insert_many(Entries,empty_tree(Backend))))
).
%% Trees diffed with themselves should be stable
-prop_insert_same_diff() ->
+prop_insert_same_diff_no_db() ->
+ prop_insert_same_diff(no_db).
+
+prop_insert_same_diff_dict_db() ->
+ prop_insert_same_diff(dict_db).
+
+prop_insert_same_diff_ets_db() ->
+ prop_insert_same_diff(ets_db).
+
+prop_insert_same_diff(Backend) ->
?FORALL(Entries, keyvals(),
merklet_model:diff(merklet_model:insert_many(Entries,undefined),
merklet_model:insert_many(Entries,undefined))
=:=
- merklet:diff(merklet:insert_many(Entries,undefined),
- merklet:insert_many(Entries,undefined))
+ merklet:diff(merklet:insert_many(Entries,empty_tree(Backend)),
+ merklet:insert_many(Entries,empty_tree(Backend)))
).
%% Two independent trees diffed together (no verification of commutativity)
-prop_insert_mixed_diff() ->
+prop_insert_mixed_diff_no_db() ->
+ prop_insert_mixed_diff(no_db).
+
+prop_insert_mixed_diff_dict_db() ->
+ prop_insert_mixed_diff(dict_db).
+
+prop_insert_mixed_diff_ets_db() ->
+ prop_insert_mixed_diff(ets_db).
+
+prop_insert_mixed_diff(Backend) ->
?FORALL({Entries1, Entries2}, {keyvals(), keyvals()},
merklet_model:diff(merklet_model:insert_many(Entries1,undefined),
merklet_model:insert_many(Entries2,undefined))
=:=
- merklet:diff(merklet:insert_many(Entries1,undefined),
- merklet:insert_many(Entries2,undefined))
+ merklet:diff(merklet:insert_many(Entries1,empty_tree(Backend)),
+ merklet:insert_many(Entries2,empty_tree(Backend)))
).
%% Two independent trees with no overlapping data sets diffed together
%% (no verification of commutativity)
-prop_insert_disjoint_diff() ->
+prop_insert_disjoint_diff_no_db() ->
+ prop_insert_disjoint_diff(no_db).
+
+prop_insert_disjoint_diff_dict_db() ->
+ prop_insert_disjoint_diff(dict_db).
+
+prop_insert_disjoint_diff_ets_db() ->
+ prop_insert_disjoint_diff(ets_db).
+
+prop_insert_disjoint_diff(Backend) ->
?FORALL(Lists, disjoint_keyvals(),
begin
{Entries1, Entries2} = Lists,
merklet_model:diff(merklet_model:insert_many(Entries1,undefined),
merklet_model:insert_many(Entries2,undefined))
=:=
- merklet:diff(merklet:insert_many(Entries1,undefined),
- merklet:insert_many(Entries2,undefined))
+ merklet:diff(merklet:insert_many(Entries1,empty_tree(Backend)),
+ merklet:insert_many(Entries2,empty_tree(Backend)))
end).
%% Diffing two trees that had random element deletion that may or may
%% not be present (tests commutativity)
-prop_delete_random_diff() ->
+prop_delete_random_diff_no_db() ->
+ prop_delete_random_diff(no_db).
+
+prop_delete_random_diff_dict_db() ->
+ prop_delete_random_diff(dict_db).
+
+prop_delete_random_diff_ets_db() ->
+ prop_delete_random_diff(ets_db).
+
+prop_delete_random_diff(Backend) ->
?FORALL({Entries, ToDelete}, {keyvals(), list(binary())},
begin
ModelFull = merklet_model:insert_many(Entries,undefined),
ModelDelete = delete(merklet_model, ToDelete, ModelFull),
- MerkletFull = merklet:insert_many(Entries,undefined),
+ MerkletFull = merklet:insert_many(Entries,empty_tree(Backend)),
MerkletDelete = delete(merklet, ToDelete, MerkletFull),
(merklet_model:diff(ModelFull, ModelDelete)
=:= merklet:diff(MerkletFull, MerkletDelete))
@@ -121,12 +257,21 @@ prop_delete_random_diff() ->
end).
%% Diffing two trees that had member element deletion (tests commutativity)
-prop_delete_members_diff() ->
+prop_delete_members_diff_no_db() ->
+ prop_delete_members_diff(no_db).
+
+prop_delete_members_diff_dict_db() ->
+ prop_delete_members_diff(dict_db).
+
+prop_delete_members_diff_ets_db() ->
+ prop_delete_members_diff(ets_db).
+
+prop_delete_members_diff(Backend) ->
?FORALL({Entries, ToDelete}, delete_keyvals(0.5),
begin
ModelFull = merklet_model:insert_many(Entries,undefined),
ModelDelete = delete(merklet_model, ToDelete, ModelFull),
- MerkletFull = merklet:insert_many(Entries,undefined),
+ MerkletFull = merklet:insert_many(Entries,empty_tree(Backend)),
MerkletDelete = delete(merklet, ToDelete, MerkletFull),
(merklet_model:diff(ModelFull, ModelDelete)
=:= merklet:diff(MerkletFull, MerkletDelete))
@@ -136,12 +281,21 @@ prop_delete_members_diff() ->
end).
%% Diffing trees that had overwritten pieces of data (tests commutativity)
-prop_overwrite_diff() ->
+prop_overwrite_diff_no_db() ->
+ prop_overwrite_diff(no_db).
+
+prop_overwrite_diff_dict_db() ->
+ prop_overwrite_diff(dict_db).
+
+prop_overwrite_diff_ets_db() ->
+ prop_overwrite_diff(ets_db).
+
+prop_overwrite_diff(Backend) ->
?FORALL({Entries, ToUpdate}, overwrite_keyvals(0.5),
begin
ModelFull = merklet_model:insert_many(Entries, undefined),
ModelUpdate = merklet_model:insert_many(ToUpdate, ModelFull),
- MerkletFull = merklet:insert_many(Entries, undefined),
+ MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)),
MerkletUpdate = merklet:insert_many(ToUpdate, MerkletFull),
(merklet_model:diff(ModelFull, ModelUpdate)
=:= merklet:diff(MerkletFull, MerkletUpdate))
@@ -152,13 +306,22 @@ prop_overwrite_diff() ->
%% Commutative verification of various trees that had their keys
%% inserted, updated, and randomly deleted.
-prop_mixed_diff() ->
+prop_mixed_diff_no_db() ->
+ prop_mixed_diff(no_db).
+
+prop_mixed_diff_dict_db() ->
+ prop_mixed_diff(dict_db).
+
+prop_mixed_diff_ets_db() ->
+ prop_mixed_diff(ets_db).
+
+prop_mixed_diff(Backend) ->
?FORALL({{Entries, ToUpdate}, ToDelete}, {overwrite_keyvals(0.5), list(binary())},
begin
ModelFull = merklet_model:insert_many(Entries, undefined),
ModelDelete = delete(merklet_model, ToDelete, ModelFull),
ModelUpdate = merklet_model:insert_many(ToUpdate, ModelDelete),
- MerkletFull = merklet:insert_many(Entries, undefined),
+ MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)),
MerkletDelete = delete(merklet, ToDelete, MerkletFull),
MerkletUpdate = merklet:insert_many(ToUpdate, MerkletDelete),
%% Full vs. Update
@@ -186,13 +349,22 @@ prop_mixed_diff() ->
%% Commutative verification of various trees that had their keys
%% inserted, updated, and randomly deleted, while using the
%% distributed/serialized interface.
-prop_mixed_dist_diff() ->
+prop_mixed_dist_diff_no_db() ->
+ prop_mixed_dist_diff(no_db).
+
+prop_mixed_dist_diff_dict_db() ->
+ prop_mixed_dist_diff(dict_db).
+
+prop_mixed_dist_diff_ets_db() ->
+ prop_mixed_dist_diff(ets_db).
+
+prop_mixed_dist_diff(Backend) ->
?FORALL({{Entries, ToUpdate}, ToDelete}, {overwrite_keyvals(0.5), list(binary())},
begin
ModelFull = merklet_model:insert_many(Entries, undefined),
ModelDelete = delete(merklet_model, ToDelete, ModelFull),
ModelUpdate = merklet_model:insert_many(ToUpdate, ModelDelete),
- MerkletFull = merklet:insert_many(Entries, undefined),
+ MerkletFull = merklet:insert_many(Entries, empty_tree(Backend)),
MerkletDelete = delete(merklet, ToDelete, MerkletFull),
MerkletUpdate = merklet:insert_many(ToUpdate, MerkletDelete),
DistFull = merklet:access_unserialize(merklet:access_serialize(MerkletFull)),
@@ -235,7 +407,7 @@ keyvals() -> list({binary(), binary()}).
delete_keyvals(Rate) ->
?LET(KeyVals, keyvals(),
begin
- Rand = random:uniform(),
+ Rand = rand:uniform(),
ToDelete = [Key || {Key,_} <- KeyVals, Rate > Rand],
{KeyVals, ToDelete}
end).
@@ -243,7 +415,7 @@ delete_keyvals(Rate) ->
overwrite_keyvals(Rate) ->
?LET(KeyVals, keyvals(),
begin
- Rand = random:uniform(),
+ Rand = rand:uniform(),
ToUpdate = [{Key, <<0,Val/binary>>} || {Key,Val} <- KeyVals, Rate > Rand],
{KeyVals, ToUpdate}
end).
@@ -255,4 +427,4 @@ disjoint_keyvals() ->
KS2 = [K || {K, _} <- KV2],
lists:all(fun(K) -> not lists:member(K,KS2) end, KS1)
end).
-
+