From ae57258dfd95460f0b3e25624536633e17f65c87 Mon Sep 17 00:00:00 2001 From: Nick Cleaton Date: Sat, 11 Jan 2020 14:58:48 +0000 Subject: [PATCH 1/5] Add input byte permutation for the buzhash chunker https://github.com/borgbackup/borg/issues/3687 --- src/borg/_chunker.c | 9 +++--- src/borg/archive.py | 7 ++-- src/borg/chunker.pyx | 30 +++++++++++++---- src/borg/crypto/key.py | 43 +++++++++++++++++++++++- src/borg/item.pyx | 4 ++- src/borg/testsuite/chunker.py | 52 +++++++++++++++++++++++------- src/borg/testsuite/chunker_slow.py | 43 ++++++++++++++---------- 7 files changed, 143 insertions(+), 45 deletions(-) diff --git a/src/borg/_chunker.c b/src/borg/_chunker.c index 75599c5b15..45e1f1cb55 100644 --- a/src/borg/_chunker.c +++ b/src/borg/_chunker.c @@ -68,13 +68,13 @@ static uint32_t table_base[] = size_t pagemask; static uint32_t * -buzhash_init_table(uint32_t seed) +buzhash_init_table(uint32_t seed, unsigned char *permutation) { int i; uint32_t *table = malloc(1024); for(i = 0; i < 256; i++) { - table[i] = table_base[i] ^ seed; + table[i] = table_base[permutation[i]] ^ seed; } return table; } @@ -112,13 +112,14 @@ typedef struct { } Chunker; static Chunker * -chunker_init(size_t window_size, uint32_t chunk_mask, size_t min_size, size_t max_size, uint32_t seed) +chunker_init(size_t window_size, uint32_t chunk_mask, size_t min_size, size_t max_size, uint32_t seed, + unsigned char *permutation) { Chunker *c = calloc(sizeof(Chunker), 1); c->window_size = window_size; c->chunk_mask = chunk_mask; c->min_size = min_size; - c->table = buzhash_init_table(seed); + c->table = buzhash_init_table(seed, permutation); c->buf_size = max_size; c->data = malloc(c->buf_size); c->fh = -1; diff --git a/src/borg/archive.py b/src/borg/archive.py index fa0c7d7e63..dfeb3f040e 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -320,7 +320,7 @@ def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS): self.packer = msgpack.Packer() self.chunks = [] self.key = key - 
self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed) + self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, permutation=self.key.chunk_permutation) def add(self, item): self.buffer.write(self.packer.pack(item.as_dict())) @@ -1178,7 +1178,7 @@ def __init__(self, *, metadata_collector, cache, key, self.hard_links = {} self.stats = Statistics() # threading: done by cache (including progress) self.cwd = os.getcwd() - self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed) + self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, permutation=key.chunk_permutation) @contextmanager def create_helper(self, path, st, status=None, hardlinkable=True): @@ -2102,7 +2102,8 @@ def create_target(self, archive, target_name=None): cache=self.cache, key=self.key, add_item=target.add_item, write_checkpoint=target.write_checkpoint, checkpoint_interval=self.checkpoint_interval, rechunkify=target.recreate_rechunkify).process_file_chunks - target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed) + target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed, + permutation=self.key.chunk_permutation) return target def create_target_archive(self, name): diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index 68f9c010e2..89f4b8c357 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -10,14 +10,19 @@ cdef extern from "_chunker.c": ctypedef int uint32_t ctypedef struct _Chunker "Chunker": pass - _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed) + _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, + uint32_t seed, unsigned char *permutation) void chunker_set_fd(_Chunker *chunker, object f, int fd) void chunker_free(_Chunker *chunker) object chunker_process(_Chunker *chunker) - uint32_t *buzhash_init_table(uint32_t seed) + uint32_t *buzhash_init_table(uint32_t seed, unsigned char *permutation) uint32_t 
c_buzhash "buzhash"(unsigned char *data, size_t len, uint32_t *h) uint32_t c_buzhash_update "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h) +# The identity permutation of input bytes, useful for maintaining +# backward compatibility with interfaces defined before input byte +# permutations were introduced. +null_permutation = bytes(range(256)) class ChunkerFixed: """ @@ -94,13 +99,14 @@ cdef class Chunker: """ cdef _Chunker *chunker - def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): + def __cinit__(self, int seed, unsigned char *permutation, int chunk_min_exp, int chunk_max_exp, + int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp # see chunker_process, first while loop condition, first term must be able to get True: assert hash_window_size + min_size + 1 <= max_size, "too small max_size" hash_mask = (1 << hash_mask_bits) - 1 - self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff) + self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff, permutation) def chunkify(self, fd, fh=-1): """ @@ -127,7 +133,8 @@ cdef class Chunker: def get_chunker(algo, *params, **kw): if algo == 'buzhash': seed = kw['seed'] - return Chunker(seed, *params) + perm = kw.get('permutation') or null_permutation + return Chunker(seed, perm, *params) if algo == 'fixed': return ChunkerFixed(*params) raise TypeError('unsupported chunker algo %r' % algo) @@ -143,17 +150,26 @@ def max_chunk_size(algo, *params): def buzhash(data, unsigned long seed): + return buzhash_perm(data, seed, null_permutation) + + +def buzhash_perm(data, unsigned long seed, unsigned char *permutation): cdef uint32_t *table cdef uint32_t sum - table = buzhash_init_table(seed & 0xffffffff) + table = buzhash_init_table(seed & 0xffffffff, permutation) sum = c_buzhash( data, len(data), 
table) free(table) return sum def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, unsigned long seed): + return buzhash_update_perm(sum, remove, add, len, seed, null_permutation) + + +def buzhash_update_perm(uint32_t sum, unsigned char remove, unsigned char add, size_t len, + unsigned long seed, unsigned char *permutation): cdef uint32_t *table - table = buzhash_init_table(seed & 0xffffffff) + table = buzhash_init_table(seed & 0xffffffff, permutation) sum = c_buzhash_update(sum, remove, add, len, table) free(table) return sum diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index e263add8de..a4644bcaf4 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -161,6 +161,10 @@ class KeyBase: # type: int chunk_seed = None + # The input byte permutation for the buzhash chunker + # type: bytes + chunk_permutation = None + # Whether this *particular instance* is encrypted from a practical point of view, # i.e. when it's using encryption with a empty passphrase, then # that may be *technically* called encryption, but for all intents and purposes @@ -266,6 +270,7 @@ class PlaintextKey(KeyBase): STORAGE = KeyBlobStorage.NO_STORAGE chunk_seed = 0 + chunk_permutation = None logically_encrypted = False def __init__(self, repository): @@ -345,6 +350,37 @@ def id_hash(self, data): return hmac_sha256(self.id_key, data) +def _derive_byte_permutation(key_material): + """ + Derive a 256-byte permutation table from the key material + + There are 256! possible permutations of a byte-indexed table, and + we want to make an unbiased choice. Since 256! is just under 2^1684 + (it's 0xFF578F....) we derive 1684 pseudorandom bits from the key + material and treat it as a single large integer. There's only a 1 in + 390 chance that this integer is >= 256!, in which case we try again. 
+ """ + for attempt in range(10): + context = b"chunker input byte permutation, attempt %d" % attempt + key = hkdf_hmac_sha512(key_material, None, context, 211) + pool = int.from_bytes(key, "big") + pool >>= 4 # 211 bytes is 1688 bits, 4 bits more than we want + perm = list(range(256)) + for i in range(256): + pool, offset = divmod(pool, 256-i) + j = i + offset + tmp = perm[i] + perm[i] = perm[j] + perm[j] = tmp + + if pool == 0: + # the pool value was less than 256!, we have an unbiased choice + return bytes(perm) + + # we're very unlikely to fall through to here. Just accept the biased permutation + return bytes(perm) + + class AESKeyBase(KeyBase): """ Common base class shared by KeyfileKey and PassphraseKey @@ -388,7 +424,7 @@ def decrypt(self, id, data, decompress=True): def init_from_random_data(self, data=None): if data is None: - data = os.urandom(100) + data = os.urandom(132) self.enc_key = data[0:32] self.enc_hmac_key = data[32:64] self.id_key = data[64:96] @@ -396,6 +432,9 @@ def init_from_random_data(self, data=None): # Convert to signed int32 if self.chunk_seed & 0x80000000: self.chunk_seed = self.chunk_seed - 0xffffffff - 1 + if len(data) >= 132: + chunk_key = data[100:132] + self.chunk_permutation = _derive_byte_permutation(chunk_key) def init_ciphers(self, manifest_data=None): self.cipher = self.CIPHERSUITE(mac_key=self.enc_hmac_key, enc_key=self.enc_key, header_len=1, aad_offset=1) @@ -620,6 +659,7 @@ def _load(self, key_data, passphrase): self.enc_hmac_key = key.enc_hmac_key self.id_key = key.id_key self.chunk_seed = key.chunk_seed + self.chunk_permutation = key.get('chunk_permutation') self.tam_required = key.get('tam_required', tam_required(self.repository)) return True return False @@ -660,6 +700,7 @@ def _save(self, passphrase): enc_hmac_key=self.enc_hmac_key, id_key=self.id_key, chunk_seed=self.chunk_seed, + chunk_permutation=self.chunk_permutation, tam_required=self.tam_required, ) data = self.encrypt_key_file(msgpack.packb(key.as_dict()), 
passphrase) diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 7a3c4a8ac7..bd9b5e9f63 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -318,7 +318,8 @@ class Key(PropDict): If a Key shall be serialized, give as_dict() method output to msgpack packer. """ - VALID_KEYS = {'version', 'repository_id', 'enc_key', 'enc_hmac_key', 'id_key', 'chunk_seed', 'tam_required'} # str-typed keys + VALID_KEYS = {'version', 'repository_id', 'enc_key', 'enc_hmac_key', 'id_key', 'chunk_seed', + 'chunk_permutation', 'tam_required'} # str-typed keys __slots__ = ("_dict", ) # avoid setting attributes not supported by properties @@ -328,6 +329,7 @@ class Key(PropDict): enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes) id_key = PropDict._make_property('id_key', bytes) chunk_seed = PropDict._make_property('chunk_seed', int) + chunk_permutation = PropDict._make_property('chunk_permutation', bytes) tam_required = PropDict._make_property('tam_required', bool) diff --git a/src/borg/testsuite/chunker.py b/src/borg/testsuite/chunker.py index c49e5be03c..a33f6ac46e 100644 --- a/src/borg/testsuite/chunker.py +++ b/src/borg/testsuite/chunker.py @@ -1,6 +1,6 @@ from io import BytesIO -from ..chunker import ChunkerFixed, Chunker, get_chunker, buzhash, buzhash_update +from ..chunker import ChunkerFixed, Chunker, get_chunker, buzhash, buzhash_perm, buzhash_update, buzhash_update_perm from ..constants import * # NOQA from . import BaseTestCase @@ -8,6 +8,18 @@ # See borg.selftest for details. 
If you add/remove test methods, update SELFTEST_COUNT +null_permutation = bytes(range(256)) + + +def permutation_invert_case(): + perm = list(range(256)) + for up in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": + low = up.lower() + perm[ord(low)] = ord(up) + perm[ord(up)] = ord(low) + return bytes(perm) + + class ChunkerFixedTestCase(BaseTestCase): def test_chunkify_just_blocks(self): @@ -26,20 +38,21 @@ def test_chunkify_header_and_blocks(self): class ChunkerTestCase(BaseTestCase): def test_chunkify(self): + np = null_permutation data = b'0' * int(1.5 * (1 << CHUNK_MAX_EXP)) + b'Y' - parts = [bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))] + parts = [bytes(c) for c in Chunker(0, np, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))] self.assert_equal(len(parts), 2) self.assert_equal(b''.join(parts), data) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], []) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) - self.assert_equal([bytes(c) for c in Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', 
b'arboobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, np, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], []) + self.assert_equal([bytes(c) for c in Chunker(0, np, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) + self.assert_equal([bytes(c) for c in Chunker(1, np, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, np, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, np, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in Chunker(1, np, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, np, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, np, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in Chunker(1, np, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', 
b'obarboobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, np, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) def test_buzhash(self): self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769) @@ -48,6 +61,21 @@ def test_buzhash(self): # Test with more than 31 bytes to make sure our barrel_shift macro works correctly self.assert_equal(buzhash(b'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz', 0), 566521248) + def test_permutation(self): + p = permutation_invert_case() + + # a non-null permutation should spoil these test cases copied from the methods above + self.assert_not_equal([bytes(c) for c in Chunker(2, p, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) + self.assert_not_equal(buzhash_perm(b'abcdefghijklmnop', 0, p), 3795437769) + + # inverting the case of the input should compensate for the permutation + self.assert_equal([bytes(c) for c in Chunker(0, p, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'FOOBARBOOBAZ' * 3))], [b'FOOBA', b'RBOOBAZ', b'FOOBA', b'RBOOBAZ', b'FOOBA', b'RBOOBAZ']) + self.assert_equal([bytes(c) for c in Chunker(2, p, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'FOOBARBOOBAZ' * 3))], [b'FOOBARBOOBAZ', b'FOOBARBOOBAZ', b'FOOBARBOOBAZ']) + self.assert_equal(buzhash_perm(b'ABCDEFGHIJKLMNOP', 0, p), 3795437769) + self.assert_equal(buzhash_perm(b'ABCDEFGHIJKLMNOP', 1, p), 3795400502) + self.assert_equal(buzhash_perm(b'ABCDEFGHIJKLMNOP', 1, p), + buzhash_update_perm(buzhash_perm(b'xABCDEFGHIJKLMNO', 1, p), ord('x'), ord('P'), 16, 1, p)) + def test_small_reads(self): class SmallReadFile: input = b'a' * (20 + 1) diff --git a/src/borg/testsuite/chunker_slow.py b/src/borg/testsuite/chunker_slow.py index 2739a735ad..6892dccabf 100644 --- a/src/borg/testsuite/chunker_slow.py +++ b/src/borg/testsuite/chunker_slow.py @@ -20,20 +20,29 @@ def twist(size): data = twist(100000) - runs = [] - for winsize 
in (65, 129, HASH_WINDOW_SIZE, 7351): - for minexp in (4, 6, 7, 11, 12): - for maxexp in (15, 17): - if minexp >= maxexp: - continue - for maskbits in (4, 7, 10, 12): - for seed in (1849058162, 1234567653): - fh = BytesIO(data) - chunker = Chunker(seed, minexp, maxexp, maskbits, winsize) - chunks = [blake2b_256(b'', c) for c in chunker.chunkify(fh, -1)] - runs.append(blake2b_256(b'', b''.join(chunks))) - - # The "correct" hash below matches the existing chunker behavior. - # Future chunker optimisations must not change this, or existing repos will bloat. - overall_hash = blake2b_256(b'', b''.join(runs)) - self.assert_equal(overall_hash, unhexlify("b559b0ac8df8daaa221201d018815114241ea5c6609d98913cd2246a702af4e3")) + null_permutation = bytes(range(256)) + reverse_permutation = bytes(reversed(range(256))) + + # The hashes below match the existing chunker behavior. Future chunker optimisations + # must not change this, or existing repos will bloat. + tests = ( (null_permutation, + unhexlify("b559b0ac8df8daaa221201d018815114241ea5c6609d98913cd2246a702af4e3")), + (reverse_permutation, + unhexlify("6e56c9a94c29b4564c158131914ab21b34e6897002b38e71b0843be68158c00f"))) + + for permutation, expected_result in tests: + runs = [] + for winsize in (65, 129, HASH_WINDOW_SIZE, 7351): + for minexp in (4, 6, 7, 11, 12): + for maxexp in (15, 17): + if minexp >= maxexp: + continue + for maskbits in (4, 7, 10, 12): + for seed in (1849058162, 1234567653): + fh = BytesIO(data) + chunker = Chunker(seed, permutation, minexp, maxexp, maskbits, winsize) + chunks = [blake2b_256(b'', c) for c in chunker.chunkify(fh, -1)] + runs.append(blake2b_256(b'', b''.join(chunks))) + + overall_hash = blake2b_256(b'', b''.join(runs)) + self.assert_equal(overall_hash, expected_result) From 5691a5e8a3b634599b8eb159c59fe0465ef2a5e4 Mon Sep 17 00:00:00 2001 From: Nick Cleaton Date: Sun, 19 Jan 2020 13:19:30 +0000 Subject: [PATCH 2/5] update docs for chunker input permutation --- 
docs/internals/data-structures.rst | 8 ++++++-- docs/internals/security.rst | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index caaf758108..24fdc10c0f 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -624,8 +624,9 @@ can be used to tune the chunker parameters, the default is: - HASH_MASK_BITS = 21 (target chunk size ~= 2^21 B = 2 MiB) - HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) -The buzhash table is altered by XORing it with a seed randomly generated once -for the repository, and stored encrypted in the keyfile. This is to prevent +The buzhash table is altered by XORing it with a seed and shuffling its +elements. The XOR seed and shuffle pattern are randomly generated once for +the repository, and stored encrypted in the keyfile. This is to prevent chunk size based fingerprinting attacks on your encrypted repo contents (to guess what files you have based on a specific set of chunk sizes). @@ -901,6 +902,9 @@ id_key chunk_seed the seed for the buzhash chunking table (signed 32 bit integer) +chunk_permutation + the permutation for shuffling the buzhash table (256 bytes) + These fields are packed using msgpack_. The utf-8 encoded passphrase is processed with PBKDF2_ (SHA256_, 100000 iterations, random 256 bit salt) to derive a 256 bit key encryption key (KEK). diff --git a/docs/internals/security.rst b/docs/internals/security.rst index 72688bc45d..6cd1619268 100644 --- a/docs/internals/security.rst +++ b/docs/internals/security.rst @@ -407,8 +407,8 @@ buzhash chunker +++++++++++++++ The buzhash chunker chunks according to the input data, the chunker's -parameters and the secret chunker seed (which all influence the chunk boundary -positions). +parameters and the secret chunker seed and permutation (which all influence the +chunk boundary positions). 
Small files below some specific threshold (default: 512 KiB) result in only one chunk (identical content / size as the original file), bigger files result in From a038f213fd5af0493bb2c398a2ca5c3f4ab6f362 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 2 Apr 2020 18:13:33 +0200 Subject: [PATCH 3/5] fixup: adjust SELFTEST_COUNT --- src/borg/selftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/selftest.py b/src/borg/selftest.py index 80707c0cc2..49ad0fbf1c 100644 --- a/src/borg/selftest.py +++ b/src/borg/selftest.py @@ -30,7 +30,7 @@ ChunkerTestCase, ] -SELFTEST_COUNT = 37 +SELFTEST_COUNT = 38 class SelfTestResult(TestResult): From 3606c6732d9ba9c2938dc0f21b0cf60bc2e02aba Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 2 Apr 2020 18:42:37 +0200 Subject: [PATCH 4/5] fixup: fix pep8 issue --- src/borg/testsuite/chunker_slow.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/borg/testsuite/chunker_slow.py b/src/borg/testsuite/chunker_slow.py index 6892dccabf..abc0fc04f2 100644 --- a/src/borg/testsuite/chunker_slow.py +++ b/src/borg/testsuite/chunker_slow.py @@ -25,10 +25,10 @@ def twist(size): # The hashes below match the existing chunker behavior. Future chunker optimisations # must not change this, or existing repos will bloat. 
- tests = ( (null_permutation, - unhexlify("b559b0ac8df8daaa221201d018815114241ea5c6609d98913cd2246a702af4e3")), - (reverse_permutation, - unhexlify("6e56c9a94c29b4564c158131914ab21b34e6897002b38e71b0843be68158c00f"))) + tests = ((null_permutation, + unhexlify("b559b0ac8df8daaa221201d018815114241ea5c6609d98913cd2246a702af4e3")), + (reverse_permutation, + unhexlify("6e56c9a94c29b4564c158131914ab21b34e6897002b38e71b0843be68158c00f"))) for permutation, expected_result in tests: runs = [] From 6647331268a8c69d321dd84d0db27fa33f685559 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 2 Apr 2020 19:06:44 +0200 Subject: [PATCH 5/5] fixup: python can do the swap in a 1-liner --- src/borg/crypto/key.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index a4644bcaf4..f689ef7439 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -369,9 +369,7 @@ def _derive_byte_permutation(key_material): for i in range(256): pool, offset = divmod(pool, 256-i) j = i + offset - tmp = perm[i] - perm[i] = perm[j] - perm[j] = tmp + perm[i], perm[j] = perm[j], perm[i] if pool == 0: # the pool value was less than 256!, we have an unbiased choice