diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst
index bff6b9dcdc57ca..11836b823b945e 100644
--- a/Doc/library/uuid.rst
+++ b/Doc/library/uuid.rst
@@ -12,8 +12,8 @@
 
 This module provides immutable :class:`UUID` objects (the :class:`UUID` class)
 and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`,
-and :func:`uuid.uuid8` for generating version 1, 3, 4, 5, and 8 UUIDs as
-specified in :rfc:`9562` (which supersedes :rfc:`4122`).
+:func:`uuid7`, and :func:`uuid8` for generating version 1, 3, 4, 5, 7, and 8
+UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`).
 
 If all you want is a unique ID, you should probably call :func:`uuid1` or
 :func:`uuid4`.  Note that :func:`uuid1` may compromise privacy since it creates
@@ -153,8 +153,8 @@ which relays any information about the UUID's safety, using this enumeration:
    The UUID version number (1 through 8, meaningful only
    when the variant is :const:`RFC_4122`).
 
-   .. versionchanged:: 3.14
-      Added UUID version 8.
+   .. versionchanged:: next
+      Added UUID versions 7 and 8.
 
 .. attribute:: UUID.is_safe
 
@@ -222,6 +222,20 @@ The :mod:`uuid` module defines the following functions:
 .. index:: single: uuid5
 
 
+.. function:: uuid7()
+
+   Generate a time-based UUID according to
+   :rfc:`RFC 9562, §5.7 <9562#section-5.7>`.
+
+   The UUID embeds a 48-bit Unix timestamp in milliseconds, a 42-bit counter
+   and 32 random bits. The counter is incremented for UUIDs generated within
+   the same millisecond, which guarantees monotonicity within a millisecond.
+
+   .. versionadded:: next
+
+.. index:: single: uuid7
+
+
 .. function:: uuid8(a=None, b=None, c=None)
 
    Generate a pseudo-random UUID according to
@@ -326,7 +340,7 @@ The :mod:`uuid` module can be executed as a script from the command line.
 
 .. code-block:: sh
 
-   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid8}] [-n NAMESPACE] [-N NAME]
+   python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid7,uuid8}] [-n NAMESPACE] [-N NAME]
 
 The following options are accepted:
 
@@ -342,8 +356,8 @@ The following options are accepted:
    Specify the function name to use to generate the uuid. By default :func:`uuid4`
    is used.
 
-   .. versionadded:: 3.14
-      Allow generating UUID version 8.
+   .. versionchanged:: next
+      Allow generating UUID versions 7 and 8.
 
 .. option:: -n
             --namespace
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index ac0ae8cf0133e6..55c9780b8b3492 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -823,8 +823,8 @@ urllib
 uuid
 ----
 
-* Add support for UUID version 8 via :func:`uuid.uuid8` as specified
-  in :rfc:`9562`.
+* Add support for UUID versions 7 and 8 via :func:`uuid.uuid7` and
+  :func:`uuid.uuid8` respectively, as specified in :rfc:`9562`.
   (Contributed by Bénédikt Tran in :gh:`89083`.)
 
 * :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the
diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py
index 8216c4dd00e35a..8a0a11e8b2a73f 100755
--- a/Lib/test/test_uuid.py
+++ b/Lib/test/test_uuid.py
@@ -1,6 +1,3 @@
-import unittest
-from test import support
-from test.support import import_helper
 import builtins
 import contextlib
 import copy
@@ -10,10 +7,14 @@
 import pickle
 import random
 import sys
+import unittest
 import weakref
 from itertools import product
 from unittest import mock
 
+from test import support
+from test.support import import_helper
+
 py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid'])
 c_uuid = import_helper.import_fresh_module('uuid', fresh=['_uuid'])
 
@@ -724,6 +725,209 @@ def test_uuid5(self):
             equal(u, self.uuid.UUID(v))
             equal(str(u), v)
 
+    def test_uuid7(self):
+        equal = self.assertEqual
+        u = self.uuid.uuid7()
+        equal(u.variant, self.uuid.RFC_4122)
+        equal(u.version, 7)
+
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+        for _ in range(100):
+            counter_hi = random.getrandbits(11)
+            counter_lo = random.getrandbits(30)
+            counter = (counter_hi << 30) | counter_lo
+
+            tail = random.getrandbits(32)
+            # effective number of bits is 32 + 30 + 11 = 73
+            random_bits = counter << 32 | tail
+
+            # set all remaining MSB of fake random bits to 1 to ensure that
+            # the implementation correctly removes them
+            random_bits = (((1 << 7) - 1) << 73) | random_bits
+            random_data = random_bits.to_bytes(10)
+
+            with (
+                mock.patch.multiple(
+                    self.uuid,
+                    _last_timestamp_v7=None,
+                    _last_counter_v7=0,
+                ),
+                mock.patch('time.time_ns', return_value=timestamp_ns),
+                mock.patch('os.urandom', return_value=random_data) as urand
+            ):
+                u = self.uuid.uuid7()
+                urand.assert_called_once_with(10)
+                equal(u.variant, self.uuid.RFC_4122)
+                equal(u.version, 7)
+
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                equal(self.uuid._last_counter_v7, counter)
+
+                unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
+                equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+
+                equal((u.int >> 75) & 1, 0)  # check that the MSB is 0
+                equal((u.int >> 64) & 0xfff, counter_hi)
+                equal((u.int >> 32) & 0x3fff_ffff, counter_lo)
+                equal(u.int & 0xffff_ffff, tail)
+
+    def test_uuid7_uniqueness(self):
+        # Test that UUIDv7-generated values are unique.
+        #
+        # While UUIDv8 has an entropy of 122 bits, those 122 bits may not
+        # necessarily be sampled from a PRNG. On the other hand, UUIDv7
+        # uses os.urandom() as a PRNG which features better randomness.
+        #
+        # Until reaching UNIX_EPOCH + 10'000 years, the probability for
+        # generating two identical UUIDs is negligible.
+        N = 1000
+        uuids = {self.uuid.uuid7() for _ in range(N)}
+        self.assertEqual(len(uuids), N)
+
+        versions = {u.version for u in uuids}
+        self.assertSetEqual(versions, {7})
+
+    def test_uuid7_monotonicity(self):
+        equal = self.assertEqual
+
+        us = [self.uuid.uuid7() for _ in range(10_000)]
+        equal(us, sorted(us))
+
+        with mock.patch.multiple(
+            self.uuid,
+            _last_timestamp_v7=0,
+            _last_counter_v7=0,
+        ):
+            # 1 Jan 2023 12:34:56.123_456_789
+            timestamp_ns = 1672533296_123_456_789  # ns precision
+            timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+            # counter_{hi,lo} are chosen so that "counter + 1" does not overflow
+            counter_hi = random.getrandbits(11)
+            counter_lo = random.getrandbits(29)
+            counter = (counter_hi << 30) | counter_lo
+            self.assertLess(counter + 1, 0x3ff_ffff_ffff)
+
+            tail = random.getrandbits(32)
+            random_bits = counter << 32 | tail
+            random_data = random_bits.to_bytes(10)
+
+            with (
+                mock.patch('time.time_ns', return_value=timestamp_ns),
+                mock.patch('os.urandom', return_value=random_data) as urand
+            ):
+                u1 = self.uuid.uuid7()
+                urand.assert_called_once_with(10)
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                equal(self.uuid._last_counter_v7, counter)
+                equal((u1.int >> 64) & 0xfff, counter_hi)
+                equal((u1.int >> 32) & 0x3fff_ffff, counter_lo)
+                equal(u1.int & 0xffff_ffff, tail)
+
+            # 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns)
+            next_timestamp_ns = 1672533296_123_457_032
+            next_timestamp_ms, _ = divmod(next_timestamp_ns, 1_000_000)
+            equal(timestamp_ms, next_timestamp_ms)
+
+            next_tail_bytes = os.urandom(4)
+            next_tail = int.from_bytes(next_tail_bytes)
+
+            with (
+                mock.patch('time.time_ns', return_value=next_timestamp_ns),
+                mock.patch('os.urandom', return_value=next_tail_bytes) as urand
+            ):
+                u2 = self.uuid.uuid7()
+                urand.assert_called_once_with(4)
+                # same milli-second
+                equal(self.uuid._last_timestamp_v7, timestamp_ms)
+                # 42-bit counter advanced by 1
+                equal(self.uuid._last_counter_v7, counter + 1)
+                equal((u2.int >> 64) & 0xfff, counter_hi)
+                equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1)
+                equal(u2.int & 0xffff_ffff, next_tail)
+
+            self.assertLess(u1, u2)
+
+    def test_uuid7_timestamp_backwards(self):
+        equal = self.assertEqual
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+        fake_last_timestamp_v7 = timestamp_ms + 1
+
+        # counter_{hi,lo} are chosen so that "counter + 1" does not overflow
+        counter_hi = random.getrandbits(11)
+        counter_lo = random.getrandbits(29)
+        counter = (counter_hi << 30) | counter_lo
+        self.assertLess(counter + 1, 0x3ff_ffff_ffff)
+
+        tail_bytes = os.urandom(4)
+        tail = int.from_bytes(tail_bytes)
+
+        with (
+            mock.patch.multiple(
+                self.uuid,
+                _last_timestamp_v7=fake_last_timestamp_v7,
+                _last_counter_v7=counter,
+            ),
+            mock.patch('time.time_ns', return_value=timestamp_ns),
+            mock.patch('os.urandom', return_value=tail_bytes) as urand
+        ):
+            u = self.uuid.uuid7()
+            urand.assert_called_once_with(4)
+            equal(u.variant, self.uuid.RFC_4122)
+            equal(u.version, 7)
+            equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1)
+            unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff
+            equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+            # 42-bit counter advanced by 1
+            equal(self.uuid._last_counter_v7, counter + 1)
+            equal((u.int >> 64) & 0xfff, counter_hi)
+            # 42-bit counter advanced by 1 (counter_hi is untouched)
+            equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1)
+            equal(u.int & 0xffff_ffff, tail)
+
+    def test_uuid7_overflow_counter(self):
+        equal = self.assertEqual
+        # 1 Jan 2023 12:34:56.123_456_789
+        timestamp_ns = 1672533296_123_456_789  # ns precision
+        timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
+
+        new_counter_hi = random.getrandbits(11)
+        new_counter_lo = random.getrandbits(30)
+        new_counter = (new_counter_hi << 30) | new_counter_lo
+
+        tail = random.getrandbits(32)
+        random_bits = (new_counter << 32) | tail
+        random_data = random_bits.to_bytes(10)
+
+        with (
+            mock.patch.multiple(
+                self.uuid,
+                _last_timestamp_v7=timestamp_ms,
+                # same timestamp, but force an overflow on the counter
+                _last_counter_v7=0x3ff_ffff_ffff,
+            ),
+            mock.patch('time.time_ns', return_value=timestamp_ns),
+            mock.patch('os.urandom', return_value=random_data) as urand
+        ):
+            u = self.uuid.uuid7()
+            urand.assert_called_with(10)
+            equal(u.variant, self.uuid.RFC_4122)
+            equal(u.version, 7)
+            # timestamp advanced due to overflow
+            equal(self.uuid._last_timestamp_v7, timestamp_ms + 1)
+            unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff
+            equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
+            # counter overflowed, so we picked a new one
+            equal(self.uuid._last_counter_v7, new_counter)
+            equal((u.int >> 64) & 0xfff, new_counter_hi)
+            equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo)
+            equal(u.int & 0xffff_ffff, tail)
+
     def test_uuid8(self):
         equal = self.assertEqual
         u = self.uuid.uuid8()
diff --git a/Lib/uuid.py b/Lib/uuid.py
index 36809b85cb8ceb..f36b9e1ddf3da2 100644
--- a/Lib/uuid.py
+++ b/Lib/uuid.py
@@ -1,8 +1,8 @@
 r"""UUID objects (universally unique identifiers) according to RFC 4122/9562.
 
 This module provides immutable UUID objects (class UUID) and the functions
-uuid1(), uuid3(), uuid4(), uuid5(), and uuid8() for generating version 1, 3,
-4, 5, and 8 UUIDs as specified in RFC 4122/9562.
+uuid1(), uuid3(), uuid4(), uuid5(), uuid7(), and uuid8() for generating
+version 1, 3, 4, 5, 7, and 8 UUIDs as specified in RFC 4122/9562.
 
 If all you want is a unique ID, you should probably call uuid1() or uuid4().
 Note that uuid1() may compromise privacy since it creates a UUID containing
@@ -101,6 +101,7 @@ class SafeUUID:
 _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
+_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48))
 _RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
 
 
@@ -756,6 +757,71 @@ def uuid5(namespace, name):
     int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
     return UUID._from_int(int_uuid_5)
 
+_last_timestamp_v7 = None
+_last_counter_v7 = 0  # 42-bit counter
+
+def uuid7():
+    """Generate a UUID from a Unix timestamp in milliseconds and random bits.
+
+    UUIDv7 objects feature monotonicity within a millisecond.
+    """
+    # --- 48 ---   -- 4 --   --- 12 ---   -- 2 --   --- 30 ---   - 32 -
+    # unix_ts_ms | version | counter_hi | variant | counter_lo | random
+    #
+    # 'counter = counter_hi | counter_lo' is a 42-bit counter constructed
+    # with Method 1 of RFC 9562, §6.2, and its MSB is set to 0.
+    #
+    # 'random' is a 32-bit random value regenerated for every new UUID.
+    #
+    # If multiple UUIDs are generated within the same millisecond, the LSB
+    # of 'counter' is incremented by 1. When overflowing, the timestamp is
+    # advanced and the counter is reset to a random 42-bit integer with MSB
+    # set to 0.
+
+    def get_counter_and_tail():
+        rand = int.from_bytes(os.urandom(10))
+        # 42-bit counter with MSB set to 0
+        counter = (rand >> 32) & 0x1ff_ffff_ffff
+        # 32-bit random data
+        tail = rand & 0xffff_ffff
+        return counter, tail
+
+    global _last_timestamp_v7
+    global _last_counter_v7
+
+    import time
+    nanoseconds = time.time_ns()
+    timestamp_ms = nanoseconds // 1_000_000
+
+    if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
+        counter, tail = get_counter_and_tail()
+    else:
+        if timestamp_ms < _last_timestamp_v7:
+            timestamp_ms = _last_timestamp_v7 + 1
+        # advance the 42-bit counter
+        counter = _last_counter_v7 + 1
+        if counter > 0x3ff_ffff_ffff:
+            timestamp_ms += 1  # advance the 48-bit timestamp
+            counter, tail = get_counter_and_tail()
+        else:
+            tail = int.from_bytes(os.urandom(4))
+
+    _last_timestamp_v7 = timestamp_ms
+    _last_counter_v7 = counter
+
+    unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
+    counter_msbs = counter >> 30
+    counter_hi = counter_msbs & 0x0fff  # keep 12 bits and clear version bits
+    counter_lo = counter & 0x3fff_ffff  # keep 30 bits and clear variant bits
+
+    int_uuid_7 = unix_ts_ms << 80
+    int_uuid_7 |= counter_hi << 64
+    int_uuid_7 |= counter_lo << 32
+    int_uuid_7 |= tail & 0xffff_ffff
+    # by construction, the variant and version bits are already cleared
+    int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS
+    return UUID._from_int(int_uuid_7)
+
 def uuid8(a=None, b=None, c=None):
     """Generate a UUID from three custom blocks.
 
@@ -788,6 +854,7 @@ def main(): "uuid3": uuid3, "uuid4": uuid4, "uuid5": uuid5, + "uuid7": uuid7, "uuid8": uuid8, } uuid_namespace_funcs = ("uuid3", "uuid5") diff --git a/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst new file mode 100644 index 00000000000000..f85e05622623c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-28-11-27-25.gh-issue-89083.DKL_Sk.rst @@ -0,0 +1,2 @@ +Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in +:rfc:`9562`. Patch by Bénédikt Tran.