-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prototype SQIDs in line with ADR 0018
- Loading branch information
1 parent
5b4272d
commit e8a5b94
Showing
3 changed files
with
68 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from sqids import Sqids | ||
|
||
# HASH_SUBSTRING_LENGTH must be strictly less than 16; | ||
# with 16, hashes whose first hex digit is 8 or higher give a number too large to be turned into a sqid. | ||
# (under the hood, sqids only encode numbers less than the hex value | ||
# 8000 0000 0000 0000; they do not exist for that value or anything higher) | ||
HASH_SUBSTRING_LENGTH = 12 | ||
|
||
# SQID_ALPHABET contains no vowels (y is treated as a vowel and excluded too) | ||
SQID_ALPHABET = "bcdfghjklmnpqrstvwxz" | ||
SQID_MIN_LENGTH = 8 | ||
|
||
sqids = Sqids(alphabet=SQID_ALPHABET, min_length=SQID_MIN_LENGTH) | ||
|
||
|
||
def _hex_digest_to_int(digest_string: str) -> int:
    """Convert the leading part of a hex digest into an integer.

    The digest is UTF-8 encoded, cut down to its first
    ``HASH_SUBSTRING_LENGTH`` bytes, and that prefix is parsed as base 16.

    Args:
        digest_string: Hexadecimal digest text; may be shorter or longer
            than ``HASH_SUBSTRING_LENGTH``.

    Returns:
        The integer value of the truncated prefix.

    Raises:
        ValueError: If the truncated prefix is not a valid base-16 literal.
    """
    encoded = digest_string.encode("utf-8")
    # The limit is read at call time, so changes to the module constant
    # take effect on the next call.
    prefix = encoded[:HASH_SUBSTRING_LENGTH]
    return int(prefix, base=16)
|
||
|
||
def hex_digest_to_sqid(digest_string: str) -> str:
    """Encode a hex digest as a sqid string.

    The digest is first reduced to an integer by ``_hex_digest_to_int`` and
    that single number is then encoded with the module-level ``sqids``
    encoder.

    Args:
        digest_string: Hexadecimal digest text.

    Returns:
        The sqid produced for the digest's integer value.
    """
    return sqids.encode([_hex_digest_to_int(digest_string)])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import content_sqid | ||
import pytest | ||
from content_sqid import _hex_digest_to_int, hex_digest_to_sqid | ||
|
||
|
||
@pytest.fixture()
def no_hash_limit():
    """Temporarily lift the cap on how much of the content hash is consumed.

    Sets ``content_sqid.HASH_SUBSTRING_LENGTH`` to 999 while the test runs
    and puts the previous value back afterwards.
    """
    saved_length = content_sqid.HASH_SUBSTRING_LENGTH
    content_sqid.HASH_SUBSTRING_LENGTH = 999
    yield
    content_sqid.HASH_SUBSTRING_LENGTH = saved_length
|
||
|
||
def test_hex_to_int():
    """Pin the integer and the sqid produced for a known digest.

    These expectations must not drift: if they do, our hashes are no longer
    stable. Changing the alphabet is one thing that will change them.
    """
    digest = "deadbeef"
    assert _hex_digest_to_int(digest) == 3735928559
    assert hex_digest_to_sqid(digest) == "hdgcqtcnm"
|
||
|
||
def test_min_length():
    """A digest with a very low value still yields a sqid of acceptable length."""
    sqid = hex_digest_to_sqid("0")
    assert sqid == "xcsrdnmp"
|
||
|
||
def test_max_value():
    """Pin the sqid for an all-``f`` digest, which should be the largest value we can ever get."""
    all_fs = "ffffffffffffffffffffffffffffff"
    assert hex_digest_to_sqid(all_fs) == "tspwbpshvpklr"
|
||
|
||
def test_hex_truncation():
    """A long hex digest converts to the same integer as its truncated prefix."""
    full_digest = "2597c39e63c20d69dc0cb189a88a8ab127c335cdcbf1d9ee43de3f711002de52"
    truncated = "2597c39e63c2"
    assert _hex_digest_to_int(full_digest) == _hex_digest_to_int(truncated)
|
||
|
||
def test_demo_limit_of_truncation(no_hash_limit):
    """Show that, with the length cap lifted, a 16-character hash can fail.

    ``7fffffffffffffff`` still encodes to a non-empty sqid, while
    ``8000000000000000`` is rejected with ``ValueError``.
    """
    assert hex_digest_to_sqid("7fffffffffffffff")
    # Only the call goes inside the context manager: it is expected to raise,
    # so an ``assert`` wrapped around it would never be evaluated.
    with pytest.raises(ValueError):
        hex_digest_to_sqid("8000000000000000")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ notifications-python-client~=9.0 | |
mypy-boto3-s3 | ||
mypy-boto3-sns | ||
python-dotenv | ||
sqids |