From 4a44d1bb75bf455c531b0e1f62768463cfd01b6f Mon Sep 17 00:00:00 2001 From: saif Date: Sun, 1 Dec 2024 00:36:15 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=96=20Release=20version=200.1.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 10 ++ .pre-commit-config.yaml | 20 +++ README.md | 1 + pyproject.toml | 72 +++++++++ src/idcrockford/__init__.py | 312 ++++++++++++++++++++++++++++++++++++ src/idcrockford/__main__.py | 86 ++++++++++ src/idcrockford/py.typed | 0 uv.lock | 7 + 8 files changed, 508 insertions(+) create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 src/idcrockford/__init__.py create mode 100644 src/idcrockford/__main__.py create mode 100644 src/idcrockford/py.typed create mode 100644 uv.lock diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..505a3b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1b021e4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.2 + hooks: + # run the formatter + - id: ruff-format + # run the linter + - id: ruff + verbose: true + # Ignore the exit code of ruff to pass the commit, but still show the output + entry: bash -c 'ruff "$@" || true' -- + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy + # Enable verbose output for the 'mypy' hook. + verbose: true + # Ignore the exit code of mypy to pass the commit, but still show the output + entry: bash -c 'mypy "$@" || true' -- \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..936d120 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# IDCrockford \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bb717ba --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,72 @@ +[project] +name = "idcrockford" +dynamic = ["version"] +description = "Crockford Base32 ID generation and validation" +readme = "README.md" +authors = [ + { name = "saif", email = "sirsaif99@gmail.com" } +] +requires-python = ">=3.12" +dependencies = [] + +[project.scripts] +idcrockford = "idcrockford.__main__:main" + +[tool.flit.module] +name = "idcrockford" + +[build-system] +requires = ["flit_core>=3.2,<4"] +build-backend = "flit_core.buildapi" + +[tool.ruff] +line-length = 110 +show-fixes = true + +[tool.ruff.lint] +extend-select = [ + "F", # pyflakes + "E", # pycodestyle (PEP 8) E for error + "W", # pycodestyle (PEP 8) W for warning + "I", # flake8-isort + "B", # flake8-bugbear + "N", # PEP8 Naming Conventions + "S", # flake8-bandit for Security checks + "T20", # flake8-print + "PT", # flake8-pytest-style + "Q", # flake8-quotes + "TID", # flake8-tidy-imports + "SIM", # flake8-simplify + "DTZ", # flake8-datetimez + "C90", # flake8-complexity +] +extend-ignore = [ + "B012", + "N818", + "N999", + "TID252", + "W191", + "E111", + "E114", + "E117", + "Q000", + "Q001", + "Q002", + "Q003", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.format] +quote-style = "double" +docstring-code-format = true +indent-style = "tab" + +[tool.ruff.lint.isort] +combine-as-imports = true + +[tool.ruff.lint.per-file-ignores] +"*/tests/*" = ["S101"] +"*/test_*" = ["S101"] +"*/__main__.py" = ["T20"] diff --git a/src/idcrockford/__init__.py b/src/idcrockford/__init__.py new file mode 100644 index 0000000..cc8486d --- /dev/null +++ b/src/idcrockford/__init__.py @@ -0,0 +1,312 @@ +__version__ = "0.1.0" + +import re +import secrets +from typing import Dict, Pattern + +# ============================================================================= +# Constants +# ============================================================================= + +CROCKFORD_BASE32_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ" +CHECK_SYMBOLS = "*~$=U" + +ENCODE_MAP = {i: char for i, char in enumerate(CROCKFORD_BASE32_ALPHABET)} +DECODE_MAP = {char: i for i, char in enumerate(CROCKFORD_BASE32_ALPHABET)} +DECODE_MAP.update({char.lower(): i for char, i in DECODE_MAP.items()}) + +# Normalization mapping for ambiguous characters +NORMALIZE_MAP: Dict[str, str] = { + "I": "1", + "i": "1", + "L": "1", + "l": "1", + "O": "0", + "o": "0", +} + +# Regular expression for valid symbols +VALID_SYMBOLS_PATTERN: Pattern = re.compile( + f"^[{CROCKFORD_BASE32_ALPHABET}]+[{re.escape(CHECK_SYMBOLS)}]?=*$" +) + +# ============================================================================= +# Utils +# ============================================================================= + + +def _calculate_checksum(number: int) -> str: + check_base = len(CROCKFORD_BASE32_ALPHABET + CHECK_SYMBOLS) + check_value = number % check_base + return (CROCKFORD_BASE32_ALPHABET + CHECK_SYMBOLS)[check_value] + + +def _split_string(symbol_string: str, split: int = 4) -> str: + """Split string into chunks of specified size. + + Args: + symbol_string: String to split + split: Chunk size (default: 4). If 0, no splitting is performed. + """ + if not split: + return symbol_string + + chunks = [] + for pos in range(0, len(symbol_string), split): + chunks.append(symbol_string[pos : pos + split]) + return "-".join(chunks) + + +# ============================================================================= +# Exceptions +# ============================================================================= + + +class Base32CrockfordError(Exception): + """Base exception for Base32Crockford module.""" + + pass + + +class EncodingError(Base32CrockfordError): + """Exception raised for errors during encoding.""" + + pass + + +class DecodingError(Base32CrockfordError): + """Exception raised for errors during decoding.""" + + pass + + +# ============================================================================= +# Encoders +# ============================================================================= + + +class Base32Crockford: + def __init__( + self, + *, + checksum: bool = False, + split: int | bool = False, + padding: bool = False, + ): + self.checksum = checksum + self.split = split + self.padding = padding + + def encode(self, data: int | bytes) -> str: + # Handle different input types + if isinstance(data, int): + if data < 0: + raise EncodingError("Cannot encode negative integers") + encoded = self._encode_integer(data) + num = data + elif isinstance(data, (bytes, bytearray)): + encoded = self._encode_bytes(data) + num = int.from_bytes(data, "big") + else: + raise EncodingError(f"Unsupported data type: {type(data).__name__}") + + # Apply options + if self.checksum: + encoded += _calculate_checksum(num) + + if self.padding: + while len(encoded) % 8 != 0: + encoded += "=" + + return _split_string(encoded, self.split) + + def decode(self, data: str) -> int: + if not isinstance(data, str): + raise DecodingError(f"Cannot decode data of type: {type(data).__name__}") + + normalized = normalize(data) + + if self.checksum: + if len(normalized) < 2: + raise ValueError("String too short to contain checksum") + symbol_string, check_symbol = normalized[:-1], normalized[-1] + number = self._decode_string(symbol_string) + expected_check = _calculate_checksum(number) + if check_symbol != expected_check: + raise ValueError(f"Invalid check symbol '{check_symbol}' for string '{symbol_string}'") + return number + + return self._decode_string(normalized) + + @classmethod + def generate(cls, size: int = 16, **kwargs) -> str: + encoder = cls(**kwargs) + return encoder.encode(secrets.token_bytes(size)) + + # ========================================================================= + # Private methods + # ========================================================================= + + def _encode_integer(self, num: int) -> str: + """Encode integer to base32 string.""" + if not isinstance(num, int): + raise EncodingError(f"Input must be an integer, got {type(num).__name__}") + if num < 0: + raise EncodingError("Cannot encode negative integers") + + if num == 0: + return ENCODE_MAP[0] # 0 + + # Convert to base32 + output = "" + while num > 0: + num, remainder = divmod(num, 32) + output = CROCKFORD_BASE32_ALPHABET[remainder] + output + + return output + + def _encode_bytes(self, data: bytes | bytearray) -> str: + """Encode bytes to base32 string. + + Each 5-bit group is encoded as a single base32 character. + If the last group has fewer than 5 bits, it's right-padded with zeros. + """ + output = "" + bits = 0 + buffer = 0 + + # Process each byte + for byte in data: + # Add byte to buffer + buffer = (buffer << 8) | byte + bits += 8 + + # Extract 5-bit groups + while bits >= 5: + # Take top 5 bits + index = (buffer >> (bits - 5)) & 0x1F + output += CROCKFORD_BASE32_ALPHABET[index] + bits -= 5 + + # Handle remaining bits (if any) + if bits > 0: + # Right-pad with zeros to make a complete 5-bit group + index = (buffer << (5 - bits)) & 0x1F + output += CROCKFORD_BASE32_ALPHABET[index] + + return output + + def _decode_string(self, data: str) -> int: + normalized = normalize(data) + try: + num = 0 + for char in normalized: + num = num * 32 + DECODE_MAP[char] + return num + except KeyError as e: + raise DecodingError(f"Invalid character in Base32 string: {e}") from e + + +# ============================================================================= +# Fast API +# ============================================================================= + + +class CFIdentifierConfig: + """Configuration and methods for Crockford Base32 ID handling. + + Suitable for use in FastAPI: + - validates IDs + - generates IDs + """ + + def __init__( + self, + *, + checksum: bool = True, + size: int = 16, + ): + self.checksum = checksum + self.size = size + + def validate(self, symbol_string: str) -> str: + try: + normalized = normalize(symbol_string) + except ValueError as e: + raise ValueError(f"Invalid Crockford Base32 string: {e}") from e + + if self.checksum: + if len(normalized) < 2: + raise ValueError("String too short to contain checksum") + + value_part, check = normalized[:-1], normalized[-1] + + try: + number = Base32Crockford().decode(value_part) + expected_check = _calculate_checksum(number) + if check != expected_check: + raise ValueError(f"Invalid checksum '{check}', expected '{expected_check}'") + except (ValueError, DecodingError) as e: + raise ValueError(f"Invalid Crockford Base32 string: {e}") from e + + return normalized + + def generate(self) -> str: + return generate( + size=self.size, + checksum=self.checksum, + split=False, + padding=False, + ) + + +# ============================================================================= +# Public API +# ============================================================================= + + +def encode( + data: int | bytes, + *, + checksum: bool = False, + split: int | bool = False, + padding: bool = False, +) -> str: + encoder = Base32Crockford(checksum=checksum, split=split, padding=padding) + return encoder.encode(data) + + +def decode(symbol_string: str, *, checksum: bool = False) -> int: + decoder = Base32Crockford(checksum=checksum) + return decoder.decode(symbol_string) + + +def normalize(symbol_string: str, strict: bool = False) -> str: + original = symbol_string + # Remove hyphens and padding + norm_string = symbol_string.replace("-", "").rstrip("=") + + # Replace ambiguous characters and convert to uppercase + for char, replacement in NORMALIZE_MAP.items(): + norm_string = norm_string.replace(char, replacement) + norm_string = norm_string.upper() + + # Validate characters + if not VALID_SYMBOLS_PATTERN.match(norm_string): + raise ValueError(f"string '{norm_string}' contains invalid characters") + + # Check if normalization was needed in strict mode + if strict and norm_string != original: + raise ValueError(f"string '{original}' requires normalization") + + return norm_string + + +def generate( + size: int = 16, + *, + checksum: bool = False, + split: int | bool = False, + padding: bool = False, +) -> str: + return Base32Crockford.generate(size, checksum=checksum, split=split, padding=padding) diff --git a/src/idcrockford/__main__.py b/src/idcrockford/__main__.py new file mode 100644 index 0000000..08b31e1 --- /dev/null +++ b/src/idcrockford/__main__.py @@ -0,0 +1,86 @@ +import argparse +import sys + +from . import Base32CrockfordError, decode, encode, generate, normalize + + +def main(): + parser = argparse.ArgumentParser(description="Crockford Base32 Utility") + parser.add_argument( + "command", + choices=["encode", "decode", "normalize", "generate"], + help="Operation to perform", + ) + parser.add_argument( + "input", + nargs="?", + default=None, + help="Input to process. For encode: integer or string. For generate: size (optional)", + ) + parser.add_argument( + "--checksum", + action="store_true", + help="Add/validate checksum symbol", + ) + parser.add_argument( + "--split", + type=int, + default=0, + metavar="SIZE", + help="Split encoded string with hyphens (chunk size, default: no splitting)", + ) + parser.add_argument( + "--padding", + action="store_true", + help="Add padding characters (=) to output", + ) + parser.add_argument( + "--strict", + action="store_true", + help="Strict mode for normalize command - error if normalization needed", + ) + + args = parser.parse_args() + + try: + if args.command == "generate": + size = int(args.input) if args.input else 16 + result = generate( + size, + checksum=args.checksum, + split=args.split, + padding=args.padding, + ) + elif args.command == "encode": + if args.input is None: + parser.error("encode command requires an input value") + try: + input_value = int(args.input) + except ValueError: + parser.error("encode command requires an integer input") + result = encode( + input_value, + checksum=args.checksum, + split=args.split, + padding=args.padding, + ) + elif args.command == "decode": + if args.input is None: + parser.error("decode command requires an input value") + result = decode(args.input, checksum=args.checksum) + elif args.command == "normalize": + if args.input is None: + parser.error("normalize command requires an input value") + result = normalize(args.input, strict=args.strict) + + print(result) + except Base32CrockfordError as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + except ValueError as e: + print(f"Error: Invalid input - {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/idcrockford/py.typed b/src/idcrockford/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..70cb097 --- /dev/null +++ b/uv.lock @@ -0,0 +1,7 @@ +version = 1 +requires-python = ">=3.12" + +[[package]] +name = "idcrockford" +version = "0.1.0" +source = { editable = "." }