From 4a44d1bb75bf455c531b0e1f62768463cfd01b6f Mon Sep 17 00:00:00 2001
From: saif <sirsaif99@gmail.com>
Date: Sun, 1 Dec 2024 00:36:15 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=96=20Release=20version=200.1.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                  |  10 ++
 .pre-commit-config.yaml     |  20 +++
 README.md                   |   1 +
 pyproject.toml              |  72 +++++++++
 src/idcrockford/__init__.py | 312 ++++++++++++++++++++++++++++++++++++
 src/idcrockford/__main__.py |  86 ++++++++++
 src/idcrockford/py.typed    |   0
 uv.lock                     |   7 +
 8 files changed, 508 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 README.md
 create mode 100644 pyproject.toml
 create mode 100644 src/idcrockford/__init__.py
 create mode 100644 src/idcrockford/__main__.py
 create mode 100644 src/idcrockford/py.typed
 create mode 100644 uv.lock

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..505a3b1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..1b021e4
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.2
+    hooks:
+      # run the formatter
+      - id: ruff-format
+      # run the linter
+      - id: ruff
+        verbose: true
+        # Ignore the exit code of ruff to pass the commit, but still show the output
+        entry: bash -c 'ruff "$@" || true' --
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.8.0
+    hooks:
+      - id: mypy
+        # Enable verbose output for the 'mypy' hook.
+        verbose: true
+        # Ignore the exit code of mypy to pass the commit, but still show the output
+        entry: bash -c 'mypy "$@" || true' --
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..936d120
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# IDCrockford
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..bb717ba
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,72 @@
+[project]
+name = "idcrockford"
+dynamic = ["version"]
+description = "Crockford Base32 ID generation and validation"
+readme = "README.md"
+authors = [
+    { name = "saif", email = "sirsaif99@gmail.com" }
+]
+requires-python = ">=3.12"
+dependencies = []
+
+[project.scripts]
+idcrockford = "idcrockford.__main__:main"
+
+[tool.flit.module]
+name = "idcrockford"
+
+[build-system]
+requires = ["flit_core>=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[tool.ruff]
+line-length = 110
+show-fixes = true
+
+[tool.ruff.lint]
+extend-select = [
+    "F",    # pyflakes
+    "E",    # pycodestyle (PEP 8) E for error
+    "W",    # pycodestyle (PEP 8) W for warning
+    "I",    # isort (import sorting)
+    "B",    # flake8-bugbear
+    "N",    # PEP8 Naming Conventions
+    "S",    # flake8-bandit for Security checks
+    "T20",  # flake8-print
+    "PT",   # flake8-pytest-style
+    "Q",    # flake8-quotes
+    "TID",  # flake8-tidy-imports
+    "SIM",  # flake8-simplify
+    "DTZ",  # flake8-datetimez
+    "C90",  # mccabe (complexity)
+]
+extend-ignore = [
+    "B012",
+    "N818",
+    "N999",
+    "TID252",
+    "W191",
+    "E111",
+    "E114",
+    "E117",
+    "Q000",
+    "Q001",
+    "Q002",
+    "Q003",
+]
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.format]
+quote-style = "double"
+docstring-code-format = true
+indent-style = "tab"
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+
+[tool.ruff.lint.per-file-ignores]
+"*/tests/*" = ["S101"]
+"*/test_*" = ["S101"]
+"*/__main__.py" = ["T20"]
diff --git a/src/idcrockford/__init__.py b/src/idcrockford/__init__.py
new file mode 100644
index 0000000..cc8486d
--- /dev/null
+++ b/src/idcrockford/__init__.py
@@ -0,0 +1,312 @@
+__version__ = "0.1.0"
+
+import re
+import secrets
+from typing import Dict, Pattern
+
+# =============================================================================
+# Constants
+# =============================================================================
+
+# The 32 value symbols (I, L, O and U are left out), then the 5 symbols used only as check symbols.
+CROCKFORD_BASE32_ALPHABET = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+CHECK_SYMBOLS = "*~$=U"
+
+# Lookup tables between values and symbols.
+# Decoding also accepts the lower-case form of every symbol.
+ENCODE_MAP = dict(enumerate(CROCKFORD_BASE32_ALPHABET))
+DECODE_MAP = {symbol: value for value, symbol in ENCODE_MAP.items()}
+DECODE_MAP |= {symbol.lower(): value for symbol, value in DECODE_MAP.items()}
+
+# Look-alike characters mapped to the digit they stand for.
+NORMALIZE_MAP: Dict[str, str] = {
+	**dict.fromkeys("IiLl", "1"),
+	**dict.fromkeys("Oo", "0"),
+}
+
+# Shape of a valid string: one or more value symbols, an optional check symbol,
+# then any number of "=" padding characters.
+VALID_SYMBOLS_PATTERN: Pattern = re.compile(
+	f"^[{CROCKFORD_BASE32_ALPHABET}]+[{re.escape(CHECK_SYMBOLS)}]?=*$"
+)
+
+# =============================================================================
+# Utils
+# =============================================================================
+
+
+def _calculate_checksum(number: int) -> str:
+	"""Return the check symbol for ``number``: its residue modulo the combined symbol count."""
+	symbols = CROCKFORD_BASE32_ALPHABET + CHECK_SYMBOLS
+	return symbols[number % len(symbols)]
+
+
+def _split_string(symbol_string: str, split: int = 4) -> str:
+	"""Join fixed-size chunks of ``symbol_string`` with hyphens.
+
+	Args:
+		symbol_string: String to split.
+		split: Chunk size (default: 4). If 0, no splitting is performed.
+	Raises:
+		ValueError: If ``split`` is negative (the chunk range would be empty and drop the data).
+	"""
+	if not split:
+		return symbol_string
+	if split < 0:
+		raise ValueError(f"split must be non-negative, got {split}")
+	return "-".join(symbol_string[pos : pos + split] for pos in range(0, len(symbol_string), split))
+
+
+# =============================================================================
+# Exceptions
+# =============================================================================
+
+
+class Base32CrockfordError(Exception):
+	"""Root of the exception hierarchy defined by this module.
+
+	``EncodingError`` and ``DecodingError`` both derive from it."""
+
+
+class EncodingError(Base32CrockfordError):
+	"""Failure to encode a value.
+
+	Raised for negative integers and for unsupported input types."""
+
+
+class DecodingError(Base32CrockfordError):
+	"""Failure to decode a symbol string.
+
+	Raised for non-string input and for symbols that have no entry in the decode map."""
+
+
+# =============================================================================
+# Encoders
+# =============================================================================
+
+
+class Base32Crockford:
+	"""Crockford Base32 encoder/decoder with optional check symbol, hyphenation and padding.
+
+	Args:
+		checksum: Append a check symbol when encoding and verify it when decoding.
+		split: Chunk size for hyphen-separated output; a falsy value disables splitting.
+		padding: Pad encoded output with "=" up to a multiple of 8 symbols.
+	"""
+
+	def __init__(
+		self,
+		*,
+		checksum: bool = False,
+		split: int | bool = False,
+		padding: bool = False,
+	):
+		self.checksum = checksum
+		self.split = split
+		self.padding = padding
+
+	def encode(self, data: int | bytes) -> str:
+		"""Encode a non-negative integer or a bytes/bytearray value.
+
+		Raises:
+			EncodingError: If ``data`` is a negative integer or an unsupported type.
+		"""
+		if isinstance(data, int):
+			encoded, num = self._encode_integer(data), data
+		elif isinstance(data, (bytes, bytearray)):
+			encoded = self._encode_bytes(data)
+			# The final symbol is right-padded with zero bits, so the check symbol must cover
+			# the padded value: that is the number decode() rebuilds from the symbols.
+			num = int.from_bytes(data, "big") << (-8 * len(data) % 5)
+		else:
+			raise EncodingError(f"Unsupported data type: {type(data).__name__}")
+
+		if self.checksum:
+			encoded += _calculate_checksum(num)
+		if self.padding:
+			encoded += "=" * (-len(encoded) % 8)
+		return _split_string(encoded, self.split)
+
+	def decode(self, data: str) -> int:
+		"""Decode a symbol string to an integer, verifying the check symbol if enabled.
+
+		Raises:
+			DecodingError: If ``data`` is not a string or holds an undecodable symbol.
+			ValueError: If normalization fails or the check symbol is missing or wrong.
+		"""
+		if not isinstance(data, str):
+			raise DecodingError(f"Cannot decode data of type: {type(data).__name__}")
+
+		normalized = normalize(data)
+		if not self.checksum:
+			return self._decode_string(normalized)
+
+		if len(normalized) >= 2:
+			symbol_string, check_symbol = normalized[:-1], normalized[-1]
+			number = self._decode_string(symbol_string)
+			if check_symbol == _calculate_checksum(number):
+				return number
+
+		# normalize() strips every trailing "=", so a genuine "=" check symbol (value 35) is lost;
+		# retry with the whole string as the value when the input ended in "=".
+		if data.replace("-", "").endswith("=") and normalized[-1] in DECODE_MAP:
+			number = self._decode_string(normalized)
+			if _calculate_checksum(number) == "=":
+				return number
+		if len(normalized) < 2:
+			raise ValueError("String too short to contain checksum")
+		raise ValueError(f"Invalid check symbol '{check_symbol}' for string '{symbol_string}'")
+
+	@classmethod
+	def generate(cls, size: int = 16, **kwargs) -> str:
+		"""Encode ``size`` random bytes from ``secrets`` with an encoder built from ``kwargs``."""
+		return cls(**kwargs).encode(secrets.token_bytes(size))
+
+	# Private methods
+
+	def _encode_integer(self, num: int) -> str:
+		"""Encode a non-negative integer to base32, most significant symbol first."""
+		if not isinstance(num, int):
+			raise EncodingError(f"Input must be an integer, got {type(num).__name__}")
+		if num < 0:
+			raise EncodingError("Cannot encode negative integers")
+
+		symbols = []
+		while num >= 32:
+			num, remainder = divmod(num, 32)
+			symbols.append(CROCKFORD_BASE32_ALPHABET[remainder])
+		symbols.append(CROCKFORD_BASE32_ALPHABET[num])
+		return "".join(reversed(symbols))
+
+	def _encode_bytes(self, data: bytes | bytearray) -> str:
+		"""Encode bytes to base32, one symbol per 5-bit group.
+
+		If the last group has fewer than 5 bits, it is right-padded with zeros.
+		"""
+		pad_bits = -8 * len(data) % 5
+		value = int.from_bytes(data, "big") << pad_bits
+		top_shift = 8 * len(data) + pad_bits - 5
+		# Walk the 5-bit groups from most to least significant so leading zero groups are kept.
+		return "".join(
+			CROCKFORD_BASE32_ALPHABET[(value >> shift) & 0x1F] for shift in range(top_shift, -1, -5)
+		)
+
+	def _decode_string(self, data: str) -> int:
+		"""Normalize ``data`` and fold its symbols into an integer, most significant first.
+
+		Raises:
+			DecodingError: If a symbol has no entry in the decode map.
+		"""
+		normalized = normalize(data)
+		try:
+			num = 0
+			for char in normalized:
+				num = num * 32 + DECODE_MAP[char]
+			return num
+		except KeyError as e:
+			raise DecodingError(f"Invalid character in Base32 string: {e}") from e
+
+
+# =============================================================================
+# Fast API
+# =============================================================================
+
+
+class CFIdentifierConfig:
+	"""Validate and generate Crockford Base32 IDs (suitable for use in FastAPI).
+
+	Args:
+		checksum: Whether IDs carry a trailing check symbol that ``validate`` verifies.
+		size: Number of random bytes encoded into each generated ID.
+	"""
+
+	def __init__(self, *, checksum: bool = True, size: int = 16):
+		self.checksum = checksum
+		self.size = size
+
+	def validate(self, symbol_string: str) -> str:
+		"""Return the normalized ID after verifying its check symbol (when checksums are enabled).
+
+		Raises:
+			ValueError: If the string cannot be normalized, is too short, or fails the check.
+		"""
+		try:
+			normalized = normalize(symbol_string)
+		except ValueError as e:
+			raise ValueError(f"Invalid Crockford Base32 string: {e}") from e
+		if not self.checksum:
+			return normalized
+
+		value_part, check = normalized[:-1], normalized[-1]
+		try:
+			if value_part:
+				expected_check = _calculate_checksum(Base32Crockford().decode(value_part))
+				if check == expected_check:
+					return normalized
+			# normalize() strips trailing "=", which is also the check symbol for value 35, so
+			# an input that ended in "=" is retried with "=" as the check of the whole string.
+			retry = symbol_string.replace("-", "").endswith("=") and check in DECODE_MAP
+			if retry and _calculate_checksum(Base32Crockford().decode(normalized)) == "=":
+				return normalized + "="
+		except (ValueError, DecodingError) as e:
+			raise ValueError(f"Invalid Crockford Base32 string: {e}") from e
+		if not value_part:
+			raise ValueError("String too short to contain checksum")
+		mismatch = ValueError(f"Invalid checksum '{check}', expected '{expected_check}'")
+		raise ValueError(f"Invalid Crockford Base32 string: {mismatch}") from mismatch
+
+	def generate(self) -> str:
+		"""Generate an unsplit, unpadded ID from ``size`` random bytes."""
+		return generate(size=self.size, checksum=self.checksum, split=False, padding=False)
+
+
+# =============================================================================
+# Public API
+# =============================================================================
+
+
+def encode(
+	data: int | bytes,
+	*,
+	checksum: bool = False,
+	split: int | bool = False,
+	padding: bool = False,
+) -> str:
+	"""Encode ``data`` with a one-off ``Base32Crockford`` configured from the keyword options."""
+	return Base32Crockford(checksum=checksum, split=split, padding=padding).encode(data)
+
+
+def decode(symbol_string: str, *, checksum: bool = False) -> int:
+	"""Decode ``symbol_string`` to an integer, verifying a trailing check symbol if requested."""
+	return Base32Crockford(checksum=checksum).decode(symbol_string)
+
+
+def normalize(symbol_string: str, strict: bool = False) -> str:
+	"""Return the canonical form of ``symbol_string``.
+
+	Hyphens and trailing "=" are removed, I/L become 1, O becomes 0 and letters are upper-cased.
+
+	Raises:
+		ValueError: If the result contains invalid symbols, or if ``strict`` is set and the
+			input was not already in canonical form.
+	"""
+	stripped = symbol_string.replace("-", "").rstrip("=")
+	canonical = stripped.translate(str.maketrans(NORMALIZE_MAP)).upper()
+
+	# The pattern requires at least one symbol, so an empty result is rejected here as well.
+	if VALID_SYMBOLS_PATTERN.match(canonical) is None:
+		raise ValueError(f"string '{canonical}' contains invalid characters")
+	if strict and canonical != symbol_string:
+		raise ValueError(f"string '{symbol_string}' requires normalization")
+
+	return canonical
+
+
+def generate(
+	size: int = 16, *, checksum: bool = False, split: int | bool = False, padding: bool = False
+) -> str:
+	"""Return ``size`` random bytes encoded as a Crockford Base32 identifier.
+
+	The keyword options configure the ``Base32Crockford`` encoder that is used.
+	"""
+	return Base32Crockford.generate(size, checksum=checksum, split=split, padding=padding)
diff --git a/src/idcrockford/__main__.py b/src/idcrockford/__main__.py
new file mode 100644
index 0000000..08b31e1
--- /dev/null
+++ b/src/idcrockford/__main__.py
@@ -0,0 +1,86 @@
+import argparse
+import sys
+
+from . import Base32CrockfordError, decode, encode, generate, normalize
+
+
+def main():
+	"""Run one Crockford Base32 operation from the command line and print its result.
+
+	A ``Base32CrockfordError`` or ``ValueError`` raised by the operation is reported on stderr
+	and the process exits with status 1.
+	"""
+	parser = argparse.ArgumentParser(description="Crockford Base32 Utility")
+	parser.add_argument(
+		"command",
+		choices=["encode", "decode", "normalize", "generate"],
+		help="Operation to perform",
+	)
+	parser.add_argument(
+		"input",
+		nargs="?",
+		default=None,
+		# encode only accepts integers: any other text is rejected below with a usage error.
+		help="Input to process. For encode: non-negative integer. For generate: size in bytes (optional)",
+	)
+	parser.add_argument(
+		"--checksum",
+		action="store_true",
+		help="Add/validate checksum symbol",
+	)
+	parser.add_argument(
+		"--split",
+		type=int,
+		default=0,
+		metavar="SIZE",
+		help="Split encoded string with hyphens (chunk size, default: no splitting)",
+	)
+	parser.add_argument(
+		"--padding",
+		action="store_true",
+		help="Add padding characters (=) to output",
+	)
+	parser.add_argument(
+		"--strict",
+		action="store_true",
+		help="Strict mode for normalize command - error if normalization needed",
+	)
+	args = parser.parse_args()
+
+	command, raw = args.command, args.input
+	output_options = {
+		"checksum": args.checksum,
+		"split": args.split,
+		"padding": args.padding,
+	}
+
+	# Every command except generate requires the positional input.
+	if raw is None and command != "generate":
+		parser.error(f"{command} command requires an input value")
+
+	try:
+		if command == "generate":
+			# An omitted (or empty) size falls back to 16 random bytes.
+			result = generate(int(raw) if raw else 16, **output_options)
+		elif command == "encode":
+			try:
+				number = int(raw)
+			except ValueError:
+				parser.error("encode command requires an integer input")
+			result = encode(number, **output_options)
+		elif command == "decode":
+			result = decode(raw, checksum=args.checksum)
+		else:
+			result = normalize(raw, strict=args.strict)
+
+		print(result)
+	except Base32CrockfordError as e:
+		print(f"Error: {e}", file=sys.stderr)
+		sys.exit(1)
+	except ValueError as e:
+		print(f"Error: Invalid input - {e}", file=sys.stderr)
+		sys.exit(1)
+
+
+if __name__ == "__main__":  # executed as a script or via ``python -m idcrockford``
+	main()
diff --git a/src/idcrockford/py.typed b/src/idcrockford/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..70cb097
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,7 @@
+version = 1
+requires-python = ">=3.12"
+
+[[package]]
+name = "idcrockford"
+version = "0.1.0"
+source = { editable = "." }