Commit: Merge branch 'dev'
rwnobrega committed Jan 3, 2025
2 parents 22da35d + f4423ee commit 1b8006f
Showing 21 changed files with 452 additions and 83 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,13 @@
> [!NOTE]
> Changelog started with version v0.10.0.

## v0.16.0 (2025-01-03)

### Added

- Implemented [Shannon](https://komm.dev/ref/ShannonCode) and [Fano](https://komm.dev/ref/FanoCode) codes.
- Implemented method to compute the [Kraft parameter](https://komm.dev/ref/FixedToVariableCode#kraft_parameter) of a fixed-to-variable code.

## v0.15.1 (2024-12-31)

### Fixed
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "komm"
version = "0.15.1"
version = "0.16.0"
description = "An open-source library for Python 3 providing tools for analysis and simulation of analog and digital communication systems."
readme = "README.md"
authors = [{ name = "Roberto W. Nobrega", email = "[email protected]" }]
@@ -42,6 +42,7 @@ lint = [
]
test = [
"pytest==8.3.3",
"pytest-repeat==0.9.3",
"pytest-benchmark==5.1.0",
"pytest-cov==6.0.0",
]
2 changes: 2 additions & 0 deletions site/toc.yaml
@@ -73,6 +73,8 @@ Sequences:
Source coding:
Lossless coding:
- FixedToVariableCode
- ShannonCode
- FanoCode
- HuffmanCode
- VariableToFixedCode
- TunstallCode
2 changes: 1 addition & 1 deletion src/komm/_error_control_decoders/WagnerDecoder.py
@@ -11,7 +11,7 @@
@dataclass
class WagnerDecoder(base.BlockDecoder[SingleParityCheckCode]):
r"""
-    Wagner decoder for [single parity-check codes](/ref/SingleParityCheckCode). For more information, see <cite>CF07, Sec. III.C</cite>.
+    Wagner decoder for [single parity-check codes](/ref/SingleParityCheckCode). For more details, see <cite>CF07, Sec. III.C</cite>.

Parameters:
code: The single parity-check code to be used for decoding.
83 changes: 83 additions & 0 deletions src/komm/_lossless_coding/FanoCode.py
@@ -0,0 +1,83 @@
import numpy as np
import numpy.typing as npt
from tqdm import tqdm

from .._util.information_theory import PMF
from .FixedToVariableCode import FixedToVariableCode
from .util import Word, empty_mapping, extended_probabilities


def FanoCode(
pmf: npt.ArrayLike,
source_block_size: int = 1,
) -> FixedToVariableCode:
r"""
Binary Fano code. It is a [fixed-to-variable length code](/ref/FixedToVariableCode) in which the source words are first sorted in descending order of probability and then are recursively partitioned into two groups of approximately equal total probability, assigning bit $\mathtt{0}$ to one group and bit $\mathtt{1}$ to the other, until each source word is assigned a unique codeword. For more details, see [Wikipedia: Shannon–Fano coding](https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding).

Notes:
Fano codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.

Examples:
>>> pmf = [0.7, 0.15, 0.15]

>>> code = komm.FanoCode(pmf, 1)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0,): (0,),
(1,): (1, 0),
(2,): (1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.3)

>>> code = komm.FanoCode(pmf, 2)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0, 0): (0,),
(0, 1): (1, 0, 0),
(0, 2): (1, 0, 1),
(1, 0): (1, 1, 0),
(1, 1): (1, 1, 1, 1, 0, 0),
(1, 2): (1, 1, 1, 1, 0, 1),
(2, 0): (1, 1, 1, 0),
(2, 1): (1, 1, 1, 1, 1, 0),
(2, 2): (1, 1, 1, 1, 1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.1975)
"""
pmf = PMF(pmf)
return FixedToVariableCode(
source_cardinality=pmf.size,
target_cardinality=2,
source_block_size=source_block_size,
enc_mapping=fano_algorithm(pmf, source_block_size),
)


def fano_algorithm(pmf: PMF, source_block_size: int) -> dict[Word, Word]:
pbar = tqdm(
desc="Generating Fano code",
total=2 * pmf.size**source_block_size,
delay=2.5,
)

enc_mapping = empty_mapping(pmf.size, source_block_size)
xpmf = extended_probabilities(pmf, source_block_size, pbar)
stack: list[tuple[list[tuple[Word, float]], Word]] = [(xpmf, ())]
while stack:
current_pmf, prefix = stack.pop()
if len(current_pmf) == 1:
u, _ = current_pmf[0]
enc_mapping[u] = prefix
pbar.update()
continue
probs = [p for _, p in current_pmf]
total = np.sum(probs)
index = np.argmin(np.abs(np.cumsum(probs) - total / 2))
stack.append((current_pmf[index + 1 :], prefix + (1,)))
stack.append((current_pmf[: index + 1], prefix + (0,)))

pbar.close()

return enc_mapping
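
A minimal usage sketch of the new `FanoCode`, assuming komm v0.16.0 as released by this commit; the NumPy entropy computation is included only for comparison and is not part of the commit:

```python
import numpy as np

import komm

pmf = [0.7, 0.15, 0.15]
code = komm.FanoCode(pmf, source_block_size=2)

# Fano codes are prefix-free, hence uniquely decodable.
assert code.is_prefix_free()

# The expected rate (in bits per source symbol) is lower-bounded by the source entropy.
entropy = -np.sum(np.array(pmf) * np.log2(pmf))
print(code.rate(pmf))  # ~1.1975, as in the docstring example above
print(entropy)         # ~1.1813
```
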
32 changes: 30 additions & 2 deletions src/komm/_lossless_coding/FixedToVariableCode.py
@@ -8,7 +8,7 @@
from .util import (
Word,
is_prefix_free,
-    is_uniquely_decipherable,
+    is_uniquely_parsable,
parse_fixed_length,
parse_prefix_free,
)
@@ -236,7 +236,7 @@ def is_uniquely_decodable(self) -> bool:
>>> code.is_uniquely_decodable() # 010 can be parsed as 0|10 or 01|0
False
"""
-        return is_uniquely_decipherable(self.codewords)
+        return is_uniquely_parsable(self.codewords)

def is_prefix_free(self) -> bool:
r"""
@@ -257,6 +257,34 @@ def is_prefix_free(self) -> bool:
"""
return is_prefix_free(self.codewords)

def kraft_parameter(self) -> float:
r"""
Computes the Kraft parameter $K$ of the code. This quantity is given by
$$
K = \sum_{u \in \mathcal{S}^k} T^{-{\ell_u}},
$$
where $\ell_u$ is the length of the codeword $\Enc(u)$, $T$ is the target cardinality, and $k$ is the source block size.

Returns:
kraft_parameter: The Kraft parameter $K$ of the code.

Examples:
>>> code = komm.FixedToVariableCode.from_codewords(5, [(0,0,0), (0,0,1), (0,1,0), (1,0,1), (1,1)])
>>> code.kraft_parameter()
np.float64(0.75)

>>> code = komm.FixedToVariableCode.from_codewords(4, [(0,), (1,0), (1,1,0), (1,1,1)])
>>> code.kraft_parameter()
np.float64(1.0)

>>> code = komm.FixedToVariableCode.from_codewords(4, [(0,0), (1,1), (0,), (1,)])
>>> code.kraft_parameter()
np.float64(1.5)
"""
T = self.target_cardinality
lengths = np.array([len(word) for word in self.codewords])
return np.sum(np.float_power(T, -lengths))

def rate(self, pmf: npt.ArrayLike) -> float:
r"""
Computes the expected rate $R$ of the code, considering a given pmf. This quantity is given by
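
The new `kraft_parameter` method pairs naturally with the Kraft–McMillan inequality: a uniquely decodable code must satisfy $K \leq 1$. A short illustrative sketch (editorial, reusing the codes from the docstring examples above):

```python
import komm

# Prefix-free code: K = 1.0, so the Kraft inequality holds with equality.
good = komm.FixedToVariableCode.from_codewords(4, [(0,), (1, 0), (1, 1, 0), (1, 1, 1)])
assert good.is_prefix_free()
assert good.kraft_parameter() <= 1.0

# K = 1.5 > 1 certifies that this code cannot be uniquely decodable.
bad = komm.FixedToVariableCode.from_codewords(4, [(0, 0), (1, 1), (0,), (1,)])
assert bad.kraft_parameter() > 1.0
assert not bad.is_uniquely_decodable()
```
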
54 changes: 34 additions & 20 deletions src/komm/_lossless_coding/HuffmanCode.py
@@ -1,9 +1,11 @@
-import heapq
+from dataclasses import dataclass
+from heapq import heapify, heappop, heappush
from itertools import product
from math import prod
from typing import Literal

import numpy.typing as npt
+from tqdm import tqdm
from typing_extensions import Self

from .._util.information_theory import PMF
@@ -21,6 +23,7 @@ def HuffmanCode(

Notes:
Huffman codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.
@@ -63,12 +66,12 @@ def HuffmanCode(
def huffman_algorithm(
pmf: PMF, source_block_size: int, policy: Literal["high", "low"]
) -> list[Word]:
+    @dataclass
class Node:
-        def __init__(self, index: int, probability: float):
-            self.index: int = index
-            self.probability: float = probability
-            self.parent: int | None = None
-            self.bit: int = -1
+        index: int
+        probability: float
+        parent: int | None = None
+        bit: int = -1

def __lt__(self, other: Self) -> bool:
i0, p0 = self.index, self.probability
@@ -78,29 +81,40 @@ def __lt__(self, other: Self) -> bool:
elif policy == "low":
return (p0, -i0) < (p1, -i1)

-    tree = [
-        Node(i, prod(probs))
-        for (i, probs) in enumerate(product(pmf, repeat=source_block_size))
-    ]
-    queue = [node for node in tree]
-    heapq.heapify(queue)
-    while len(queue) > 1:
-        node1 = heapq.heappop(queue)
-        node0 = heapq.heappop(queue)
+    pbar = tqdm(
+        desc="Generating Huffman code",
+        total=3 * pmf.size**source_block_size - 1,
+        delay=2.5,
+    )
+
+    tree: list[Node] = []
+    for index, probs in enumerate(product(pmf, repeat=source_block_size)):
+        tree.append(Node(index, prod(probs)))
+        pbar.update()
+
+    heap = tree.copy()
+    heapify(heap)
+    while len(heap) > 1:
+        node1 = heappop(heap)
+        node0 = heappop(heap)
node1.bit = 1
node0.bit = 0
node = Node(index=len(tree), probability=node0.probability + node1.probability)
node0.parent = node1.parent = node.index
-        heapq.heappush(queue, node)
+        heappush(heap, node)
tree.append(node)
+        pbar.update()

codewords: list[Word] = []
-    for symbol in range(pmf.size**source_block_size):
-        node = tree[symbol]
+    for index in range(pmf.size**source_block_size):
+        node = tree[index]
bits: list[int] = []
while node.parent is not None:
-            bits.insert(0, node.bit)
+            bits.append(node.bit)
node = tree[node.parent]
-        codewords.append(tuple(bits))
+        codewords.append(tuple(reversed(bits)))
+        pbar.update()
+
+    pbar.close()
+
return codewords
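
For context on how the refactored Huffman construction relates to the codes added in this release: Huffman codes are optimal prefix-free codes for a given pmf and block size, so their expected rate never exceeds that of the corresponding Shannon or Fano code. A small comparison sketch, assuming komm v0.16.0:

```python
import komm

pmf = [0.7, 0.15, 0.15]
for k in (1, 2):
    huffman = komm.HuffmanCode(pmf, k)
    # Huffman optimality: no prefix-free code for the same pmf and block size does better.
    assert huffman.rate(pmf) <= komm.FanoCode(pmf, k).rate(pmf)
    assert huffman.rate(pmf) <= komm.ShannonCode(pmf, k).rate(pmf)
```
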
90 changes: 90 additions & 0 deletions src/komm/_lossless_coding/ShannonCode.py
@@ -0,0 +1,90 @@
from math import ceil, log2

import numpy.typing as npt
from tqdm import tqdm

from .._util.information_theory import PMF
from .FixedToVariableCode import FixedToVariableCode
from .util import Word, empty_mapping, extended_probabilities


def ShannonCode(
pmf: npt.ArrayLike,
source_block_size: int = 1,
) -> FixedToVariableCode:
r"""
Binary Shannon code. It is a [fixed-to-variable length code](/ref/FixedToVariableCode) in which the length of the codeword $\Enc(u)$ for a source symbol $u \in \mathcal{S}^k$ is given by
$$
\ell_u = \left\lceil \log_2 \frac{1}{p_u} \right\rceil,
$$
where $p_u$ is the probability of the source symbol $u$. This function implements the lexicographic order assignment as described in [Wikipedia: Shannon–Fano coding](https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding).

Notes:
Shannon codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.

Examples:
>>> pmf = [0.7, 0.15, 0.15]

>>> code = komm.ShannonCode(pmf, 1)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0,): (0,),
(1,): (1, 0, 0),
(2,): (1, 0, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.6)

>>> code = komm.ShannonCode(pmf, 2)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0, 0): (0, 0),
(0, 1): (0, 1, 0, 0),
(0, 2): (0, 1, 0, 1),
(1, 0): (0, 1, 1, 0),
(1, 1): (1, 0, 0, 0, 0, 0),
(1, 2): (1, 0, 0, 0, 0, 1),
(2, 0): (0, 1, 1, 1),
(2, 1): (1, 0, 0, 0, 1, 0),
(2, 2): (1, 0, 0, 0, 1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.6)
"""
pmf = PMF(pmf)
return FixedToVariableCode(
source_cardinality=pmf.size,
target_cardinality=2,
source_block_size=source_block_size,
enc_mapping=shannon_code(pmf, source_block_size),
)


def next_in_lexicographic_order(word: Word) -> Word:
word_list = list(word)
for i in range(len(word_list) - 1, -1, -1):
if word_list[i] == 0:
word_list[i] = 1
break
word_list[i] = 0
return tuple(word_list)


def shannon_code(pmf: PMF, source_block_size: int) -> dict[Word, Word]:
pbar = tqdm(
desc="Generating Shannon code",
total=2 * pmf.size**source_block_size,
delay=2.5,
)

enc_mapping = empty_mapping(pmf.size, source_block_size)
v = ()
for u, pu in extended_probabilities(pmf, source_block_size, pbar):
length = ceil(log2(1 / pu))
v = next_in_lexicographic_order(v) + (0,) * (length - len(v))
enc_mapping[u] = v
pbar.update()

pbar.close()

return enc_mapping
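
A quick editorial check (for the scalar case $k = 1$) that the generated codeword lengths follow the Shannon rule $\ell_u = \lceil \log_2(1/p_u) \rceil$ stated in the docstring:

```python
from math import ceil, log2

import komm

pmf = [0.7, 0.15, 0.15]
code = komm.ShannonCode(pmf, 1)

for (symbol,), codeword in code.enc_mapping.items():
    assert len(codeword) == ceil(log2(1 / pmf[symbol]))

# For this pmf: ceil(log2(1/0.7)) = 1 and ceil(log2(1/0.15)) = 3, matching the
# codewords (0,), (1, 0, 0), (1, 0, 1) in the docstring example above.
```
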