Commit: Merge branch 'dev'
rwnobrega committed Jan 3, 2025
2 parents 22da35d + f4423ee commit 1b8006f
Showing 21 changed files with 452 additions and 83 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,13 @@
> [!NOTE]
> Changelog started with version v0.10.0.

## v0.16.0 (2025-01-03)

### Added

- Implemented [Shannon](https://komm.dev/ref/ShannonCode) and [Fano](https://komm.dev/ref/FanoCode) codes.
- Implemented method to compute the [Kraft parameter](https://komm.dev/ref/FixedToVariableCode#kraft_parameter) of a fixed-to-variable code.

## v0.15.1 (2024-12-31)

### Fixed
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "komm"
version = "0.15.1"
version = "0.16.0"
description = "An open-source library for Python 3 providing tools for analysis and simulation of analog and digital communication systems."
readme = "README.md"
authors = [{ name = "Roberto W. Nobrega", email = "[email protected]" }]
@@ -42,6 +42,7 @@ lint = [
]
test = [
"pytest==8.3.3",
"pytest-repeat==0.9.3",
"pytest-benchmark==5.1.0",
"pytest-cov==6.0.0",
]
2 changes: 2 additions & 0 deletions site/toc.yaml
@@ -73,6 +73,8 @@ Sequences:
Source coding:
Lossless coding:
- FixedToVariableCode
- ShannonCode
- FanoCode
- HuffmanCode
- VariableToFixedCode
- TunstallCode
2 changes: 1 addition & 1 deletion src/komm/_error_control_decoders/WagnerDecoder.py
@@ -11,7 +11,7 @@
@dataclass
class WagnerDecoder(base.BlockDecoder[SingleParityCheckCode]):
r"""
-    Wagner decoder for [single parity-check codes](/ref/SingleParityCheckCode). For more information, see <cite>CF07, Sec. III.C</cite>.
+    Wagner decoder for [single parity-check codes](/ref/SingleParityCheckCode). For more details, see <cite>CF07, Sec. III.C</cite>.

Parameters:
code: The single parity-check code to be used for decoding.
83 changes: 83 additions & 0 deletions src/komm/_lossless_coding/FanoCode.py
@@ -0,0 +1,83 @@
import numpy as np
import numpy.typing as npt
from tqdm import tqdm

from .._util.information_theory import PMF
from .FixedToVariableCode import FixedToVariableCode
from .util import Word, empty_mapping, extended_probabilities


def FanoCode(
pmf: npt.ArrayLike,
source_block_size: int = 1,
) -> FixedToVariableCode:
r"""
Binary Fano code. It is a [fixed-to-variable length code](/ref/FixedToVariableCode) in which the source words are first sorted in descending order of probability and then are recursively partitioned into two groups of approximately equal total probability, assigning bit $\mathtt{0}$ to one group and bit $\mathtt{1}$ to the other, until each source word is assigned a unique codeword. For more details, see [Wikipedia: Shannon–Fano coding](https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding).

Notes:
Fano codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.

Examples:
>>> pmf = [0.7, 0.15, 0.15]

>>> code = komm.FanoCode(pmf, 1)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0,): (0,),
(1,): (1, 0),
(2,): (1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.3)

>>> code = komm.FanoCode(pmf, 2)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0, 0): (0,),
(0, 1): (1, 0, 0),
(0, 2): (1, 0, 1),
(1, 0): (1, 1, 0),
(1, 1): (1, 1, 1, 1, 0, 0),
(1, 2): (1, 1, 1, 1, 0, 1),
(2, 0): (1, 1, 1, 0),
(2, 1): (1, 1, 1, 1, 1, 0),
(2, 2): (1, 1, 1, 1, 1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.1975)
"""
pmf = PMF(pmf)
return FixedToVariableCode(
source_cardinality=pmf.size,
target_cardinality=2,
source_block_size=source_block_size,
enc_mapping=fano_algorithm(pmf, source_block_size),
)


def fano_algorithm(pmf: PMF, source_block_size: int) -> dict[Word, Word]:
pbar = tqdm(
desc="Generating Fano code",
total=2 * pmf.size**source_block_size,
delay=2.5,
)

enc_mapping = empty_mapping(pmf.size, source_block_size)
xpmf = extended_probabilities(pmf, source_block_size, pbar)
stack: list[tuple[list[tuple[Word, float]], Word]] = [(xpmf, ())]
while stack:
current_pmf, prefix = stack.pop()
if len(current_pmf) == 1:
u, _ = current_pmf[0]
enc_mapping[u] = prefix
pbar.update()
continue
probs = [p for _, p in current_pmf]
total = np.sum(probs)
index = np.argmin(np.abs(np.cumsum(probs) - total / 2))
stack.append((current_pmf[index + 1 :], prefix + (1,)))
stack.append((current_pmf[: index + 1], prefix + (0,)))

pbar.close()

return enc_mapping
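
A minimal usage sketch of the new `FanoCode`, assuming komm v0.16.0 as released by this commit; the NumPy entropy computation is included only for comparison and is not part of the commit:

```python
import numpy as np

import komm

pmf = [0.7, 0.15, 0.15]
code = komm.FanoCode(pmf, source_block_size=2)

# Fano codes are prefix-free, hence uniquely decodable.
assert code.is_prefix_free()

# The expected rate (in bits per source symbol) is lower-bounded by the source entropy.
entropy = -np.sum(np.array(pmf) * np.log2(pmf))
print(code.rate(pmf))  # ~1.1975, as in the docstring example above
print(entropy)         # ~1.1813
```
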
32 changes: 30 additions & 2 deletions src/komm/_lossless_coding/FixedToVariableCode.py
@@ -8,7 +8,7 @@
from .util import (
Word,
is_prefix_free,
-    is_uniquely_decipherable,
+    is_uniquely_parsable,
parse_fixed_length,
parse_prefix_free,
)
@@ -236,7 +236,7 @@ def is_uniquely_decodable(self) -> bool:
>>> code.is_uniquely_decodable() # 010 can be parsed as 0|10 or 01|0
False
"""
-        return is_uniquely_decipherable(self.codewords)
+        return is_uniquely_parsable(self.codewords)

def is_prefix_free(self) -> bool:
r"""
@@ -257,6 +257,34 @@ def is_prefix_free(self) -> bool:
"""
return is_prefix_free(self.codewords)

def kraft_parameter(self) -> float:
r"""
Computes the Kraft parameter $K$ of the code. This quantity is given by
$$
K = \sum_{u \in \mathcal{S}^k} T^{-{\ell_u}},
$$
where $\ell_u$ is the length of the codeword $\Enc(u)$, $T$ is the target cardinality, and $k$ is the source block size.

Returns:
kraft_parameter: The Kraft parameter $K$ of the code.

Examples:
>>> code = komm.FixedToVariableCode.from_codewords(5, [(0,0,0), (0,0,1), (0,1,0), (1,0,1), (1,1)])
>>> code.kraft_parameter()
np.float64(0.75)

>>> code = komm.FixedToVariableCode.from_codewords(4, [(0,), (1,0), (1,1,0), (1,1,1)])
>>> code.kraft_parameter()
np.float64(1.0)

>>> code = komm.FixedToVariableCode.from_codewords(4, [(0,0), (1,1), (0,), (1,)])
>>> code.kraft_parameter()
np.float64(1.5)
"""
T = self.target_cardinality
lengths = np.array([len(word) for word in self.codewords])
return np.sum(np.float_power(T, -lengths))

def rate(self, pmf: npt.ArrayLike) -> float:
r"""
Computes the expected rate $R$ of the code, considering a given pmf. This quantity is given by
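
The new `kraft_parameter` method pairs naturally with the Kraft–McMillan inequality: a uniquely decodable code must satisfy $K \leq 1$. A short illustrative sketch (editorial, reusing the codes from the docstring examples above):

```python
import komm

# Prefix-free code: K = 1.0, so the Kraft inequality holds with equality.
good = komm.FixedToVariableCode.from_codewords(4, [(0,), (1, 0), (1, 1, 0), (1, 1, 1)])
assert good.is_prefix_free()
assert good.kraft_parameter() <= 1.0

# K = 1.5 > 1 certifies that this code cannot be uniquely decodable.
bad = komm.FixedToVariableCode.from_codewords(4, [(0, 0), (1, 1), (0,), (1,)])
assert bad.kraft_parameter() > 1.0
assert not bad.is_uniquely_decodable()
```
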
54 changes: 34 additions & 20 deletions src/komm/_lossless_coding/HuffmanCode.py
@@ -1,9 +1,11 @@
-import heapq
+from dataclasses import dataclass
+from heapq import heapify, heappop, heappush
from itertools import product
from math import prod
from typing import Literal

import numpy.typing as npt
+from tqdm import tqdm
from typing_extensions import Self

from .._util.information_theory import PMF
@@ -21,6 +23,7 @@ def HuffmanCode(

Notes:
Huffman codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.
@@ -63,12 +66,12 @@ def HuffmanCode(
def huffman_algorithm(
pmf: PMF, source_block_size: int, policy: Literal["high", "low"]
) -> list[Word]:
+    @dataclass
class Node:
-        def __init__(self, index: int, probability: float):
-            self.index: int = index
-            self.probability: float = probability
-            self.parent: int | None = None
-            self.bit: int = -1
+        index: int
+        probability: float
+        parent: int | None = None
+        bit: int = -1

def __lt__(self, other: Self) -> bool:
i0, p0 = self.index, self.probability
@@ -78,29 +81,40 @@ def __lt__(self, other: Self) -> bool:
elif policy == "low":
return (p0, -i0) < (p1, -i1)

-    tree = [
-        Node(i, prod(probs))
-        for (i, probs) in enumerate(product(pmf, repeat=source_block_size))
-    ]
-    queue = [node for node in tree]
-    heapq.heapify(queue)
-    while len(queue) > 1:
-        node1 = heapq.heappop(queue)
-        node0 = heapq.heappop(queue)
+    pbar = tqdm(
+        desc="Generating Huffman code",
+        total=3 * pmf.size**source_block_size - 1,
+        delay=2.5,
+    )
+
+    tree: list[Node] = []
+    for index, probs in enumerate(product(pmf, repeat=source_block_size)):
+        tree.append(Node(index, prod(probs)))
+        pbar.update()
+
+    heap = tree.copy()
+    heapify(heap)
+    while len(heap) > 1:
+        node1 = heappop(heap)
+        node0 = heappop(heap)
node1.bit = 1
node0.bit = 0
node = Node(index=len(tree), probability=node0.probability + node1.probability)
node0.parent = node1.parent = node.index
-        heapq.heappush(queue, node)
+        heappush(heap, node)
tree.append(node)
+        pbar.update()

codewords: list[Word] = []
-    for symbol in range(pmf.size**source_block_size):
-        node = tree[symbol]
+    for index in range(pmf.size**source_block_size):
+        node = tree[index]
bits: list[int] = []
while node.parent is not None:
-            bits.insert(0, node.bit)
+            bits.append(node.bit)
node = tree[node.parent]
-        codewords.append(tuple(bits))
+        codewords.append(tuple(reversed(bits)))
+        pbar.update()
+
+    pbar.close()
+
return codewords
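
For context on how the refactored Huffman construction relates to the codes added in this release: Huffman codes are optimal prefix-free codes for a given pmf and block size, so their expected rate never exceeds that of the corresponding Shannon or Fano code. A small comparison sketch, assuming komm v0.16.0:

```python
import komm

pmf = [0.7, 0.15, 0.15]
for k in (1, 2):
    huffman = komm.HuffmanCode(pmf, k)
    # Huffman optimality: no prefix-free code for the same pmf and block size does better.
    assert huffman.rate(pmf) <= komm.FanoCode(pmf, k).rate(pmf)
    assert huffman.rate(pmf) <= komm.ShannonCode(pmf, k).rate(pmf)
```
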
90 changes: 90 additions & 0 deletions src/komm/_lossless_coding/ShannonCode.py
@@ -0,0 +1,90 @@
from math import ceil, log2

import numpy.typing as npt
from tqdm import tqdm

from .._util.information_theory import PMF
from .FixedToVariableCode import FixedToVariableCode
from .util import Word, empty_mapping, extended_probabilities


def ShannonCode(
pmf: npt.ArrayLike,
source_block_size: int = 1,
) -> FixedToVariableCode:
r"""
Binary Shannon code. It is a [fixed-to-variable length code](/ref/FixedToVariableCode) in which the length of the codeword $\Enc(u)$ for a source symbol $u \in \mathcal{S}^k$ is given by
$$
\ell_u = \left\lceil \log_2 \frac{1}{p_u} \right\rceil,
$$
where $p_u$ is the probability of the source symbol $u$. This function implements the lexicographic order assignment as described in [Wikipedia: Shannon–Fano coding](https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding).

Notes:
Shannon codes are always [prefix-free](/ref/FixedToVariableCode/#is_prefix_free) (hence [uniquely decodable](/ref/FixedToVariableCode/#is_uniquely_decodable)).

Parameters:
pmf: The probability mass function of the source.
source_block_size: The source block size $k$. The default value is $k = 1$.

Examples:
>>> pmf = [0.7, 0.15, 0.15]

>>> code = komm.ShannonCode(pmf, 1)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0,): (0,),
(1,): (1, 0, 0),
(2,): (1, 0, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.6)

>>> code = komm.ShannonCode(pmf, 2)
>>> code.enc_mapping # doctest: +NORMALIZE_WHITESPACE
{(0, 0): (0, 0),
(0, 1): (0, 1, 0, 0),
(0, 2): (0, 1, 0, 1),
(1, 0): (0, 1, 1, 0),
(1, 1): (1, 0, 0, 0, 0, 0),
(1, 2): (1, 0, 0, 0, 0, 1),
(2, 0): (0, 1, 1, 1),
(2, 1): (1, 0, 0, 0, 1, 0),
(2, 2): (1, 0, 0, 0, 1, 1)}
>>> code.rate(pmf) # doctest: +NUMBER
np.float64(1.6)
"""
pmf = PMF(pmf)
return FixedToVariableCode(
source_cardinality=pmf.size,
target_cardinality=2,
source_block_size=source_block_size,
enc_mapping=shannon_code(pmf, source_block_size),
)


def next_in_lexicographic_order(word: Word) -> Word:
word_list = list(word)
for i in range(len(word_list) - 1, -1, -1):
if word_list[i] == 0:
word_list[i] = 1
break
word_list[i] = 0
return tuple(word_list)


def shannon_code(pmf: PMF, source_block_size: int) -> dict[Word, Word]:
pbar = tqdm(
desc="Generating Shannon code",
total=2 * pmf.size**source_block_size,
delay=2.5,
)

enc_mapping = empty_mapping(pmf.size, source_block_size)
v = ()
for u, pu in extended_probabilities(pmf, source_block_size, pbar):
length = ceil(log2(1 / pu))
v = next_in_lexicographic_order(v) + (0,) * (length - len(v))
enc_mapping[u] = v
pbar.update()

pbar.close()

return enc_mapping
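
A quick editorial check (for the scalar case $k = 1$) that the generated codeword lengths follow the Shannon rule $\ell_u = \lceil \log_2(1/p_u) \rceil$ stated in the docstring:

```python
from math import ceil, log2

import komm

pmf = [0.7, 0.15, 0.15]
code = komm.ShannonCode(pmf, 1)

for (symbol,), codeword in code.enc_mapping.items():
    assert len(codeword) == ceil(log2(1 / pmf[symbol]))

# For this pmf: ceil(log2(1/0.7)) = 1 and ceil(log2(1/0.15)) = 3, matching the
# codewords (0,), (1, 0, 0), (1, 0, 1) in the docstring example above.
```
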